author     Will Deacon <will.deacon@arm.com>    2010-11-13 14:04:32 -0500
committer  Will Deacon <will.deacon@arm.com>    2010-11-25 11:52:08 -0500
commit     43eab87828fee65f89f4088736b2b7a187390a2f (patch)
tree       f43f9b595b1401e5429d630cf84367923e52e701 /arch/arm
parent     629948310e4270e9b32c37b4a65a8cd5d6ebf38a (diff)
ARM: perf: separate PMU backends into multiple files
The ARM perf_event.c file contains all PMU backends and, as new PMUs are
introduced, will continue to grow. This patch follows the example of x86
and splits the PMU implementations into separate files which are then
#included back into the main file. Compile-time guards are added to each
PMU file to avoid compiling in code that is not relevant for the version
of the architecture which we are targeting.

Acked-by: Jean Pihet <j-pihet@ti.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
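The compile-time guard arrangement described in the message can be sketched as below. This is an illustrative reduction, not code from the patch: the CONFIG_CPU_V6 guard symbol and the NULL-returning stub on the #else path are assumptions chosen to show the shape of the pattern, with the real backend bodies elided.

/*
 * Hypothetical skeleton of one split-out backend file (e.g. perf_event_v6.c):
 * the implementation is compiled only when the matching architecture support
 * is configured; otherwise a stub keeps the #include in perf_event.c linking.
 */
#ifdef CONFIG_CPU_V6			/* assumed guard for the ARMv6 backend */

static const struct arm_pmu armv6pmu = {
	/* ... callbacks moved out of perf_event.c ... */
};

const struct arm_pmu *__init armv6pmu_init(void)
{
	return &armv6pmu;
}

#else	/* !CONFIG_CPU_V6 */

const struct arm_pmu *__init armv6pmu_init(void)
{
	return NULL;	/* backend not relevant for the targeted architecture */
}

#endif	/* CONFIG_CPU_V6 */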
Diffstat (limited to 'arch/arm')
-rw-r--r--   arch/arm/kernel/perf_event.c          2357
-rw-r--r--   arch/arm/kernel/perf_event_v6.c        672
-rw-r--r--   arch/arm/kernel/perf_event_v7.c        906
-rw-r--r--   arch/arm/kernel/perf_event_xscale.c    807
4 files changed, 2390 insertions(+), 2352 deletions(-)
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index ac4e9a1ed80b..421a4bb88fed 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -4,9 +4,7 @@
  * ARM performance counter support.
  *
  * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
- *
- * ARMv7 support: Jean Pihet <jpihet@mvista.com>
- * 2010 (c) MontaVista Software, LLC.
+ * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
  *
  * This code is based on the sparc64 perf event code, which is in turn based
  * on the x86 code. Callchain code is based on the ARM OProfile backtrace
@@ -606,2355 +604,10 @@ static struct pmu pmu = {
 	.read		= armpmu_read,
 };
 
+/* Include the PMU-specific implementations. */
+#include "perf_event_xscale.c"
+#include "perf_event_v6.c"
+#include "perf_event_v7.c"
-/*
- * ARMv6 Performance counter handling code.
- *
- * ARMv6 has 2 configurable performance counters and a single cycle counter.
613 * They all share a single reset bit but can be written to zero so we can use
614 * that for a reset.
615 *
616 * The counters can't be individually enabled or disabled so when we remove
617 * one event and replace it with another we could get spurious counts from the
618 * wrong event. However, we can take advantage of the fact that the
619 * performance counters can export events to the event bus, and the event bus
620 * itself can be monitored. This requires that we *don't* export the events to
621 * the event bus. The procedure for disabling a configurable counter is:
622 * - change the counter to count the ETMEXTOUT[0] signal (0x20). This
623 * effectively stops the counter from counting.
624 * - disable the counter's interrupt generation (each counter has its
625 * own interrupt enable bit).
626 * Once stopped, the counter value can be written as 0 to reset.
627 *
628 * To enable a counter:
629 * - enable the counter's interrupt generation.
630 * - set the new event type.
631 *
632 * Note: the dedicated cycle counter only counts cycles and can't be
633 * enabled/disabled independently of the others. When we want to disable the
634 * cycle counter, we have to just disable the interrupt reporting and start
635 * ignoring that counter. When re-enabling, we have to reset the value and
636 * enable the interrupt.
637 */
638
639enum armv6_perf_types {
640 ARMV6_PERFCTR_ICACHE_MISS = 0x0,
641 ARMV6_PERFCTR_IBUF_STALL = 0x1,
642 ARMV6_PERFCTR_DDEP_STALL = 0x2,
643 ARMV6_PERFCTR_ITLB_MISS = 0x3,
644 ARMV6_PERFCTR_DTLB_MISS = 0x4,
645 ARMV6_PERFCTR_BR_EXEC = 0x5,
646 ARMV6_PERFCTR_BR_MISPREDICT = 0x6,
647 ARMV6_PERFCTR_INSTR_EXEC = 0x7,
648 ARMV6_PERFCTR_DCACHE_HIT = 0x9,
649 ARMV6_PERFCTR_DCACHE_ACCESS = 0xA,
650 ARMV6_PERFCTR_DCACHE_MISS = 0xB,
651 ARMV6_PERFCTR_DCACHE_WBACK = 0xC,
652 ARMV6_PERFCTR_SW_PC_CHANGE = 0xD,
653 ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF,
654 ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10,
655 ARMV6_PERFCTR_LSU_FULL_STALL = 0x11,
656 ARMV6_PERFCTR_WBUF_DRAINED = 0x12,
657 ARMV6_PERFCTR_CPU_CYCLES = 0xFF,
658 ARMV6_PERFCTR_NOP = 0x20,
659};
660
661enum armv6_counters {
662 ARMV6_CYCLE_COUNTER = 1,
663 ARMV6_COUNTER0,
664 ARMV6_COUNTER1,
665};
666
667/*
668 * The hardware events that we support. We do support cache operations but
669 * we have harvard caches and no way to combine instruction and data
670 * accesses/misses in hardware.
671 */
672static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
673 [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES,
674 [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC,
675 [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
676 [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
677 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC,
678 [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT,
679 [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
680};
681
682static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
683 [PERF_COUNT_HW_CACHE_OP_MAX]
684 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
685 [C(L1D)] = {
686 /*
687 * The performance counters don't differentiate between read
688 * and write accesses/misses so this isn't strictly correct,
689 * but it's the best we can do. Writes and reads get
690 * combined.
691 */
692 [C(OP_READ)] = {
693 [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
694 [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
695 },
696 [C(OP_WRITE)] = {
697 [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
698 [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
699 },
700 [C(OP_PREFETCH)] = {
701 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
702 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
703 },
704 },
705 [C(L1I)] = {
706 [C(OP_READ)] = {
707 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
708 [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
709 },
710 [C(OP_WRITE)] = {
711 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
712 [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
713 },
714 [C(OP_PREFETCH)] = {
715 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
716 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
717 },
718 },
719 [C(LL)] = {
720 [C(OP_READ)] = {
721 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
722 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
723 },
724 [C(OP_WRITE)] = {
725 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
726 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
727 },
728 [C(OP_PREFETCH)] = {
729 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
730 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
731 },
732 },
733 [C(DTLB)] = {
734 /*
735 * The ARM performance counters can count micro DTLB misses,
736 * micro ITLB misses and main TLB misses. There isn't an event
737 * for TLB misses, so use the micro misses here and if users
738 * want the main TLB misses they can use a raw counter.
739 */
740 [C(OP_READ)] = {
741 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
742 [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
743 },
744 [C(OP_WRITE)] = {
745 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
746 [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
747 },
748 [C(OP_PREFETCH)] = {
749 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
750 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
751 },
752 },
753 [C(ITLB)] = {
754 [C(OP_READ)] = {
755 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
756 [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
757 },
758 [C(OP_WRITE)] = {
759 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
760 [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
761 },
762 [C(OP_PREFETCH)] = {
763 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
764 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
765 },
766 },
767 [C(BPU)] = {
768 [C(OP_READ)] = {
769 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
770 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
771 },
772 [C(OP_WRITE)] = {
773 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
774 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
775 },
776 [C(OP_PREFETCH)] = {
777 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
778 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
779 },
780 },
781};
782
783enum armv6mpcore_perf_types {
784 ARMV6MPCORE_PERFCTR_ICACHE_MISS = 0x0,
785 ARMV6MPCORE_PERFCTR_IBUF_STALL = 0x1,
786 ARMV6MPCORE_PERFCTR_DDEP_STALL = 0x2,
787 ARMV6MPCORE_PERFCTR_ITLB_MISS = 0x3,
788 ARMV6MPCORE_PERFCTR_DTLB_MISS = 0x4,
789 ARMV6MPCORE_PERFCTR_BR_EXEC = 0x5,
790 ARMV6MPCORE_PERFCTR_BR_NOTPREDICT = 0x6,
791 ARMV6MPCORE_PERFCTR_BR_MISPREDICT = 0x7,
792 ARMV6MPCORE_PERFCTR_INSTR_EXEC = 0x8,
793 ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA,
794 ARMV6MPCORE_PERFCTR_DCACHE_RDMISS = 0xB,
795 ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC,
796 ARMV6MPCORE_PERFCTR_DCACHE_WRMISS = 0xD,
797 ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE,
798 ARMV6MPCORE_PERFCTR_SW_PC_CHANGE = 0xF,
799 ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS = 0x10,
800 ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11,
801 ARMV6MPCORE_PERFCTR_LSU_FULL_STALL = 0x12,
802 ARMV6MPCORE_PERFCTR_WBUF_DRAINED = 0x13,
803 ARMV6MPCORE_PERFCTR_CPU_CYCLES = 0xFF,
804};
805
806/*
807 * The hardware events that we support. We do support cache operations but
808 * we have harvard caches and no way to combine instruction and data
809 * accesses/misses in hardware.
810 */
811static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
812 [PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES,
813 [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC,
814 [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
815 [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
816 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC,
817 [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT,
818 [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
819};
820
821static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
822 [PERF_COUNT_HW_CACHE_OP_MAX]
823 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
824 [C(L1D)] = {
825 [C(OP_READ)] = {
826 [C(RESULT_ACCESS)] =
827 ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
828 [C(RESULT_MISS)] =
829 ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
830 },
831 [C(OP_WRITE)] = {
832 [C(RESULT_ACCESS)] =
833 ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
834 [C(RESULT_MISS)] =
835 ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,
836 },
837 [C(OP_PREFETCH)] = {
838 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
839 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
840 },
841 },
842 [C(L1I)] = {
843 [C(OP_READ)] = {
844 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
845 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
846 },
847 [C(OP_WRITE)] = {
848 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
849 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
850 },
851 [C(OP_PREFETCH)] = {
852 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
853 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
854 },
855 },
856 [C(LL)] = {
857 [C(OP_READ)] = {
858 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
859 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
860 },
861 [C(OP_WRITE)] = {
862 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
863 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
864 },
865 [C(OP_PREFETCH)] = {
866 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
867 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
868 },
869 },
870 [C(DTLB)] = {
871 /*
872 * The ARM performance counters can count micro DTLB misses,
873 * micro ITLB misses and main TLB misses. There isn't an event
874 * for TLB misses, so use the micro misses here and if users
875 * want the main TLB misses they can use a raw counter.
876 */
877 [C(OP_READ)] = {
878 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
879 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
880 },
881 [C(OP_WRITE)] = {
882 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
883 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
884 },
885 [C(OP_PREFETCH)] = {
886 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
887 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
888 },
889 },
890 [C(ITLB)] = {
891 [C(OP_READ)] = {
892 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
893 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
894 },
895 [C(OP_WRITE)] = {
896 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
897 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
898 },
899 [C(OP_PREFETCH)] = {
900 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
901 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
902 },
903 },
904 [C(BPU)] = {
905 [C(OP_READ)] = {
906 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
907 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
908 },
909 [C(OP_WRITE)] = {
910 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
911 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
912 },
913 [C(OP_PREFETCH)] = {
914 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
915 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
916 },
917 },
918};
919
920static inline unsigned long
921armv6_pmcr_read(void)
922{
923 u32 val;
924 asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val));
925 return val;
926}
927
928static inline void
929armv6_pmcr_write(unsigned long val)
930{
931 asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val));
932}
933
934#define ARMV6_PMCR_ENABLE (1 << 0)
935#define ARMV6_PMCR_CTR01_RESET (1 << 1)
936#define ARMV6_PMCR_CCOUNT_RESET (1 << 2)
937#define ARMV6_PMCR_CCOUNT_DIV (1 << 3)
938#define ARMV6_PMCR_COUNT0_IEN (1 << 4)
939#define ARMV6_PMCR_COUNT1_IEN (1 << 5)
940#define ARMV6_PMCR_CCOUNT_IEN (1 << 6)
941#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8)
942#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9)
943#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10)
944#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20
945#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
946#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12
947#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)
948
949#define ARMV6_PMCR_OVERFLOWED_MASK \
950 (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \
951 ARMV6_PMCR_CCOUNT_OVERFLOW)
952
953static inline int
954armv6_pmcr_has_overflowed(unsigned long pmcr)
955{
956 return (pmcr & ARMV6_PMCR_OVERFLOWED_MASK);
957}
958
959static inline int
960armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
961 enum armv6_counters counter)
962{
963 int ret = 0;
964
965 if (ARMV6_CYCLE_COUNTER == counter)
966 ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW;
967 else if (ARMV6_COUNTER0 == counter)
968 ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW;
969 else if (ARMV6_COUNTER1 == counter)
970 ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW;
971 else
972 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
973
974 return ret;
975}
976
977static inline u32
978armv6pmu_read_counter(int counter)
979{
980 unsigned long value = 0;
981
982 if (ARMV6_CYCLE_COUNTER == counter)
983 asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value));
984 else if (ARMV6_COUNTER0 == counter)
985 asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value));
986 else if (ARMV6_COUNTER1 == counter)
987 asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value));
988 else
989 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
990
991 return value;
992}
993
994static inline void
995armv6pmu_write_counter(int counter,
996 u32 value)
997{
998 if (ARMV6_CYCLE_COUNTER == counter)
999 asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value));
1000 else if (ARMV6_COUNTER0 == counter)
1001 asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value));
1002 else if (ARMV6_COUNTER1 == counter)
1003 asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value));
1004 else
1005 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
1006}
1007
1008void
1009armv6pmu_enable_event(struct hw_perf_event *hwc,
1010 int idx)
1011{
1012 unsigned long val, mask, evt, flags;
1013
1014 if (ARMV6_CYCLE_COUNTER == idx) {
1015 mask = 0;
1016 evt = ARMV6_PMCR_CCOUNT_IEN;
1017 } else if (ARMV6_COUNTER0 == idx) {
1018 mask = ARMV6_PMCR_EVT_COUNT0_MASK;
1019 evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
1020 ARMV6_PMCR_COUNT0_IEN;
1021 } else if (ARMV6_COUNTER1 == idx) {
1022 mask = ARMV6_PMCR_EVT_COUNT1_MASK;
1023 evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) |
1024 ARMV6_PMCR_COUNT1_IEN;
1025 } else {
1026 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
1027 return;
1028 }
1029
1030 /*
1031 * Mask out the current event and set the counter to count the event
1032 * that we're interested in.
1033 */
1034 spin_lock_irqsave(&pmu_lock, flags);
1035 val = armv6_pmcr_read();
1036 val &= ~mask;
1037 val |= evt;
1038 armv6_pmcr_write(val);
1039 spin_unlock_irqrestore(&pmu_lock, flags);
1040}
1041
1042static irqreturn_t
1043armv6pmu_handle_irq(int irq_num,
1044 void *dev)
1045{
1046 unsigned long pmcr = armv6_pmcr_read();
1047 struct perf_sample_data data;
1048 struct cpu_hw_events *cpuc;
1049 struct pt_regs *regs;
1050 int idx;
1051
1052 if (!armv6_pmcr_has_overflowed(pmcr))
1053 return IRQ_NONE;
1054
1055 regs = get_irq_regs();
1056
1057 /*
1058 * The interrupts are cleared by writing the overflow flags back to
1059 * the control register. All of the other bits don't have any effect
1060 * if they are rewritten, so write the whole value back.
1061 */
1062 armv6_pmcr_write(pmcr);
1063
1064 perf_sample_data_init(&data, 0);
1065
1066 cpuc = &__get_cpu_var(cpu_hw_events);
1067 for (idx = 0; idx <= armpmu->num_events; ++idx) {
1068 struct perf_event *event = cpuc->events[idx];
1069 struct hw_perf_event *hwc;
1070
1071 if (!test_bit(idx, cpuc->active_mask))
1072 continue;
1073
1074 /*
1075 * We have a single interrupt for all counters. Check that
1076 * each counter has overflowed before we process it.
1077 */
1078 if (!armv6_pmcr_counter_has_overflowed(pmcr, idx))
1079 continue;
1080
1081 hwc = &event->hw;
1082 armpmu_event_update(event, hwc, idx);
1083 data.period = event->hw.last_period;
1084 if (!armpmu_event_set_period(event, hwc, idx))
1085 continue;
1086
1087 if (perf_event_overflow(event, 0, &data, regs))
1088 armpmu->disable(hwc, idx);
1089 }
1090
1091 /*
1092 * Handle the pending perf events.
1093 *
1094 * Note: this call *must* be run with interrupts disabled. For
1095 * platforms that can have the PMU interrupts raised as an NMI, this
1096 * will not work.
1097 */
1098 irq_work_run();
1099
1100 return IRQ_HANDLED;
1101}
1102
1103static void
1104armv6pmu_start(void)
1105{
1106 unsigned long flags, val;
1107
1108 spin_lock_irqsave(&pmu_lock, flags);
1109 val = armv6_pmcr_read();
1110 val |= ARMV6_PMCR_ENABLE;
1111 armv6_pmcr_write(val);
1112 spin_unlock_irqrestore(&pmu_lock, flags);
1113}
1114
1115static void
1116armv6pmu_stop(void)
1117{
1118 unsigned long flags, val;
1119
1120 spin_lock_irqsave(&pmu_lock, flags);
1121 val = armv6_pmcr_read();
1122 val &= ~ARMV6_PMCR_ENABLE;
1123 armv6_pmcr_write(val);
1124 spin_unlock_irqrestore(&pmu_lock, flags);
1125}
1126
1127static int
1128armv6pmu_get_event_idx(struct cpu_hw_events *cpuc,
1129 struct hw_perf_event *event)
1130{
1131 /* Always place a cycle counter into the cycle counter. */
1132 if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) {
1133 if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask))
1134 return -EAGAIN;
1135
1136 return ARMV6_CYCLE_COUNTER;
1137 } else {
1138 /*
1139 * For anything other than a cycle counter, try and use
1140 * counter0 and counter1.
1141 */
1142 if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) {
1143 return ARMV6_COUNTER1;
1144 }
1145
1146 if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) {
1147 return ARMV6_COUNTER0;
1148 }
1149
1150 /* The counters are all in use. */
1151 return -EAGAIN;
1152 }
1153}
1154
1155static void
1156armv6pmu_disable_event(struct hw_perf_event *hwc,
1157 int idx)
1158{
1159 unsigned long val, mask, evt, flags;
1160
1161 if (ARMV6_CYCLE_COUNTER == idx) {
1162 mask = ARMV6_PMCR_CCOUNT_IEN;
1163 evt = 0;
1164 } else if (ARMV6_COUNTER0 == idx) {
1165 mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK;
1166 evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
1167 } else if (ARMV6_COUNTER1 == idx) {
1168 mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK;
1169 evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT;
1170 } else {
1171 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
1172 return;
1173 }
1174
1175 /*
1176 * Mask out the current event and set the counter to count the number
1177 * of ETM bus signal assertion cycles. The external reporting should
1178 * be disabled and so this should never increment.
1179 */
1180 spin_lock_irqsave(&pmu_lock, flags);
1181 val = armv6_pmcr_read();
1182 val &= ~mask;
1183 val |= evt;
1184 armv6_pmcr_write(val);
1185 spin_unlock_irqrestore(&pmu_lock, flags);
1186}
1187
1188static void
1189armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc,
1190 int idx)
1191{
1192 unsigned long val, mask, flags, evt = 0;
1193
1194 if (ARMV6_CYCLE_COUNTER == idx) {
1195 mask = ARMV6_PMCR_CCOUNT_IEN;
1196 } else if (ARMV6_COUNTER0 == idx) {
1197 mask = ARMV6_PMCR_COUNT0_IEN;
1198 } else if (ARMV6_COUNTER1 == idx) {
1199 mask = ARMV6_PMCR_COUNT1_IEN;
1200 } else {
1201 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
1202 return;
1203 }
1204
1205 /*
1206 * Unlike UP ARMv6, we don't have a way of stopping the counters. We
1207 * simply disable the interrupt reporting.
1208 */
1209 spin_lock_irqsave(&pmu_lock, flags);
1210 val = armv6_pmcr_read();
1211 val &= ~mask;
1212 val |= evt;
1213 armv6_pmcr_write(val);
1214 spin_unlock_irqrestore(&pmu_lock, flags);
1215}
1216
1217static const struct arm_pmu armv6pmu = {
1218 .id = ARM_PERF_PMU_ID_V6,
1219 .name = "v6",
1220 .handle_irq = armv6pmu_handle_irq,
1221 .enable = armv6pmu_enable_event,
1222 .disable = armv6pmu_disable_event,
1223 .read_counter = armv6pmu_read_counter,
1224 .write_counter = armv6pmu_write_counter,
1225 .get_event_idx = armv6pmu_get_event_idx,
1226 .start = armv6pmu_start,
1227 .stop = armv6pmu_stop,
1228 .cache_map = &armv6_perf_cache_map,
1229 .event_map = &armv6_perf_map,
1230 .raw_event_mask = 0xFF,
1231 .num_events = 3,
1232 .max_period = (1LLU << 32) - 1,
1233};
1234
1235const struct arm_pmu *__init armv6pmu_init(void)
1236{
1237 return &armv6pmu;
1238}
1239
1240/*
1241 * ARMv6mpcore is almost identical to single core ARMv6 with the exception
1242 * that some of the events have different enumerations and that there is no
1243 * *hack* to stop the programmable counters. To stop the counters we simply
1244 * disable the interrupt reporting and update the event. When unthrottling we
1245 * reset the period and enable the interrupt reporting.
1246 */
1247static const struct arm_pmu armv6mpcore_pmu = {
1248 .id = ARM_PERF_PMU_ID_V6MP,
1249 .name = "v6mpcore",
1250 .handle_irq = armv6pmu_handle_irq,
1251 .enable = armv6pmu_enable_event,
1252 .disable = armv6mpcore_pmu_disable_event,
1253 .read_counter = armv6pmu_read_counter,
1254 .write_counter = armv6pmu_write_counter,
1255 .get_event_idx = armv6pmu_get_event_idx,
1256 .start = armv6pmu_start,
1257 .stop = armv6pmu_stop,
1258 .cache_map = &armv6mpcore_perf_cache_map,
1259 .event_map = &armv6mpcore_perf_map,
1260 .raw_event_mask = 0xFF,
1261 .num_events = 3,
1262 .max_period = (1LLU << 32) - 1,
1263};
1264
1265const struct arm_pmu *__init armv6mpcore_pmu_init(void)
1266{
1267 return &armv6mpcore_pmu;
1268}
1269
1270/*
1271 * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code.
1272 *
1273 * Copied from ARMv6 code, with the low level code inspired
1274 * by the ARMv7 Oprofile code.
1275 *
1276 * Cortex-A8 has up to 4 configurable performance counters and
1277 * a single cycle counter.
1278 * Cortex-A9 has up to 31 configurable performance counters and
1279 * a single cycle counter.
1280 *
1281 * All counters can be enabled/disabled and IRQ masked separately. The cycle
1282 * counter and all 4 performance counters together can be reset separately.
1283 */
1284
1285/* Common ARMv7 event types */
1286enum armv7_perf_types {
1287 ARMV7_PERFCTR_PMNC_SW_INCR = 0x00,
1288 ARMV7_PERFCTR_IFETCH_MISS = 0x01,
1289 ARMV7_PERFCTR_ITLB_MISS = 0x02,
1290 ARMV7_PERFCTR_DCACHE_REFILL = 0x03,
1291 ARMV7_PERFCTR_DCACHE_ACCESS = 0x04,
1292 ARMV7_PERFCTR_DTLB_REFILL = 0x05,
1293 ARMV7_PERFCTR_DREAD = 0x06,
1294 ARMV7_PERFCTR_DWRITE = 0x07,
1295
1296 ARMV7_PERFCTR_EXC_TAKEN = 0x09,
1297 ARMV7_PERFCTR_EXC_EXECUTED = 0x0A,
1298 ARMV7_PERFCTR_CID_WRITE = 0x0B,
1299 /* ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS.
1300 * It counts:
1301 * - all branch instructions,
1302 * - instructions that explicitly write the PC,
1303 * - exception generating instructions.
1304 */
1305 ARMV7_PERFCTR_PC_WRITE = 0x0C,
1306 ARMV7_PERFCTR_PC_IMM_BRANCH = 0x0D,
1307 ARMV7_PERFCTR_UNALIGNED_ACCESS = 0x0F,
1308 ARMV7_PERFCTR_PC_BRANCH_MIS_PRED = 0x10,
1309 ARMV7_PERFCTR_CLOCK_CYCLES = 0x11,
1310
1311 ARMV7_PERFCTR_PC_BRANCH_MIS_USED = 0x12,
1312
1313 ARMV7_PERFCTR_CPU_CYCLES = 0xFF
1314};
1315
1316/* ARMv7 Cortex-A8 specific event types */
1317enum armv7_a8_perf_types {
1318 ARMV7_PERFCTR_INSTR_EXECUTED = 0x08,
1319
1320 ARMV7_PERFCTR_PC_PROC_RETURN = 0x0E,
1321
1322 ARMV7_PERFCTR_WRITE_BUFFER_FULL = 0x40,
1323 ARMV7_PERFCTR_L2_STORE_MERGED = 0x41,
1324 ARMV7_PERFCTR_L2_STORE_BUFF = 0x42,
1325 ARMV7_PERFCTR_L2_ACCESS = 0x43,
1326 ARMV7_PERFCTR_L2_CACH_MISS = 0x44,
1327 ARMV7_PERFCTR_AXI_READ_CYCLES = 0x45,
1328 ARMV7_PERFCTR_AXI_WRITE_CYCLES = 0x46,
1329 ARMV7_PERFCTR_MEMORY_REPLAY = 0x47,
1330 ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY = 0x48,
1331 ARMV7_PERFCTR_L1_DATA_MISS = 0x49,
1332 ARMV7_PERFCTR_L1_INST_MISS = 0x4A,
1333 ARMV7_PERFCTR_L1_DATA_COLORING = 0x4B,
1334 ARMV7_PERFCTR_L1_NEON_DATA = 0x4C,
1335 ARMV7_PERFCTR_L1_NEON_CACH_DATA = 0x4D,
1336 ARMV7_PERFCTR_L2_NEON = 0x4E,
1337 ARMV7_PERFCTR_L2_NEON_HIT = 0x4F,
1338 ARMV7_PERFCTR_L1_INST = 0x50,
1339 ARMV7_PERFCTR_PC_RETURN_MIS_PRED = 0x51,
1340 ARMV7_PERFCTR_PC_BRANCH_FAILED = 0x52,
1341 ARMV7_PERFCTR_PC_BRANCH_TAKEN = 0x53,
1342 ARMV7_PERFCTR_PC_BRANCH_EXECUTED = 0x54,
1343 ARMV7_PERFCTR_OP_EXECUTED = 0x55,
1344 ARMV7_PERFCTR_CYCLES_INST_STALL = 0x56,
1345 ARMV7_PERFCTR_CYCLES_INST = 0x57,
1346 ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL = 0x58,
1347 ARMV7_PERFCTR_CYCLES_NEON_INST_STALL = 0x59,
1348 ARMV7_PERFCTR_NEON_CYCLES = 0x5A,
1349
1350 ARMV7_PERFCTR_PMU0_EVENTS = 0x70,
1351 ARMV7_PERFCTR_PMU1_EVENTS = 0x71,
1352 ARMV7_PERFCTR_PMU_EVENTS = 0x72,
1353};
1354
1355/* ARMv7 Cortex-A9 specific event types */
1356enum armv7_a9_perf_types {
1357 ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC = 0x40,
1358 ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC = 0x41,
1359 ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC = 0x42,
1360
1361 ARMV7_PERFCTR_COHERENT_LINE_MISS = 0x50,
1362 ARMV7_PERFCTR_COHERENT_LINE_HIT = 0x51,
1363
1364 ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES = 0x60,
1365 ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES = 0x61,
1366 ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES = 0x62,
1367 ARMV7_PERFCTR_STREX_EXECUTED_PASSED = 0x63,
1368 ARMV7_PERFCTR_STREX_EXECUTED_FAILED = 0x64,
1369 ARMV7_PERFCTR_DATA_EVICTION = 0x65,
1370 ARMV7_PERFCTR_ISSUE_STAGE_NO_INST = 0x66,
1371 ARMV7_PERFCTR_ISSUE_STAGE_EMPTY = 0x67,
1372 ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE = 0x68,
1373
1374 ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS = 0x6E,
1375
1376 ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST = 0x70,
1377 ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST = 0x71,
1378 ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST = 0x72,
1379 ARMV7_PERFCTR_FP_EXECUTED_INST = 0x73,
1380 ARMV7_PERFCTR_NEON_EXECUTED_INST = 0x74,
1381
1382 ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES = 0x80,
1383 ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES = 0x81,
1384 ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES = 0x82,
1385 ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES = 0x83,
1386 ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES = 0x84,
1387 ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES = 0x85,
1388 ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES = 0x86,
1389
1390 ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES = 0x8A,
1391 ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES = 0x8B,
1392
1393 ARMV7_PERFCTR_ISB_INST = 0x90,
1394 ARMV7_PERFCTR_DSB_INST = 0x91,
1395 ARMV7_PERFCTR_DMB_INST = 0x92,
1396 ARMV7_PERFCTR_EXT_INTERRUPTS = 0x93,
1397
1398 ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED = 0xA0,
1399 ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED = 0xA1,
1400 ARMV7_PERFCTR_PLE_FIFO_FLUSH = 0xA2,
1401 ARMV7_PERFCTR_PLE_RQST_COMPLETED = 0xA3,
1402 ARMV7_PERFCTR_PLE_FIFO_OVERFLOW = 0xA4,
1403 ARMV7_PERFCTR_PLE_RQST_PROG = 0xA5
1404};
1405
1406/*
1407 * Cortex-A8 HW events mapping
1408 *
1409 * The hardware events that we support. We do support cache operations but
1410 * we have harvard caches and no way to combine instruction and data
1411 * accesses/misses in hardware.
1412 */
1413static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = {
1414 [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
1415 [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
1416 [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
1417 [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
1418 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
1419 [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1420 [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
1421};
1422
1423static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
1424 [PERF_COUNT_HW_CACHE_OP_MAX]
1425 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
1426 [C(L1D)] = {
1427 /*
1428 * The performance counters don't differentiate between read
1429 * and write accesses/misses so this isn't strictly correct,
1430 * but it's the best we can do. Writes and reads get
1431 * combined.
1432 */
1433 [C(OP_READ)] = {
1434 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
1435 [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
1436 },
1437 [C(OP_WRITE)] = {
1438 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
1439 [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
1440 },
1441 [C(OP_PREFETCH)] = {
1442 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1443 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1444 },
1445 },
1446 [C(L1I)] = {
1447 [C(OP_READ)] = {
1448 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST,
1449 [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS,
1450 },
1451 [C(OP_WRITE)] = {
1452 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST,
1453 [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS,
1454 },
1455 [C(OP_PREFETCH)] = {
1456 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1457 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1458 },
1459 },
1460 [C(LL)] = {
1461 [C(OP_READ)] = {
1462 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS,
1463 [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS,
1464 },
1465 [C(OP_WRITE)] = {
1466 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS,
1467 [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS,
1468 },
1469 [C(OP_PREFETCH)] = {
1470 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1471 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1472 },
1473 },
1474 [C(DTLB)] = {
1475 /*
1476 * Only ITLB misses and DTLB refills are supported.
1477 * If users want the DTLB refills misses a raw counter
1478 * must be used.
1479 */
1480 [C(OP_READ)] = {
1481 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1482 [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
1483 },
1484 [C(OP_WRITE)] = {
1485 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1486 [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
1487 },
1488 [C(OP_PREFETCH)] = {
1489 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1490 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1491 },
1492 },
1493 [C(ITLB)] = {
1494 [C(OP_READ)] = {
1495 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1496 [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
1497 },
1498 [C(OP_WRITE)] = {
1499 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1500 [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
1501 },
1502 [C(OP_PREFETCH)] = {
1503 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1504 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1505 },
1506 },
1507 [C(BPU)] = {
1508 [C(OP_READ)] = {
1509 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
1510 [C(RESULT_MISS)]
1511 = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1512 },
1513 [C(OP_WRITE)] = {
1514 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
1515 [C(RESULT_MISS)]
1516 = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1517 },
1518 [C(OP_PREFETCH)] = {
1519 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1520 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1521 },
1522 },
1523};
1524
1525/*
1526 * Cortex-A9 HW events mapping
1527 */
1528static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = {
1529 [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
1530 [PERF_COUNT_HW_INSTRUCTIONS] =
1531 ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE,
1532 [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_COHERENT_LINE_HIT,
1533 [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_COHERENT_LINE_MISS,
1534 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
1535 [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1536 [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
1537};
1538
1539static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
1540 [PERF_COUNT_HW_CACHE_OP_MAX]
1541 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
1542 [C(L1D)] = {
1543 /*
1544 * The performance counters don't differentiate between read
1545 * and write accesses/misses so this isn't strictly correct,
1546 * but it's the best we can do. Writes and reads get
1547 * combined.
1548 */
1549 [C(OP_READ)] = {
1550 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
1551 [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
1552 },
1553 [C(OP_WRITE)] = {
1554 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
1555 [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
1556 },
1557 [C(OP_PREFETCH)] = {
1558 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1559 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1560 },
1561 },
1562 [C(L1I)] = {
1563 [C(OP_READ)] = {
1564 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1565 [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS,
1566 },
1567 [C(OP_WRITE)] = {
1568 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1569 [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS,
1570 },
1571 [C(OP_PREFETCH)] = {
1572 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1573 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1574 },
1575 },
1576 [C(LL)] = {
1577 [C(OP_READ)] = {
1578 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1579 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1580 },
1581 [C(OP_WRITE)] = {
1582 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1583 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1584 },
1585 [C(OP_PREFETCH)] = {
1586 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1587 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1588 },
1589 },
1590 [C(DTLB)] = {
1591 /*
1592 * Only ITLB misses and DTLB refills are supported.
1593 * If users want the DTLB refills misses a raw counter
1594 * must be used.
1595 */
1596 [C(OP_READ)] = {
1597 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1598 [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
1599 },
1600 [C(OP_WRITE)] = {
1601 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1602 [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
1603 },
1604 [C(OP_PREFETCH)] = {
1605 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1606 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1607 },
1608 },
1609 [C(ITLB)] = {
1610 [C(OP_READ)] = {
1611 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1612 [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
1613 },
1614 [C(OP_WRITE)] = {
1615 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1616 [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
1617 },
1618 [C(OP_PREFETCH)] = {
1619 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1620 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1621 },
1622 },
1623 [C(BPU)] = {
1624 [C(OP_READ)] = {
1625 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
1626 [C(RESULT_MISS)]
1627 = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1628 },
1629 [C(OP_WRITE)] = {
1630 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
1631 [C(RESULT_MISS)]
1632 = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1633 },
1634 [C(OP_PREFETCH)] = {
1635 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1636 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1637 },
1638 },
1639};
1640
1641/*
1642 * Perf Events counters
1643 */
1644enum armv7_counters {
1645 ARMV7_CYCLE_COUNTER = 1, /* Cycle counter */
1646 ARMV7_COUNTER0 = 2, /* First event counter */
1647};
1648
1649/*
1650 * The cycle counter is ARMV7_CYCLE_COUNTER.
1651 * The first event counter is ARMV7_COUNTER0.
1652 * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1).
1653 */
1654#define ARMV7_COUNTER_LAST (ARMV7_COUNTER0 + armpmu->num_events - 1)
1655
1656/*
1657 * ARMv7 low level PMNC access
1658 */
1659
1660/*
1661 * Per-CPU PMNC: config reg
1662 */
1663#define ARMV7_PMNC_E (1 << 0) /* Enable all counters */
1664#define ARMV7_PMNC_P (1 << 1) /* Reset all counters */
1665#define ARMV7_PMNC_C (1 << 2) /* Cycle counter reset */
1666#define ARMV7_PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */
1667#define ARMV7_PMNC_X (1 << 4) /* Export to ETM */
1668#define ARMV7_PMNC_DP (1 << 5) /* Disable CCNT if non-invasive debug*/
1669#define ARMV7_PMNC_N_SHIFT 11 /* Number of counters supported */
1670#define ARMV7_PMNC_N_MASK 0x1f
1671#define ARMV7_PMNC_MASK 0x3f /* Mask for writable bits */
1672
1673/*
1674 * Available counters
1675 */
1676#define ARMV7_CNT0 0 /* First event counter */
1677#define ARMV7_CCNT 31 /* Cycle counter */
1678
1679/* Perf Event to low level counters mapping */
1680#define ARMV7_EVENT_CNT_TO_CNTx (ARMV7_COUNTER0 - ARMV7_CNT0)
1681
1682/*
1683 * CNTENS: counters enable reg
1684 */
1685#define ARMV7_CNTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
1686#define ARMV7_CNTENS_C (1 << ARMV7_CCNT)
1687
1688/*
1689 * CNTENC: counters disable reg
1690 */
1691#define ARMV7_CNTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
1692#define ARMV7_CNTENC_C (1 << ARMV7_CCNT)
1693
1694/*
1695 * INTENS: counters overflow interrupt enable reg
1696 */
1697#define ARMV7_INTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
1698#define ARMV7_INTENS_C (1 << ARMV7_CCNT)
1699
1700/*
1701 * INTENC: counters overflow interrupt disable reg
1702 */
1703#define ARMV7_INTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
1704#define ARMV7_INTENC_C (1 << ARMV7_CCNT)
1705
1706/*
1707 * EVTSEL: Event selection reg
1708 */
1709#define ARMV7_EVTSEL_MASK 0xff /* Mask for writable bits */
1710
1711/*
1712 * SELECT: Counter selection reg
1713 */
1714#define ARMV7_SELECT_MASK 0x1f /* Mask for writable bits */
1715
1716/*
1717 * FLAG: counters overflow flag status reg
1718 */
1719#define ARMV7_FLAG_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
1720#define ARMV7_FLAG_C (1 << ARMV7_CCNT)
1721#define ARMV7_FLAG_MASK 0xffffffff /* Mask for writable bits */
1722#define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK
1723
1724static inline unsigned long armv7_pmnc_read(void)
1725{
1726 u32 val;
1727 asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val));
1728 return val;
1729}
1730
1731static inline void armv7_pmnc_write(unsigned long val)
1732{
1733 val &= ARMV7_PMNC_MASK;
1734 asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val));
1735}
1736
1737static inline int armv7_pmnc_has_overflowed(unsigned long pmnc)
1738{
1739 return pmnc & ARMV7_OVERFLOWED_MASK;
1740}
1741
1742static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc,
1743 enum armv7_counters counter)
1744{
1745 int ret = 0;
1746
1747 if (counter == ARMV7_CYCLE_COUNTER)
1748 ret = pmnc & ARMV7_FLAG_C;
1749 else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST))
1750 ret = pmnc & ARMV7_FLAG_P(counter);
1751 else
1752 pr_err("CPU%u checking wrong counter %d overflow status\n",
1753 smp_processor_id(), counter);
1754
1755 return ret;
1756}
1757
1758static inline int armv7_pmnc_select_counter(unsigned int idx)
1759{
1760 u32 val;
1761
1762 if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) {
1763 pr_err("CPU%u selecting wrong PMNC counter"
1764 " %d\n", smp_processor_id(), idx);
1765 return -1;
1766 }
1767
1768 val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK;
1769 asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val));
1770
1771 return idx;
1772}
1773
1774static inline u32 armv7pmu_read_counter(int idx)
1775{
1776 unsigned long value = 0;
1777
1778 if (idx == ARMV7_CYCLE_COUNTER)
1779 asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
1780 else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
1781 if (armv7_pmnc_select_counter(idx) == idx)
1782 asm volatile("mrc p15, 0, %0, c9, c13, 2"
1783 : "=r" (value));
1784 } else
1785 pr_err("CPU%u reading wrong counter %d\n",
1786 smp_processor_id(), idx);
1787
1788 return value;
1789}
1790
1791static inline void armv7pmu_write_counter(int idx, u32 value)
1792{
1793 if (idx == ARMV7_CYCLE_COUNTER)
1794 asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
1795 else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
1796 if (armv7_pmnc_select_counter(idx) == idx)
1797 asm volatile("mcr p15, 0, %0, c9, c13, 2"
1798 : : "r" (value));
1799 } else
1800 pr_err("CPU%u writing wrong counter %d\n",
1801 smp_processor_id(), idx);
1802}
1803
1804static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val)
1805{
1806 if (armv7_pmnc_select_counter(idx) == idx) {
1807 val &= ARMV7_EVTSEL_MASK;
1808 asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
1809 }
1810}
1811
1812static inline u32 armv7_pmnc_enable_counter(unsigned int idx)
1813{
1814 u32 val;
1815
1816 if ((idx != ARMV7_CYCLE_COUNTER) &&
1817 ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
1818 pr_err("CPU%u enabling wrong PMNC counter"
1819 " %d\n", smp_processor_id(), idx);
1820 return -1;
1821 }
1822
1823 if (idx == ARMV7_CYCLE_COUNTER)
1824 val = ARMV7_CNTENS_C;
1825 else
1826 val = ARMV7_CNTENS_P(idx);
1827
1828 asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val));
1829
1830 return idx;
1831}
1832
1833static inline u32 armv7_pmnc_disable_counter(unsigned int idx)
1834{
1835 u32 val;
1836
1837
1838 if ((idx != ARMV7_CYCLE_COUNTER) &&
1839 ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
1840 pr_err("CPU%u disabling wrong PMNC counter"
1841 " %d\n", smp_processor_id(), idx);
1842 return -1;
1843 }
1844
1845 if (idx == ARMV7_CYCLE_COUNTER)
1846 val = ARMV7_CNTENC_C;
1847 else
1848 val = ARMV7_CNTENC_P(idx);
1849
1850 asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val));
1851
1852 return idx;
1853}
1854
1855static inline u32 armv7_pmnc_enable_intens(unsigned int idx)
1856{
1857 u32 val;
1858
1859 if ((idx != ARMV7_CYCLE_COUNTER) &&
1860 ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
1861 pr_err("CPU%u enabling wrong PMNC counter"
1862 " interrupt enable %d\n", smp_processor_id(), idx);
1863 return -1;
1864 }
1865
1866 if (idx == ARMV7_CYCLE_COUNTER)
1867 val = ARMV7_INTENS_C;
1868 else
1869 val = ARMV7_INTENS_P(idx);
1870
1871 asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val));
1872
1873 return idx;
1874}
1875
1876static inline u32 armv7_pmnc_disable_intens(unsigned int idx)
1877{
1878 u32 val;
1879
1880 if ((idx != ARMV7_CYCLE_COUNTER) &&
1881 ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
1882 pr_err("CPU%u disabling wrong PMNC counter"
1883 " interrupt enable %d\n", smp_processor_id(), idx);
1884 return -1;
1885 }
1886
1887 if (idx == ARMV7_CYCLE_COUNTER)
1888 val = ARMV7_INTENC_C;
1889 else
1890 val = ARMV7_INTENC_P(idx);
1891
1892 asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val));
1893
1894 return idx;
1895}
1896
1897static inline u32 armv7_pmnc_getreset_flags(void)
1898{
1899 u32 val;
1900
1901 /* Read */
1902 asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
1903
1904 /* Write to clear flags */
1905 val &= ARMV7_FLAG_MASK;
1906 asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val));
1907
1908 return val;
1909}
1910
1911#ifdef DEBUG
1912static void armv7_pmnc_dump_regs(void)
1913{
1914 u32 val;
1915 unsigned int cnt;
1916
1917 printk(KERN_INFO "PMNC registers dump:\n");
1918
1919 asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val));
1920 printk(KERN_INFO "PMNC =0x%08x\n", val);
1921
1922 asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val));
1923 printk(KERN_INFO "CNTENS=0x%08x\n", val);
1924
1925 asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val));
1926 printk(KERN_INFO "INTENS=0x%08x\n", val);
1927
1928 asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
1929 printk(KERN_INFO "FLAGS =0x%08x\n", val);
1930
1931 asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val));
1932 printk(KERN_INFO "SELECT=0x%08x\n", val);
1933
1934 asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
1935 printk(KERN_INFO "CCNT =0x%08x\n", val);
1936
1937 for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) {
1938 armv7_pmnc_select_counter(cnt);
1939 asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val));
1940 printk(KERN_INFO "CNT[%d] count =0x%08x\n",
1941 cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
1942 asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val));
1943 printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n",
1944 cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
1945 }
1946}
1947#endif
1948
1949void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx)
1950{
1951 unsigned long flags;
1952
1953 /*
1954 * Enable counter and interrupt, and set the counter to count
1955 * the event that we're interested in.
1956 */
1957 spin_lock_irqsave(&pmu_lock, flags);
1958
1959 /*
1960 * Disable counter
1961 */
1962 armv7_pmnc_disable_counter(idx);
1963
1964 /*
1965 * Set event (if destined for PMNx counters)
1966 * We don't need to set the event if it's a cycle count
1967 */
1968 if (idx != ARMV7_CYCLE_COUNTER)
1969 armv7_pmnc_write_evtsel(idx, hwc->config_base);
1970
1971 /*
1972 * Enable interrupt for this counter
1973 */
1974 armv7_pmnc_enable_intens(idx);
1975
1976 /*
1977 * Enable counter
1978 */
1979 armv7_pmnc_enable_counter(idx);
1980
1981 spin_unlock_irqrestore(&pmu_lock, flags);
1982}
1983
1984static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx)
1985{
1986 unsigned long flags;
1987
1988 /*
1989 * Disable counter and interrupt
1990 */
1991 spin_lock_irqsave(&pmu_lock, flags);
1992
1993 /*
1994 * Disable counter
1995 */
1996 armv7_pmnc_disable_counter(idx);
1997
1998 /*
1999 * Disable interrupt for this counter
2000 */
2001 armv7_pmnc_disable_intens(idx);
2002
2003 spin_unlock_irqrestore(&pmu_lock, flags);
2004}
2005
2006static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
2007{
2008 unsigned long pmnc;
2009 struct perf_sample_data data;
2010 struct cpu_hw_events *cpuc;
2011 struct pt_regs *regs;
2012 int idx;
2013
2014 /*
2015 * Get and reset the IRQ flags
2016 */
2017 pmnc = armv7_pmnc_getreset_flags();
2018
2019 /*
2020 * Did an overflow occur?
2021 */
2022 if (!armv7_pmnc_has_overflowed(pmnc))
2023 return IRQ_NONE;
2024
2025 /*
2026 * Handle the counter(s) overflow(s)
2027 */
2028 regs = get_irq_regs();
2029
2030 perf_sample_data_init(&data, 0);
2031
2032 cpuc = &__get_cpu_var(cpu_hw_events);
2033 for (idx = 0; idx <= armpmu->num_events; ++idx) {
2034 struct perf_event *event = cpuc->events[idx];
2035 struct hw_perf_event *hwc;
2036
2037 if (!test_bit(idx, cpuc->active_mask))
2038 continue;
2039
2040 /*
2041 * We have a single interrupt for all counters. Check that
2042 * each counter has overflowed before we process it.
2043 */
2044 if (!armv7_pmnc_counter_has_overflowed(pmnc, idx))
2045 continue;
2046
2047 hwc = &event->hw;
2048 armpmu_event_update(event, hwc, idx);
2049 data.period = event->hw.last_period;
2050 if (!armpmu_event_set_period(event, hwc, idx))
2051 continue;
2052
2053 if (perf_event_overflow(event, 0, &data, regs))
2054 armpmu->disable(hwc, idx);
2055 }
2056
2057 /*
2058 * Handle the pending perf events.
2059 *
2060 * Note: this call *must* be run with interrupts disabled. For
2061 * platforms that can have the PMU interrupts raised as an NMI, this
2062 * will not work.
2063 */
2064 irq_work_run();
2065
2066 return IRQ_HANDLED;
2067}
2068
2069static void armv7pmu_start(void)
2070{
2071 unsigned long flags;
2072
2073 spin_lock_irqsave(&pmu_lock, flags);
2074 /* Enable all counters */
2075 armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
2076 spin_unlock_irqrestore(&pmu_lock, flags);
2077}
2078
2079static void armv7pmu_stop(void)
2080{
2081 unsigned long flags;
2082
2083 spin_lock_irqsave(&pmu_lock, flags);
2084 /* Disable all counters */
2085 armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
2086 spin_unlock_irqrestore(&pmu_lock, flags);
2087}
2088
2089static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc,
2090 struct hw_perf_event *event)
2091{
2092 int idx;
2093
2094 /* Always place a cycle counter into the cycle counter. */
2095 if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) {
2096 if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask))
2097 return -EAGAIN;
2098
2099 return ARMV7_CYCLE_COUNTER;
2100 } else {
2101 /*
2102 * For anything other than a cycle counter, try and use
2103 * the events counters
2104 */
2105 for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) {
2106 if (!test_and_set_bit(idx, cpuc->used_mask))
2107 return idx;
2108 }
2109
2110 /* The counters are all in use. */
2111 return -EAGAIN;
2112 }
2113}
2114
2115static struct arm_pmu armv7pmu = {
2116 .handle_irq = armv7pmu_handle_irq,
2117 .enable = armv7pmu_enable_event,
2118 .disable = armv7pmu_disable_event,
2119 .read_counter = armv7pmu_read_counter,
2120 .write_counter = armv7pmu_write_counter,
2121 .get_event_idx = armv7pmu_get_event_idx,
2122 .start = armv7pmu_start,
2123 .stop = armv7pmu_stop,
2124 .raw_event_mask = 0xFF,
2125 .max_period = (1LLU << 32) - 1,
2126};
2127
2128static u32 __init armv7_reset_read_pmnc(void)
2129{
2130 u32 nb_cnt;
2131
2132 /* Initialize & Reset PMNC: C and P bits */
2133 armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C);
2134
2135 /* Read the nb of CNTx counters supported from PMNC */
2136 nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK;
2137
2138 /* Add the CPU cycles counter and return */
2139 return nb_cnt + 1;
2140}
2141
2142const struct arm_pmu *__init armv7_a8_pmu_init(void)
2143{
2144 armv7pmu.id = ARM_PERF_PMU_ID_CA8;
2145 armv7pmu.name = "ARMv7 Cortex-A8";
2146 armv7pmu.cache_map = &armv7_a8_perf_cache_map;
2147 armv7pmu.event_map = &armv7_a8_perf_map;
2148 armv7pmu.num_events = armv7_reset_read_pmnc();
2149 return &armv7pmu;
2150}
2151
2152const struct arm_pmu *__init armv7_a9_pmu_init(void)
2153{
2154 armv7pmu.id = ARM_PERF_PMU_ID_CA9;
2155 armv7pmu.name = "ARMv7 Cortex-A9";
2156 armv7pmu.cache_map = &armv7_a9_perf_cache_map;
2157 armv7pmu.event_map = &armv7_a9_perf_map;
2158 armv7pmu.num_events = armv7_reset_read_pmnc();
2159 return &armv7pmu;
2160}
2161
2162
2163/*
2164 * ARMv5 [xscale] Performance counter handling code.
2165 *
2166 * Based on xscale OProfile code.
2167 *
2168 * There are two variants of the xscale PMU that we support:
2169 * - xscale1pmu: 2 event counters and a cycle counter
2170 * - xscale2pmu: 4 event counters and a cycle counter
2171 * The two variants share event definitions, but have different
2172 * PMU structures.
2173 */
2174
2175enum xscale_perf_types {
2176 XSCALE_PERFCTR_ICACHE_MISS = 0x00,
2177 XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01,
2178 XSCALE_PERFCTR_DATA_STALL = 0x02,
2179 XSCALE_PERFCTR_ITLB_MISS = 0x03,
2180 XSCALE_PERFCTR_DTLB_MISS = 0x04,
2181 XSCALE_PERFCTR_BRANCH = 0x05,
2182 XSCALE_PERFCTR_BRANCH_MISS = 0x06,
2183 XSCALE_PERFCTR_INSTRUCTION = 0x07,
2184 XSCALE_PERFCTR_DCACHE_FULL_STALL = 0x08,
2185 XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09,
2186 XSCALE_PERFCTR_DCACHE_ACCESS = 0x0A,
2187 XSCALE_PERFCTR_DCACHE_MISS = 0x0B,
2188 XSCALE_PERFCTR_DCACHE_WRITE_BACK = 0x0C,
2189 XSCALE_PERFCTR_PC_CHANGED = 0x0D,
2190 XSCALE_PERFCTR_BCU_REQUEST = 0x10,
2191 XSCALE_PERFCTR_BCU_FULL = 0x11,
2192 XSCALE_PERFCTR_BCU_DRAIN = 0x12,
2193 XSCALE_PERFCTR_BCU_ECC_NO_ELOG = 0x14,
2194 XSCALE_PERFCTR_BCU_1_BIT_ERR = 0x15,
2195 XSCALE_PERFCTR_RMW = 0x16,
2196 /* XSCALE_PERFCTR_CCNT is not hardware defined */
2197 XSCALE_PERFCTR_CCNT = 0xFE,
2198 XSCALE_PERFCTR_UNUSED = 0xFF,
2199};
2200
2201enum xscale_counters {
2202 XSCALE_CYCLE_COUNTER = 1,
2203 XSCALE_COUNTER0,
2204 XSCALE_COUNTER1,
2205 XSCALE_COUNTER2,
2206 XSCALE_COUNTER3,
2207};
2208
2209static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = {
2210 [PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT,
2211 [PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION,
2212 [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
2213 [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
2214 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH,
2215 [PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS,
2216 [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
2217};
2218
2219static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
2220 [PERF_COUNT_HW_CACHE_OP_MAX]
2221 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
2222 [C(L1D)] = {
2223 [C(OP_READ)] = {
2224 [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
2225 [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
2226 },
2227 [C(OP_WRITE)] = {
2228 [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
2229 [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
2230 },
2231 [C(OP_PREFETCH)] = {
2232 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2233 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2234 },
2235 },
2236 [C(L1I)] = {
2237 [C(OP_READ)] = {
2238 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2239 [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS,
2240 },
2241 [C(OP_WRITE)] = {
2242 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2243 [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS,
2244 },
2245 [C(OP_PREFETCH)] = {
2246 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2247 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2248 },
2249 },
2250 [C(LL)] = {
2251 [C(OP_READ)] = {
2252 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2253 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2254 },
2255 [C(OP_WRITE)] = {
2256 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2257 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2258 },
2259 [C(OP_PREFETCH)] = {
2260 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2261 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2262 },
2263 },
2264 [C(DTLB)] = {
2265 [C(OP_READ)] = {
2266 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2267 [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
2268 },
2269 [C(OP_WRITE)] = {
2270 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2271 [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
2272 },
2273 [C(OP_PREFETCH)] = {
2274 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2275 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2276 },
2277 },
2278 [C(ITLB)] = {
2279 [C(OP_READ)] = {
2280 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2281 [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
2282 },
2283 [C(OP_WRITE)] = {
2284 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2285 [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
2286 },
2287 [C(OP_PREFETCH)] = {
2288 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2289 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2290 },
2291 },
2292 [C(BPU)] = {
2293 [C(OP_READ)] = {
2294 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2295 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2296 },
2297 [C(OP_WRITE)] = {
2298 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2299 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2300 },
2301 [C(OP_PREFETCH)] = {
2302 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2303 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2304 },
2305 },
2306};
2307
2308#define XSCALE_PMU_ENABLE 0x001
2309#define XSCALE_PMN_RESET 0x002
2310#define XSCALE_CCNT_RESET 0x004
2311#define XSCALE_PMU_RESET (CCNT_RESET | PMN_RESET)
2312#define XSCALE_PMU_CNT64 0x008
2313
2314#define XSCALE1_OVERFLOWED_MASK 0x700
2315#define XSCALE1_CCOUNT_OVERFLOW 0x400
2316#define XSCALE1_COUNT0_OVERFLOW 0x100
2317#define XSCALE1_COUNT1_OVERFLOW 0x200
2318#define XSCALE1_CCOUNT_INT_EN 0x040
2319#define XSCALE1_COUNT0_INT_EN 0x010
2320#define XSCALE1_COUNT1_INT_EN 0x020
2321#define XSCALE1_COUNT0_EVT_SHFT 12
2322#define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT)
2323#define XSCALE1_COUNT1_EVT_SHFT 20
2324#define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT)
2325
2326static inline u32
2327xscale1pmu_read_pmnc(void)
2328{
2329 u32 val;
2330 asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val));
2331 return val;
2332}
2333
2334static inline void
2335xscale1pmu_write_pmnc(u32 val)
2336{
2337 /* upper 4 bits and bits 7, 11 are write-as-0 */
2338 val &= 0xffff77f;
2339 asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val));
2340}
2341
2342static inline int
2343xscale1_pmnc_counter_has_overflowed(unsigned long pmnc,
2344 enum xscale_counters counter)
2345{
2346 int ret = 0;
2347
2348 switch (counter) {
2349 case XSCALE_CYCLE_COUNTER:
2350 ret = pmnc & XSCALE1_CCOUNT_OVERFLOW;
2351 break;
2352 case XSCALE_COUNTER0:
2353 ret = pmnc & XSCALE1_COUNT0_OVERFLOW;
2354 break;
2355 case XSCALE_COUNTER1:
2356 ret = pmnc & XSCALE1_COUNT1_OVERFLOW;
2357 break;
2358 default:
2359 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
2360 }
2361
2362 return ret;
2363}
2364
2365static irqreturn_t
2366xscale1pmu_handle_irq(int irq_num, void *dev)
2367{
2368 unsigned long pmnc;
2369 struct perf_sample_data data;
2370 struct cpu_hw_events *cpuc;
2371 struct pt_regs *regs;
2372 int idx;
2373
2374 /*
2375 * NOTE: there's an A stepping erratum that states if an overflow
2376	 *	 bit is already set and another overflow occurs, the previous
2377	 *	 overflow bit gets cleared. There's no workaround.
2378 * Fixed in B stepping or later.
2379 */
2380 pmnc = xscale1pmu_read_pmnc();
2381
2382 /*
2383 * Write the value back to clear the overflow flags. Overflow
2384 * flags remain in pmnc for use below. We also disable the PMU
2385 * while we process the interrupt.
2386 */
2387 xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
2388
2389 if (!(pmnc & XSCALE1_OVERFLOWED_MASK))
2390 return IRQ_NONE;
2391
2392 regs = get_irq_regs();
2393
2394 perf_sample_data_init(&data, 0);
2395
2396 cpuc = &__get_cpu_var(cpu_hw_events);
2397 for (idx = 0; idx <= armpmu->num_events; ++idx) {
2398 struct perf_event *event = cpuc->events[idx];
2399 struct hw_perf_event *hwc;
2400
2401 if (!test_bit(idx, cpuc->active_mask))
2402 continue;
2403
2404 if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx))
2405 continue;
2406
2407 hwc = &event->hw;
2408 armpmu_event_update(event, hwc, idx);
2409 data.period = event->hw.last_period;
2410 if (!armpmu_event_set_period(event, hwc, idx))
2411 continue;
2412
2413 if (perf_event_overflow(event, 0, &data, regs))
2414 armpmu->disable(hwc, idx);
2415 }
2416
2417 irq_work_run();
2418
2419 /*
2420 * Re-enable the PMU.
2421 */
2422 pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE;
2423 xscale1pmu_write_pmnc(pmnc);
2424
2425 return IRQ_HANDLED;
2426}
2427
2428static void
2429xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx)
2430{
2431 unsigned long val, mask, evt, flags;
2432
2433 switch (idx) {
2434 case XSCALE_CYCLE_COUNTER:
2435 mask = 0;
2436 evt = XSCALE1_CCOUNT_INT_EN;
2437 break;
2438 case XSCALE_COUNTER0:
2439 mask = XSCALE1_COUNT0_EVT_MASK;
2440 evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) |
2441 XSCALE1_COUNT0_INT_EN;
2442 break;
2443 case XSCALE_COUNTER1:
2444 mask = XSCALE1_COUNT1_EVT_MASK;
2445 evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) |
2446 XSCALE1_COUNT1_INT_EN;
2447 break;
2448 default:
2449 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2450 return;
2451 }
2452
2453 spin_lock_irqsave(&pmu_lock, flags);
2454 val = xscale1pmu_read_pmnc();
2455 val &= ~mask;
2456 val |= evt;
2457 xscale1pmu_write_pmnc(val);
2458 spin_unlock_irqrestore(&pmu_lock, flags);
2459}
2460
2461static void
2462xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx)
2463{
2464 unsigned long val, mask, evt, flags;
2465
2466 switch (idx) {
2467 case XSCALE_CYCLE_COUNTER:
2468 mask = XSCALE1_CCOUNT_INT_EN;
2469 evt = 0;
2470 break;
2471 case XSCALE_COUNTER0:
2472 mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK;
2473 evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT;
2474 break;
2475 case XSCALE_COUNTER1:
2476 mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK;
2477 evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT;
2478 break;
2479 default:
2480 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2481 return;
2482 }
2483
2484 spin_lock_irqsave(&pmu_lock, flags);
2485 val = xscale1pmu_read_pmnc();
2486 val &= ~mask;
2487 val |= evt;
2488 xscale1pmu_write_pmnc(val);
2489 spin_unlock_irqrestore(&pmu_lock, flags);
2490}
2491
2492static int
2493xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc,
2494 struct hw_perf_event *event)
2495{
2496 if (XSCALE_PERFCTR_CCNT == event->config_base) {
2497 if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask))
2498 return -EAGAIN;
2499
2500 return XSCALE_CYCLE_COUNTER;
2501 } else {
2502 if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask)) {
2503 return XSCALE_COUNTER1;
2504 }
2505
2506 if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask)) {
2507 return XSCALE_COUNTER0;
2508 }
2509
2510 return -EAGAIN;
2511 }
2512}
2513
2514static void
2515xscale1pmu_start(void)
2516{
2517 unsigned long flags, val;
2518
2519 spin_lock_irqsave(&pmu_lock, flags);
2520 val = xscale1pmu_read_pmnc();
2521 val |= XSCALE_PMU_ENABLE;
2522 xscale1pmu_write_pmnc(val);
2523 spin_unlock_irqrestore(&pmu_lock, flags);
2524}
2525
2526static void
2527xscale1pmu_stop(void)
2528{
2529 unsigned long flags, val;
2530
2531 spin_lock_irqsave(&pmu_lock, flags);
2532 val = xscale1pmu_read_pmnc();
2533 val &= ~XSCALE_PMU_ENABLE;
2534 xscale1pmu_write_pmnc(val);
2535 spin_unlock_irqrestore(&pmu_lock, flags);
2536}
2537
2538static inline u32
2539xscale1pmu_read_counter(int counter)
2540{
2541 u32 val = 0;
2542
2543 switch (counter) {
2544 case XSCALE_CYCLE_COUNTER:
2545 asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val));
2546 break;
2547 case XSCALE_COUNTER0:
2548 asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val));
2549 break;
2550 case XSCALE_COUNTER1:
2551 asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val));
2552 break;
2553 }
2554
2555 return val;
2556}
2557
2558static inline void
2559xscale1pmu_write_counter(int counter, u32 val)
2560{
2561 switch (counter) {
2562 case XSCALE_CYCLE_COUNTER:
2563 asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val));
2564 break;
2565 case XSCALE_COUNTER0:
2566 asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val));
2567 break;
2568 case XSCALE_COUNTER1:
2569 asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val));
2570 break;
2571 }
2572}
2573
2574static const struct arm_pmu xscale1pmu = {
2575 .id = ARM_PERF_PMU_ID_XSCALE1,
2576 .name = "xscale1",
2577 .handle_irq = xscale1pmu_handle_irq,
2578 .enable = xscale1pmu_enable_event,
2579 .disable = xscale1pmu_disable_event,
2580 .read_counter = xscale1pmu_read_counter,
2581 .write_counter = xscale1pmu_write_counter,
2582 .get_event_idx = xscale1pmu_get_event_idx,
2583 .start = xscale1pmu_start,
2584 .stop = xscale1pmu_stop,
2585 .cache_map = &xscale_perf_cache_map,
2586 .event_map = &xscale_perf_map,
2587 .raw_event_mask = 0xFF,
2588 .num_events = 3,
2589 .max_period = (1LLU << 32) - 1,
2590};
2591
2592const struct arm_pmu *__init xscale1pmu_init(void)
2593{
2594 return &xscale1pmu;
2595}
2596
2597#define XSCALE2_OVERFLOWED_MASK 0x01f
2598#define XSCALE2_CCOUNT_OVERFLOW 0x001
2599#define XSCALE2_COUNT0_OVERFLOW 0x002
2600#define XSCALE2_COUNT1_OVERFLOW 0x004
2601#define XSCALE2_COUNT2_OVERFLOW 0x008
2602#define XSCALE2_COUNT3_OVERFLOW 0x010
2603#define XSCALE2_CCOUNT_INT_EN 0x001
2604#define XSCALE2_COUNT0_INT_EN 0x002
2605#define XSCALE2_COUNT1_INT_EN 0x004
2606#define XSCALE2_COUNT2_INT_EN 0x008
2607#define XSCALE2_COUNT3_INT_EN 0x010
2608#define XSCALE2_COUNT0_EVT_SHFT 0
2609#define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT)
2610#define XSCALE2_COUNT1_EVT_SHFT 8
2611#define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT)
2612#define XSCALE2_COUNT2_EVT_SHFT 16
2613#define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT)
2614#define XSCALE2_COUNT3_EVT_SHFT 24
2615#define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT)
2616
2617static inline u32
2618xscale2pmu_read_pmnc(void)
2619{
2620 u32 val;
2621 asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val));
2622 /* bits 1-2 and 4-23 are read-unpredictable */
2623 return val & 0xff000009;
2624}
2625
2626static inline void
2627xscale2pmu_write_pmnc(u32 val)
2628{
2629 /* bits 4-23 are write-as-0, 24-31 are write ignored */
2630 val &= 0xf;
2631 asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val));
2632}
2633
2634static inline u32
2635xscale2pmu_read_overflow_flags(void)
2636{
2637 u32 val;
2638 asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val));
2639 return val;
2640}
2641
2642static inline void
2643xscale2pmu_write_overflow_flags(u32 val)
2644{
2645 asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val));
2646}
2647
2648static inline u32
2649xscale2pmu_read_event_select(void)
2650{
2651 u32 val;
2652 asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val));
2653 return val;
2654}
2655
2656static inline void
2657xscale2pmu_write_event_select(u32 val)
2658{
2659 asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val));
2660}
2661
2662static inline u32
2663xscale2pmu_read_int_enable(void)
2664{
2665 u32 val;
2666 asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val));
2667 return val;
2668}
2669
2670static void
2671xscale2pmu_write_int_enable(u32 val)
2672{
2673 asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val));
2674}
2675
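Editorial aside, not part of the patch: unlike XScale v1, the v2 PMU splits configuration across separate event-select and interrupt-enable registers, so setting up a counter touches two registers instead of one PMNC read-modify-write. A minimal sketch using the accessors above (the helper name is invented; the real logic is in xscale2pmu_enable_event() below):

/* Illustrative only: program xscale2 counter 2 to count 'evt' and
 * enable its overflow interrupt, leaving the other counters alone. */
static inline void xscale2_example_setup_counter2(u32 evt)
{
	u32 evtsel = xscale2pmu_read_event_select();
	u32 ien = xscale2pmu_read_int_enable();

	/* Read-modify-write only the 8-bit field belonging to counter 2. */
	evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
	evtsel |= (evt << XSCALE2_COUNT2_EVT_SHFT) & XSCALE2_COUNT2_EVT_MASK;
	xscale2pmu_write_event_select(evtsel);

	/* Interrupt enables live in their own register on xscale2. */
	xscale2pmu_write_int_enable(ien | XSCALE2_COUNT2_INT_EN);
}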
2676static inline int
2677xscale2_pmnc_counter_has_overflowed(unsigned long of_flags,
2678 enum xscale_counters counter)
2679{
2680 int ret = 0;
2681
2682 switch (counter) {
2683 case XSCALE_CYCLE_COUNTER:
2684 ret = of_flags & XSCALE2_CCOUNT_OVERFLOW;
2685 break;
2686 case XSCALE_COUNTER0:
2687 ret = of_flags & XSCALE2_COUNT0_OVERFLOW;
2688 break;
2689 case XSCALE_COUNTER1:
2690 ret = of_flags & XSCALE2_COUNT1_OVERFLOW;
2691 break;
2692 case XSCALE_COUNTER2:
2693 ret = of_flags & XSCALE2_COUNT2_OVERFLOW;
2694 break;
2695 case XSCALE_COUNTER3:
2696 ret = of_flags & XSCALE2_COUNT3_OVERFLOW;
2697 break;
2698 default:
2699 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
2700 }
2701
2702 return ret;
2703}
2704
2705static irqreturn_t
2706xscale2pmu_handle_irq(int irq_num, void *dev)
2707{
2708 unsigned long pmnc, of_flags;
2709 struct perf_sample_data data;
2710 struct cpu_hw_events *cpuc;
2711 struct pt_regs *regs;
2712 int idx;
2713
2714 /* Disable the PMU. */
2715 pmnc = xscale2pmu_read_pmnc();
2716 xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
2717
2718 /* Check the overflow flag register. */
2719 of_flags = xscale2pmu_read_overflow_flags();
2720 if (!(of_flags & XSCALE2_OVERFLOWED_MASK))
2721 return IRQ_NONE;
2722
2723 /* Clear the overflow bits. */
2724 xscale2pmu_write_overflow_flags(of_flags);
2725
2726 regs = get_irq_regs();
2727
2728 perf_sample_data_init(&data, 0);
2729
2730 cpuc = &__get_cpu_var(cpu_hw_events);
2731 for (idx = 0; idx <= armpmu->num_events; ++idx) {
2732 struct perf_event *event = cpuc->events[idx];
2733 struct hw_perf_event *hwc;
2734
2735 if (!test_bit(idx, cpuc->active_mask))
2736 continue;
2737
2738		if (!xscale2_pmnc_counter_has_overflowed(of_flags, idx))
2739 continue;
2740
2741 hwc = &event->hw;
2742 armpmu_event_update(event, hwc, idx);
2743 data.period = event->hw.last_period;
2744 if (!armpmu_event_set_period(event, hwc, idx))
2745 continue;
2746
2747 if (perf_event_overflow(event, 0, &data, regs))
2748 armpmu->disable(hwc, idx);
2749 }
2750
2751 irq_work_run();
2752
2753 /*
2754 * Re-enable the PMU.
2755 */
2756 pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE;
2757 xscale2pmu_write_pmnc(pmnc);
2758
2759 return IRQ_HANDLED;
2760}
2761
2762static void
2763xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx)
2764{
2765 unsigned long flags, ien, evtsel;
2766
2767 ien = xscale2pmu_read_int_enable();
2768 evtsel = xscale2pmu_read_event_select();
2769
2770 switch (idx) {
2771 case XSCALE_CYCLE_COUNTER:
2772 ien |= XSCALE2_CCOUNT_INT_EN;
2773 break;
2774 case XSCALE_COUNTER0:
2775 ien |= XSCALE2_COUNT0_INT_EN;
2776 evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
2777 evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT;
2778 break;
2779 case XSCALE_COUNTER1:
2780 ien |= XSCALE2_COUNT1_INT_EN;
2781 evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
2782 evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT;
2783 break;
2784 case XSCALE_COUNTER2:
2785 ien |= XSCALE2_COUNT2_INT_EN;
2786 evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
2787 evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT;
2788 break;
2789 case XSCALE_COUNTER3:
2790 ien |= XSCALE2_COUNT3_INT_EN;
2791 evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
2792 evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT;
2793 break;
2794 default:
2795 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2796 return;
2797 }
2798
2799 spin_lock_irqsave(&pmu_lock, flags);
2800 xscale2pmu_write_event_select(evtsel);
2801 xscale2pmu_write_int_enable(ien);
2802 spin_unlock_irqrestore(&pmu_lock, flags);
2803}
2804
2805static void
2806xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
2807{
2808 unsigned long flags, ien, evtsel;
2809
2810 ien = xscale2pmu_read_int_enable();
2811 evtsel = xscale2pmu_read_event_select();
2812
2813 switch (idx) {
2814 case XSCALE_CYCLE_COUNTER:
2815 ien &= ~XSCALE2_CCOUNT_INT_EN;
2816 break;
2817 case XSCALE_COUNTER0:
2818 ien &= ~XSCALE2_COUNT0_INT_EN;
2819 evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
2820 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT;
2821 break;
2822 case XSCALE_COUNTER1:
2823 ien &= ~XSCALE2_COUNT1_INT_EN;
2824 evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
2825 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT;
2826 break;
2827 case XSCALE_COUNTER2:
2828 ien &= ~XSCALE2_COUNT2_INT_EN;
2829 evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
2830 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT;
2831 break;
2832 case XSCALE_COUNTER3:
2833 ien &= ~XSCALE2_COUNT3_INT_EN;
2834 evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
2835 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT;
2836 break;
2837 default:
2838 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2839 return;
2840 }
2841
2842 spin_lock_irqsave(&pmu_lock, flags);
2843 xscale2pmu_write_event_select(evtsel);
2844 xscale2pmu_write_int_enable(ien);
2845 spin_unlock_irqrestore(&pmu_lock, flags);
2846}
2847
2848static int
2849xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc,
2850 struct hw_perf_event *event)
2851{
2852 int idx = xscale1pmu_get_event_idx(cpuc, event);
2853 if (idx >= 0)
2854 goto out;
2855
2856 if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask))
2857 idx = XSCALE_COUNTER3;
2858 else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask))
2859 idx = XSCALE_COUNTER2;
2860out:
2861 return idx;
2862}
2863
2864static void
2865xscale2pmu_start(void)
2866{
2867 unsigned long flags, val;
2868
2869 spin_lock_irqsave(&pmu_lock, flags);
2870 val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
2871 val |= XSCALE_PMU_ENABLE;
2872 xscale2pmu_write_pmnc(val);
2873 spin_unlock_irqrestore(&pmu_lock, flags);
2874}
2875
2876static void
2877xscale2pmu_stop(void)
2878{
2879 unsigned long flags, val;
2880
2881 spin_lock_irqsave(&pmu_lock, flags);
2882 val = xscale2pmu_read_pmnc();
2883 val &= ~XSCALE_PMU_ENABLE;
2884 xscale2pmu_write_pmnc(val);
2885 spin_unlock_irqrestore(&pmu_lock, flags);
2886}
2887
2888static inline u32
2889xscale2pmu_read_counter(int counter)
2890{
2891 u32 val = 0;
2892
2893 switch (counter) {
2894 case XSCALE_CYCLE_COUNTER:
2895 asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val));
2896 break;
2897 case XSCALE_COUNTER0:
2898 asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val));
2899 break;
2900 case XSCALE_COUNTER1:
2901 asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val));
2902 break;
2903 case XSCALE_COUNTER2:
2904 asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val));
2905 break;
2906 case XSCALE_COUNTER3:
2907 asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val));
2908 break;
2909 }
2910
2911 return val;
2912}
2913
2914static inline void
2915xscale2pmu_write_counter(int counter, u32 val)
2916{
2917 switch (counter) {
2918 case XSCALE_CYCLE_COUNTER:
2919 asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val));
2920 break;
2921 case XSCALE_COUNTER0:
2922 asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val));
2923 break;
2924 case XSCALE_COUNTER1:
2925 asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val));
2926 break;
2927 case XSCALE_COUNTER2:
2928 asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val));
2929 break;
2930 case XSCALE_COUNTER3:
2931 asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val));
2932 break;
2933 }
2934}
2935
2936static const struct arm_pmu xscale2pmu = {
2937 .id = ARM_PERF_PMU_ID_XSCALE2,
2938 .name = "xscale2",
2939 .handle_irq = xscale2pmu_handle_irq,
2940 .enable = xscale2pmu_enable_event,
2941 .disable = xscale2pmu_disable_event,
2942 .read_counter = xscale2pmu_read_counter,
2943 .write_counter = xscale2pmu_write_counter,
2944 .get_event_idx = xscale2pmu_get_event_idx,
2945 .start = xscale2pmu_start,
2946 .stop = xscale2pmu_stop,
2947 .cache_map = &xscale_perf_cache_map,
2948 .event_map = &xscale_perf_map,
2949 .raw_event_mask = 0xFF,
2950 .num_events = 5,
2951 .max_period = (1LLU << 32) - 1,
2952};
2953
2954const struct arm_pmu *__init xscale2pmu_init(void)
2955{
2956 return &xscale2pmu;
2957}
2958 611
2959static int __init 612static int __init
2960init_hw_perf_events(void) 613init_hw_perf_events(void)
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
new file mode 100644
index 000000000000..7aeb07da9076
--- /dev/null
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -0,0 +1,672 @@
1/*
2 * ARMv6 Performance counter handling code.
3 *
4 * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
5 *
6 * ARMv6 has 2 configurable performance counters and a single cycle counter.
7 * They all share a single reset bit but can be written to zero so we can use
8 * that for a reset.
9 *
10 * The counters can't be individually enabled or disabled so when we remove
11 * one event and replace it with another we could get spurious counts from the
12 * wrong event. However, we can take advantage of the fact that the
13 * performance counters can export events to the event bus, and the event bus
14 * itself can be monitored. This requires that we *don't* export the events to
15 * the event bus. The procedure for disabling a configurable counter is:
16 * - change the counter to count the ETMEXTOUT[0] signal (0x20). This
17 * effectively stops the counter from counting.
18	 *   - disable the counter's interrupt generation (each counter has its
19 * own interrupt enable bit).
20 * Once stopped, the counter value can be written as 0 to reset.
21 *
22 * To enable a counter:
23 * - enable the counter's interrupt generation.
24 * - set the new event type.
25 *
26 * Note: the dedicated cycle counter only counts cycles and can't be
27 * enabled/disabled independently of the others. When we want to disable the
28 * cycle counter, we have to just disable the interrupt reporting and start
29 * ignoring that counter. When re-enabling, we have to reset the value and
30 * enable the interrupt.
31 */
32
33#ifdef CONFIG_CPU_V6
34enum armv6_perf_types {
35 ARMV6_PERFCTR_ICACHE_MISS = 0x0,
36 ARMV6_PERFCTR_IBUF_STALL = 0x1,
37 ARMV6_PERFCTR_DDEP_STALL = 0x2,
38 ARMV6_PERFCTR_ITLB_MISS = 0x3,
39 ARMV6_PERFCTR_DTLB_MISS = 0x4,
40 ARMV6_PERFCTR_BR_EXEC = 0x5,
41 ARMV6_PERFCTR_BR_MISPREDICT = 0x6,
42 ARMV6_PERFCTR_INSTR_EXEC = 0x7,
43 ARMV6_PERFCTR_DCACHE_HIT = 0x9,
44 ARMV6_PERFCTR_DCACHE_ACCESS = 0xA,
45 ARMV6_PERFCTR_DCACHE_MISS = 0xB,
46 ARMV6_PERFCTR_DCACHE_WBACK = 0xC,
47 ARMV6_PERFCTR_SW_PC_CHANGE = 0xD,
48 ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF,
49 ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10,
50 ARMV6_PERFCTR_LSU_FULL_STALL = 0x11,
51 ARMV6_PERFCTR_WBUF_DRAINED = 0x12,
52 ARMV6_PERFCTR_CPU_CYCLES = 0xFF,
53 ARMV6_PERFCTR_NOP = 0x20,
54};
55
56enum armv6_counters {
57 ARMV6_CYCLE_COUNTER = 1,
58 ARMV6_COUNTER0,
59 ARMV6_COUNTER1,
60};
61
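Editorial aside, not part of the patch: counter indices in this file are 1-based and index 0 is never allocated, which is why the interrupt handlers iterate idx from 0 to armpmu->num_events inclusive and rely on the active_mask test to skip the unused slot. A small sketch of the convention (the helper and the CCNT/PMN0/PMN1 labels are illustrative only):

/* Illustrative only: the 1-based index convention used throughout. */
static inline const char *armv6_example_counter_name(int idx)
{
	switch (idx) {
	case ARMV6_CYCLE_COUNTER:	return "CCNT";		/* idx 1 */
	case ARMV6_COUNTER0:		return "PMN0";		/* idx 2 */
	case ARMV6_COUNTER1:		return "PMN1";		/* idx 3 */
	default:			return "unused";	/* e.g. idx 0 */
	}
}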
62/*
63 * The hardware events that we support. We do support cache operations but
64 * we have harvard caches and no way to combine instruction and data
65 * accesses/misses in hardware.
66 */
67static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
68 [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES,
69 [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC,
70 [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
71 [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
72 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC,
73 [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT,
74 [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
75};
76
77static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
78 [PERF_COUNT_HW_CACHE_OP_MAX]
79 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
80 [C(L1D)] = {
81 /*
82 * The performance counters don't differentiate between read
83 * and write accesses/misses so this isn't strictly correct,
84 * but it's the best we can do. Writes and reads get
85 * combined.
86 */
87 [C(OP_READ)] = {
88 [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
89 [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
90 },
91 [C(OP_WRITE)] = {
92 [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
93 [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
94 },
95 [C(OP_PREFETCH)] = {
96 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
97 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
98 },
99 },
100 [C(L1I)] = {
101 [C(OP_READ)] = {
102 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
103 [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
104 },
105 [C(OP_WRITE)] = {
106 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
107 [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
108 },
109 [C(OP_PREFETCH)] = {
110 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
111 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
112 },
113 },
114 [C(LL)] = {
115 [C(OP_READ)] = {
116 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
117 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
118 },
119 [C(OP_WRITE)] = {
120 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
121 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
122 },
123 [C(OP_PREFETCH)] = {
124 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
125 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
126 },
127 },
128 [C(DTLB)] = {
129 /*
130 * The ARM performance counters can count micro DTLB misses,
131 * micro ITLB misses and main TLB misses. There isn't an event
132 * for TLB misses, so use the micro misses here and if users
133 * want the main TLB misses they can use a raw counter.
134 */
135 [C(OP_READ)] = {
136 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
137 [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
138 },
139 [C(OP_WRITE)] = {
140 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
141 [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
142 },
143 [C(OP_PREFETCH)] = {
144 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
145 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
146 },
147 },
148 [C(ITLB)] = {
149 [C(OP_READ)] = {
150 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
151 [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
152 },
153 [C(OP_WRITE)] = {
154 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
155 [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
156 },
157 [C(OP_PREFETCH)] = {
158 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
159 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
160 },
161 },
162 [C(BPU)] = {
163 [C(OP_READ)] = {
164 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
165 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
166 },
167 [C(OP_WRITE)] = {
168 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
169 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
170 },
171 [C(OP_PREFETCH)] = {
172 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
173 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
174 },
175 },
176};
177
178enum armv6mpcore_perf_types {
179 ARMV6MPCORE_PERFCTR_ICACHE_MISS = 0x0,
180 ARMV6MPCORE_PERFCTR_IBUF_STALL = 0x1,
181 ARMV6MPCORE_PERFCTR_DDEP_STALL = 0x2,
182 ARMV6MPCORE_PERFCTR_ITLB_MISS = 0x3,
183 ARMV6MPCORE_PERFCTR_DTLB_MISS = 0x4,
184 ARMV6MPCORE_PERFCTR_BR_EXEC = 0x5,
185 ARMV6MPCORE_PERFCTR_BR_NOTPREDICT = 0x6,
186 ARMV6MPCORE_PERFCTR_BR_MISPREDICT = 0x7,
187 ARMV6MPCORE_PERFCTR_INSTR_EXEC = 0x8,
188 ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA,
189 ARMV6MPCORE_PERFCTR_DCACHE_RDMISS = 0xB,
190 ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC,
191 ARMV6MPCORE_PERFCTR_DCACHE_WRMISS = 0xD,
192 ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE,
193 ARMV6MPCORE_PERFCTR_SW_PC_CHANGE = 0xF,
194 ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS = 0x10,
195 ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11,
196 ARMV6MPCORE_PERFCTR_LSU_FULL_STALL = 0x12,
197 ARMV6MPCORE_PERFCTR_WBUF_DRAINED = 0x13,
198 ARMV6MPCORE_PERFCTR_CPU_CYCLES = 0xFF,
199};
200
201/*
202 * The hardware events that we support. We do support cache operations but
203 * we have harvard caches and no way to combine instruction and data
204 * accesses/misses in hardware.
205 */
206static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
207 [PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES,
208 [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC,
209 [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
210 [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
211 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC,
212 [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT,
213 [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
214};
215
216static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
217 [PERF_COUNT_HW_CACHE_OP_MAX]
218 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
219 [C(L1D)] = {
220 [C(OP_READ)] = {
221 [C(RESULT_ACCESS)] =
222 ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
223 [C(RESULT_MISS)] =
224 ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
225 },
226 [C(OP_WRITE)] = {
227 [C(RESULT_ACCESS)] =
228 ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
229 [C(RESULT_MISS)] =
230 ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,
231 },
232 [C(OP_PREFETCH)] = {
233 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
234 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
235 },
236 },
237 [C(L1I)] = {
238 [C(OP_READ)] = {
239 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
240 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
241 },
242 [C(OP_WRITE)] = {
243 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
244 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
245 },
246 [C(OP_PREFETCH)] = {
247 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
248 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
249 },
250 },
251 [C(LL)] = {
252 [C(OP_READ)] = {
253 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
254 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
255 },
256 [C(OP_WRITE)] = {
257 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
258 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
259 },
260 [C(OP_PREFETCH)] = {
261 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
262 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
263 },
264 },
265 [C(DTLB)] = {
266 /*
267 * The ARM performance counters can count micro DTLB misses,
268 * micro ITLB misses and main TLB misses. There isn't an event
269 * for TLB misses, so use the micro misses here and if users
270 * want the main TLB misses they can use a raw counter.
271 */
272 [C(OP_READ)] = {
273 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
274 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
275 },
276 [C(OP_WRITE)] = {
277 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
278 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
279 },
280 [C(OP_PREFETCH)] = {
281 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
282 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
283 },
284 },
285 [C(ITLB)] = {
286 [C(OP_READ)] = {
287 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
288 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
289 },
290 [C(OP_WRITE)] = {
291 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
292 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
293 },
294 [C(OP_PREFETCH)] = {
295 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
296 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
297 },
298 },
299 [C(BPU)] = {
300 [C(OP_READ)] = {
301 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
302 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
303 },
304 [C(OP_WRITE)] = {
305 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
306 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
307 },
308 [C(OP_PREFETCH)] = {
309 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
310 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
311 },
312 },
313};
314
315static inline unsigned long
316armv6_pmcr_read(void)
317{
318 u32 val;
319 asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val));
320 return val;
321}
322
323static inline void
324armv6_pmcr_write(unsigned long val)
325{
326 asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val));
327}
328
329#define ARMV6_PMCR_ENABLE (1 << 0)
330#define ARMV6_PMCR_CTR01_RESET (1 << 1)
331#define ARMV6_PMCR_CCOUNT_RESET (1 << 2)
332#define ARMV6_PMCR_CCOUNT_DIV (1 << 3)
333#define ARMV6_PMCR_COUNT0_IEN (1 << 4)
334#define ARMV6_PMCR_COUNT1_IEN (1 << 5)
335#define ARMV6_PMCR_CCOUNT_IEN (1 << 6)
336#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8)
337#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9)
338#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10)
339#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20
340#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
341#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12
342#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)
343
344#define ARMV6_PMCR_OVERFLOWED_MASK \
345 (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \
346 ARMV6_PMCR_CCOUNT_OVERFLOW)
347
348static inline int
349armv6_pmcr_has_overflowed(unsigned long pmcr)
350{
351 return pmcr & ARMV6_PMCR_OVERFLOWED_MASK;
352}
353
354static inline int
355armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
356 enum armv6_counters counter)
357{
358 int ret = 0;
359
360 if (ARMV6_CYCLE_COUNTER == counter)
361 ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW;
362 else if (ARMV6_COUNTER0 == counter)
363 ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW;
364 else if (ARMV6_COUNTER1 == counter)
365 ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW;
366 else
367 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
368
369 return ret;
370}
371
372static inline u32
373armv6pmu_read_counter(int counter)
374{
375 unsigned long value = 0;
376
377 if (ARMV6_CYCLE_COUNTER == counter)
378 asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value));
379 else if (ARMV6_COUNTER0 == counter)
380 asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value));
381 else if (ARMV6_COUNTER1 == counter)
382 asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value));
383 else
384 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
385
386 return value;
387}
388
389static inline void
390armv6pmu_write_counter(int counter,
391 u32 value)
392{
393 if (ARMV6_CYCLE_COUNTER == counter)
394 asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value));
395 else if (ARMV6_COUNTER0 == counter)
396 asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value));
397 else if (ARMV6_COUNTER1 == counter)
398 asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value));
399 else
400 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
401}
402
403void
404armv6pmu_enable_event(struct hw_perf_event *hwc,
405 int idx)
406{
407 unsigned long val, mask, evt, flags;
408
409 if (ARMV6_CYCLE_COUNTER == idx) {
410 mask = 0;
411 evt = ARMV6_PMCR_CCOUNT_IEN;
412 } else if (ARMV6_COUNTER0 == idx) {
413 mask = ARMV6_PMCR_EVT_COUNT0_MASK;
414 evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
415 ARMV6_PMCR_COUNT0_IEN;
416 } else if (ARMV6_COUNTER1 == idx) {
417 mask = ARMV6_PMCR_EVT_COUNT1_MASK;
418 evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) |
419 ARMV6_PMCR_COUNT1_IEN;
420 } else {
421 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
422 return;
423 }
424
425 /*
426 * Mask out the current event and set the counter to count the event
427 * that we're interested in.
428 */
429 spin_lock_irqsave(&pmu_lock, flags);
430 val = armv6_pmcr_read();
431 val &= ~mask;
432 val |= evt;
433 armv6_pmcr_write(val);
434 spin_unlock_irqrestore(&pmu_lock, flags);
435}
436
437static irqreturn_t
438armv6pmu_handle_irq(int irq_num,
439 void *dev)
440{
441 unsigned long pmcr = armv6_pmcr_read();
442 struct perf_sample_data data;
443 struct cpu_hw_events *cpuc;
444 struct pt_regs *regs;
445 int idx;
446
447 if (!armv6_pmcr_has_overflowed(pmcr))
448 return IRQ_NONE;
449
450 regs = get_irq_regs();
451
452 /*
453 * The interrupts are cleared by writing the overflow flags back to
454 * the control register. All of the other bits don't have any effect
455 * if they are rewritten, so write the whole value back.
456 */
457 armv6_pmcr_write(pmcr);
458
459 perf_sample_data_init(&data, 0);
460
461 cpuc = &__get_cpu_var(cpu_hw_events);
462 for (idx = 0; idx <= armpmu->num_events; ++idx) {
463 struct perf_event *event = cpuc->events[idx];
464 struct hw_perf_event *hwc;
465
466 if (!test_bit(idx, cpuc->active_mask))
467 continue;
468
469 /*
470 * We have a single interrupt for all counters. Check that
471 * each counter has overflowed before we process it.
472 */
473 if (!armv6_pmcr_counter_has_overflowed(pmcr, idx))
474 continue;
475
476 hwc = &event->hw;
477 armpmu_event_update(event, hwc, idx);
478 data.period = event->hw.last_period;
479 if (!armpmu_event_set_period(event, hwc, idx))
480 continue;
481
482 if (perf_event_overflow(event, 0, &data, regs))
483 armpmu->disable(hwc, idx);
484 }
485
486 /*
487 * Handle the pending perf events.
488 *
489 * Note: this call *must* be run with interrupts disabled. For
490 * platforms that can have the PMU interrupts raised as an NMI, this
491 * will not work.
492 */
493 irq_work_run();
494
495 return IRQ_HANDLED;
496}
497
498static void
499armv6pmu_start(void)
500{
501 unsigned long flags, val;
502
503 spin_lock_irqsave(&pmu_lock, flags);
504 val = armv6_pmcr_read();
505 val |= ARMV6_PMCR_ENABLE;
506 armv6_pmcr_write(val);
507 spin_unlock_irqrestore(&pmu_lock, flags);
508}
509
510static void
511armv6pmu_stop(void)
512{
513 unsigned long flags, val;
514
515 spin_lock_irqsave(&pmu_lock, flags);
516 val = armv6_pmcr_read();
517 val &= ~ARMV6_PMCR_ENABLE;
518 armv6_pmcr_write(val);
519 spin_unlock_irqrestore(&pmu_lock, flags);
520}
521
522static int
523armv6pmu_get_event_idx(struct cpu_hw_events *cpuc,
524 struct hw_perf_event *event)
525{
526 /* Always place a cycle counter into the cycle counter. */
527 if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) {
528 if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask))
529 return -EAGAIN;
530
531 return ARMV6_CYCLE_COUNTER;
532 } else {
533 /*
534 * For anything other than a cycle counter, try and use
535 * counter0 and counter1.
536 */
537 if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask))
538 return ARMV6_COUNTER1;
539
540 if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask))
541 return ARMV6_COUNTER0;
542
543 /* The counters are all in use. */
544 return -EAGAIN;
545 }
546}
547
548static void
549armv6pmu_disable_event(struct hw_perf_event *hwc,
550 int idx)
551{
552 unsigned long val, mask, evt, flags;
553
554 if (ARMV6_CYCLE_COUNTER == idx) {
555 mask = ARMV6_PMCR_CCOUNT_IEN;
556 evt = 0;
557 } else if (ARMV6_COUNTER0 == idx) {
558 mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK;
559 evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
560 } else if (ARMV6_COUNTER1 == idx) {
561 mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK;
562 evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT;
563 } else {
564 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
565 return;
566 }
567
568 /*
569 * Mask out the current event and set the counter to count the number
570 * of ETM bus signal assertion cycles. The external reporting should
571 * be disabled and so this should never increment.
572 */
573 spin_lock_irqsave(&pmu_lock, flags);
574 val = armv6_pmcr_read();
575 val &= ~mask;
576 val |= evt;
577 armv6_pmcr_write(val);
578 spin_unlock_irqrestore(&pmu_lock, flags);
579}
580
581static void
582armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc,
583 int idx)
584{
585 unsigned long val, mask, flags, evt = 0;
586
587 if (ARMV6_CYCLE_COUNTER == idx) {
588 mask = ARMV6_PMCR_CCOUNT_IEN;
589 } else if (ARMV6_COUNTER0 == idx) {
590 mask = ARMV6_PMCR_COUNT0_IEN;
591 } else if (ARMV6_COUNTER1 == idx) {
592 mask = ARMV6_PMCR_COUNT1_IEN;
593 } else {
594 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
595 return;
596 }
597
598 /*
599 * Unlike UP ARMv6, we don't have a way of stopping the counters. We
600 * simply disable the interrupt reporting.
601 */
602 spin_lock_irqsave(&pmu_lock, flags);
603 val = armv6_pmcr_read();
604 val &= ~mask;
605 val |= evt;
606 armv6_pmcr_write(val);
607 spin_unlock_irqrestore(&pmu_lock, flags);
608}
609
610static const struct arm_pmu armv6pmu = {
611 .id = ARM_PERF_PMU_ID_V6,
612 .name = "v6",
613 .handle_irq = armv6pmu_handle_irq,
614 .enable = armv6pmu_enable_event,
615 .disable = armv6pmu_disable_event,
616 .read_counter = armv6pmu_read_counter,
617 .write_counter = armv6pmu_write_counter,
618 .get_event_idx = armv6pmu_get_event_idx,
619 .start = armv6pmu_start,
620 .stop = armv6pmu_stop,
621 .cache_map = &armv6_perf_cache_map,
622 .event_map = &armv6_perf_map,
623 .raw_event_mask = 0xFF,
624 .num_events = 3,
625 .max_period = (1LLU << 32) - 1,
626};
627
628const struct arm_pmu *__init armv6pmu_init(void)
629{
630 return &armv6pmu;
631}
632
633/*
634 * ARMv6mpcore is almost identical to single core ARMv6 with the exception
635 * that some of the events have different enumerations and that there is no
636 * *hack* to stop the programmable counters. To stop the counters we simply
637 * disable the interrupt reporting and update the event. When unthrottling we
638 * reset the period and enable the interrupt reporting.
639 */
640static const struct arm_pmu armv6mpcore_pmu = {
641 .id = ARM_PERF_PMU_ID_V6MP,
642 .name = "v6mpcore",
643 .handle_irq = armv6pmu_handle_irq,
644 .enable = armv6pmu_enable_event,
645 .disable = armv6mpcore_pmu_disable_event,
646 .read_counter = armv6pmu_read_counter,
647 .write_counter = armv6pmu_write_counter,
648 .get_event_idx = armv6pmu_get_event_idx,
649 .start = armv6pmu_start,
650 .stop = armv6pmu_stop,
651 .cache_map = &armv6mpcore_perf_cache_map,
652 .event_map = &armv6mpcore_perf_map,
653 .raw_event_mask = 0xFF,
654 .num_events = 3,
655 .max_period = (1LLU << 32) - 1,
656};
657
658const struct arm_pmu *__init armv6mpcore_pmu_init(void)
659{
660 return &armv6mpcore_pmu;
661}
662#else
663const struct arm_pmu *__init armv6pmu_init(void)
664{
665 return NULL;
666}
667
668const struct arm_pmu *__init armv6mpcore_pmu_init(void)
669{
670 return NULL;
671}
672#endif /* CONFIG_CPU_V6 */
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
new file mode 100644
index 000000000000..4d0423969df9
--- /dev/null
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -0,0 +1,906 @@
1/*
2 * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code.
3 *
4 * ARMv7 support: Jean Pihet <jpihet@mvista.com>
5 * 2010 (c) MontaVista Software, LLC.
6 *
7 * Copied from ARMv6 code, with the low level code inspired
8 * by the ARMv7 Oprofile code.
9 *
10 * Cortex-A8 has up to 4 configurable performance counters and
11 * a single cycle counter.
12 * Cortex-A9 has up to 31 configurable performance counters and
13 * a single cycle counter.
14 *
15 * All counters can be enabled/disabled and IRQ masked separately. The cycle
16 * counter and all 4 performance counters together can be reset separately.
17 */
18
19#ifdef CONFIG_CPU_V7
20/* Common ARMv7 event types */
21enum armv7_perf_types {
22 ARMV7_PERFCTR_PMNC_SW_INCR = 0x00,
23 ARMV7_PERFCTR_IFETCH_MISS = 0x01,
24 ARMV7_PERFCTR_ITLB_MISS = 0x02,
25 ARMV7_PERFCTR_DCACHE_REFILL = 0x03,
26 ARMV7_PERFCTR_DCACHE_ACCESS = 0x04,
27 ARMV7_PERFCTR_DTLB_REFILL = 0x05,
28 ARMV7_PERFCTR_DREAD = 0x06,
29 ARMV7_PERFCTR_DWRITE = 0x07,
30
31 ARMV7_PERFCTR_EXC_TAKEN = 0x09,
32 ARMV7_PERFCTR_EXC_EXECUTED = 0x0A,
33 ARMV7_PERFCTR_CID_WRITE = 0x0B,
34 /* ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS.
35 * It counts:
36 * - all branch instructions,
37 * - instructions that explicitly write the PC,
38 * - exception generating instructions.
39 */
40 ARMV7_PERFCTR_PC_WRITE = 0x0C,
41 ARMV7_PERFCTR_PC_IMM_BRANCH = 0x0D,
42 ARMV7_PERFCTR_UNALIGNED_ACCESS = 0x0F,
43 ARMV7_PERFCTR_PC_BRANCH_MIS_PRED = 0x10,
44 ARMV7_PERFCTR_CLOCK_CYCLES = 0x11,
45
46 ARMV7_PERFCTR_PC_BRANCH_MIS_USED = 0x12,
47
48 ARMV7_PERFCTR_CPU_CYCLES = 0xFF
49};
50
51/* ARMv7 Cortex-A8 specific event types */
52enum armv7_a8_perf_types {
53 ARMV7_PERFCTR_INSTR_EXECUTED = 0x08,
54
55 ARMV7_PERFCTR_PC_PROC_RETURN = 0x0E,
56
57 ARMV7_PERFCTR_WRITE_BUFFER_FULL = 0x40,
58 ARMV7_PERFCTR_L2_STORE_MERGED = 0x41,
59 ARMV7_PERFCTR_L2_STORE_BUFF = 0x42,
60 ARMV7_PERFCTR_L2_ACCESS = 0x43,
61 ARMV7_PERFCTR_L2_CACH_MISS = 0x44,
62 ARMV7_PERFCTR_AXI_READ_CYCLES = 0x45,
63 ARMV7_PERFCTR_AXI_WRITE_CYCLES = 0x46,
64 ARMV7_PERFCTR_MEMORY_REPLAY = 0x47,
65 ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY = 0x48,
66 ARMV7_PERFCTR_L1_DATA_MISS = 0x49,
67 ARMV7_PERFCTR_L1_INST_MISS = 0x4A,
68 ARMV7_PERFCTR_L1_DATA_COLORING = 0x4B,
69 ARMV7_PERFCTR_L1_NEON_DATA = 0x4C,
70 ARMV7_PERFCTR_L1_NEON_CACH_DATA = 0x4D,
71 ARMV7_PERFCTR_L2_NEON = 0x4E,
72 ARMV7_PERFCTR_L2_NEON_HIT = 0x4F,
73 ARMV7_PERFCTR_L1_INST = 0x50,
74 ARMV7_PERFCTR_PC_RETURN_MIS_PRED = 0x51,
75 ARMV7_PERFCTR_PC_BRANCH_FAILED = 0x52,
76 ARMV7_PERFCTR_PC_BRANCH_TAKEN = 0x53,
77 ARMV7_PERFCTR_PC_BRANCH_EXECUTED = 0x54,
78 ARMV7_PERFCTR_OP_EXECUTED = 0x55,
79 ARMV7_PERFCTR_CYCLES_INST_STALL = 0x56,
80 ARMV7_PERFCTR_CYCLES_INST = 0x57,
81 ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL = 0x58,
82 ARMV7_PERFCTR_CYCLES_NEON_INST_STALL = 0x59,
83 ARMV7_PERFCTR_NEON_CYCLES = 0x5A,
84
85 ARMV7_PERFCTR_PMU0_EVENTS = 0x70,
86 ARMV7_PERFCTR_PMU1_EVENTS = 0x71,
87 ARMV7_PERFCTR_PMU_EVENTS = 0x72,
88};
89
90/* ARMv7 Cortex-A9 specific event types */
91enum armv7_a9_perf_types {
92 ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC = 0x40,
93 ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC = 0x41,
94 ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC = 0x42,
95
96 ARMV7_PERFCTR_COHERENT_LINE_MISS = 0x50,
97 ARMV7_PERFCTR_COHERENT_LINE_HIT = 0x51,
98
99 ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES = 0x60,
100 ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES = 0x61,
101 ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES = 0x62,
102 ARMV7_PERFCTR_STREX_EXECUTED_PASSED = 0x63,
103 ARMV7_PERFCTR_STREX_EXECUTED_FAILED = 0x64,
104 ARMV7_PERFCTR_DATA_EVICTION = 0x65,
105 ARMV7_PERFCTR_ISSUE_STAGE_NO_INST = 0x66,
106 ARMV7_PERFCTR_ISSUE_STAGE_EMPTY = 0x67,
107 ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE = 0x68,
108
109 ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS = 0x6E,
110
111 ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST = 0x70,
112 ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST = 0x71,
113 ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST = 0x72,
114 ARMV7_PERFCTR_FP_EXECUTED_INST = 0x73,
115 ARMV7_PERFCTR_NEON_EXECUTED_INST = 0x74,
116
117 ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES = 0x80,
118 ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES = 0x81,
119 ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES = 0x82,
120 ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES = 0x83,
121 ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES = 0x84,
122 ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES = 0x85,
123 ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES = 0x86,
124
125 ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES = 0x8A,
126 ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES = 0x8B,
127
128 ARMV7_PERFCTR_ISB_INST = 0x90,
129 ARMV7_PERFCTR_DSB_INST = 0x91,
130 ARMV7_PERFCTR_DMB_INST = 0x92,
131 ARMV7_PERFCTR_EXT_INTERRUPTS = 0x93,
132
133 ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED = 0xA0,
134 ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED = 0xA1,
135 ARMV7_PERFCTR_PLE_FIFO_FLUSH = 0xA2,
136 ARMV7_PERFCTR_PLE_RQST_COMPLETED = 0xA3,
137 ARMV7_PERFCTR_PLE_FIFO_OVERFLOW = 0xA4,
138 ARMV7_PERFCTR_PLE_RQST_PROG = 0xA5
139};
140
141/*
142 * Cortex-A8 HW events mapping
143 *
144 * The hardware events that we support. We do support cache operations but
145 * we have harvard caches and no way to combine instruction and data
146 * accesses/misses in hardware.
147 */
148static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = {
149 [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
150 [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
151 [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
152 [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
153 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
154 [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
155 [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
156};
157
158static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
159 [PERF_COUNT_HW_CACHE_OP_MAX]
160 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
161 [C(L1D)] = {
162 /*
163 * The performance counters don't differentiate between read
164 * and write accesses/misses so this isn't strictly correct,
165 * but it's the best we can do. Writes and reads get
166 * combined.
167 */
168 [C(OP_READ)] = {
169 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
170 [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
171 },
172 [C(OP_WRITE)] = {
173 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
174 [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
175 },
176 [C(OP_PREFETCH)] = {
177 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
178 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
179 },
180 },
181 [C(L1I)] = {
182 [C(OP_READ)] = {
183 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST,
184 [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS,
185 },
186 [C(OP_WRITE)] = {
187 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST,
188 [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS,
189 },
190 [C(OP_PREFETCH)] = {
191 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
192 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
193 },
194 },
195 [C(LL)] = {
196 [C(OP_READ)] = {
197 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS,
198 [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS,
199 },
200 [C(OP_WRITE)] = {
201 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS,
202 [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS,
203 },
204 [C(OP_PREFETCH)] = {
205 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
206 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
207 },
208 },
209 [C(DTLB)] = {
210 /*
211 * Only ITLB misses and DTLB refills are supported.
212		 * If users want the DTLB refill misses, a raw counter
213 * must be used.
214 */
215 [C(OP_READ)] = {
216 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
217 [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
218 },
219 [C(OP_WRITE)] = {
220 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
221 [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
222 },
223 [C(OP_PREFETCH)] = {
224 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
225 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
226 },
227 },
228 [C(ITLB)] = {
229 [C(OP_READ)] = {
230 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
231 [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
232 },
233 [C(OP_WRITE)] = {
234 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
235 [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
236 },
237 [C(OP_PREFETCH)] = {
238 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
239 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
240 },
241 },
242 [C(BPU)] = {
243 [C(OP_READ)] = {
244 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
245 [C(RESULT_MISS)]
246 = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
247 },
248 [C(OP_WRITE)] = {
249 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
250 [C(RESULT_MISS)]
251 = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
252 },
253 [C(OP_PREFETCH)] = {
254 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
255 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
256 },
257 },
258};
259
260/*
261 * Cortex-A9 HW events mapping
262 */
263static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = {
264 [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
265 [PERF_COUNT_HW_INSTRUCTIONS] =
266 ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE,
267 [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_COHERENT_LINE_HIT,
268 [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_COHERENT_LINE_MISS,
269 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
270 [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
271 [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
272};
273
274static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
275 [PERF_COUNT_HW_CACHE_OP_MAX]
276 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
277 [C(L1D)] = {
278 /*
279 * The performance counters don't differentiate between read
280 * and write accesses/misses so this isn't strictly correct,
281 * but it's the best we can do. Writes and reads get
282 * combined.
283 */
284 [C(OP_READ)] = {
285 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
286 [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
287 },
288 [C(OP_WRITE)] = {
289 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
290 [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
291 },
292 [C(OP_PREFETCH)] = {
293 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
294 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
295 },
296 },
297 [C(L1I)] = {
298 [C(OP_READ)] = {
299 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
300 [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS,
301 },
302 [C(OP_WRITE)] = {
303 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
304 [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS,
305 },
306 [C(OP_PREFETCH)] = {
307 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
308 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
309 },
310 },
311 [C(LL)] = {
312 [C(OP_READ)] = {
313 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
314 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
315 },
316 [C(OP_WRITE)] = {
317 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
318 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
319 },
320 [C(OP_PREFETCH)] = {
321 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
322 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
323 },
324 },
325 [C(DTLB)] = {
326 /*
327 * Only ITLB misses and DTLB refills are supported.
328		 * If users want the DTLB refill misses, a raw counter
329 * must be used.
330 */
331 [C(OP_READ)] = {
332 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
333 [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
334 },
335 [C(OP_WRITE)] = {
336 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
337 [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
338 },
339 [C(OP_PREFETCH)] = {
340 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
341 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
342 },
343 },
344 [C(ITLB)] = {
345 [C(OP_READ)] = {
346 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
347 [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
348 },
349 [C(OP_WRITE)] = {
350 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
351 [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
352 },
353 [C(OP_PREFETCH)] = {
354 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
355 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
356 },
357 },
358 [C(BPU)] = {
359 [C(OP_READ)] = {
360 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
361 [C(RESULT_MISS)]
362 = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
363 },
364 [C(OP_WRITE)] = {
365 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
366 [C(RESULT_MISS)]
367 = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
368 },
369 [C(OP_PREFETCH)] = {
370 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
371 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
372 },
373 },
374};
375
376/*
377 * Perf Events counters
378 */
379enum armv7_counters {
380 ARMV7_CYCLE_COUNTER = 1, /* Cycle counter */
381 ARMV7_COUNTER0 = 2, /* First event counter */
382};
383
384/*
385 * The cycle counter is ARMV7_CYCLE_COUNTER.
386 * The first event counter is ARMV7_COUNTER0.
387 * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1).
388 */
389#define ARMV7_COUNTER_LAST (ARMV7_COUNTER0 + armpmu->num_events - 1)
390
391/*
392 * ARMv7 low level PMNC access
393 */
394
395/*
396 * Per-CPU PMNC: config reg
397 */
398#define ARMV7_PMNC_E (1 << 0) /* Enable all counters */
399#define ARMV7_PMNC_P (1 << 1) /* Reset all counters */
400#define ARMV7_PMNC_C (1 << 2) /* Cycle counter reset */
401#define ARMV7_PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */
402#define ARMV7_PMNC_X (1 << 4) /* Export to ETM */
403#define ARMV7_PMNC_DP (1 << 5) /* Disable CCNT if non-invasive debug*/
404#define ARMV7_PMNC_N_SHIFT 11 /* Number of counters supported */
405#define ARMV7_PMNC_N_MASK 0x1f
406#define ARMV7_PMNC_MASK 0x3f /* Mask for writable bits */
407
408/*
409 * Available counters
410 */
411#define ARMV7_CNT0 0 /* First event counter */
412#define ARMV7_CCNT 31 /* Cycle counter */
413
414/* Perf Event to low level counters mapping */
415#define ARMV7_EVENT_CNT_TO_CNTx (ARMV7_COUNTER0 - ARMV7_CNT0)
416
417/*
418 * CNTENS: counters enable reg
419 */
420#define ARMV7_CNTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
421#define ARMV7_CNTENS_C (1 << ARMV7_CCNT)
422
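Editorial aside, not part of the patch: since ARMV7_COUNTER0 is 2 and the first hardware event counter is ARMV7_CNT0 (0), ARMV7_EVENT_CNT_TO_CNTx evaluates to 2, so a perf counter index idx selects hardware counter idx - 2, while the cycle counter always uses its own fixed bit (ARMV7_CCNT, bit 31) in the mask registers. A minimal sketch of the mapping (the helper name is invented for illustration):

/* Illustrative only: perf idx 2 -> CNT0 (bit 0), idx 3 -> CNT1 (bit 1),
 * and so on; the cycle counter is selected via the dedicated CCNT bit. */
static inline u32 armv7_example_enable_bit(unsigned int idx)
{
	if (idx == ARMV7_CYCLE_COUNTER)
		return ARMV7_CNTENS_C;		/* bit 31 */
	return ARMV7_CNTENS_P(idx);		/* bit (idx - 2) */
}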
423/*
424 * CNTENC: counters disable reg
425 */
426#define ARMV7_CNTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
427#define ARMV7_CNTENC_C (1 << ARMV7_CCNT)
428
429/*
430 * INTENS: counters overflow interrupt enable reg
431 */
432#define ARMV7_INTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
433#define ARMV7_INTENS_C (1 << ARMV7_CCNT)
434
435/*
436 * INTENC: counters overflow interrupt disable reg
437 */
438#define ARMV7_INTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
439#define ARMV7_INTENC_C (1 << ARMV7_CCNT)
440
441/*
442 * EVTSEL: Event selection reg
443 */
444#define ARMV7_EVTSEL_MASK 0xff /* Mask for writable bits */
445
446/*
447 * SELECT: Counter selection reg
448 */
449#define ARMV7_SELECT_MASK 0x1f /* Mask for writable bits */
450
451/*
452 * FLAG: counters overflow flag status reg
453 */
454#define ARMV7_FLAG_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
455#define ARMV7_FLAG_C (1 << ARMV7_CCNT)
456#define ARMV7_FLAG_MASK 0xffffffff /* Mask for writable bits */
457#define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK
458
459static inline unsigned long armv7_pmnc_read(void)
460{
461 u32 val;
462 asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val));
463 return val;
464}
465
466static inline void armv7_pmnc_write(unsigned long val)
467{
468 val &= ARMV7_PMNC_MASK;
469 asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val));
470}
471
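Editorial aside, not part of the patch: given the bit definitions above, resetting the event counters and the cycle counter and then turning the PMU on amounts to two writes through these accessors. A minimal sketch (the helper name is invented; the enable step mirrors what the v7 start path does further down in this file):

/* Illustrative only: reset all counters, then set the global enable. */
static inline void armv7_example_reset_and_enable(void)
{
	armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C);
	armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
}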
472static inline int armv7_pmnc_has_overflowed(unsigned long pmnc)
473{
474 return pmnc & ARMV7_OVERFLOWED_MASK;
475}
476
477static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc,
478 enum armv7_counters counter)
479{
480 int ret = 0;
481
482 if (counter == ARMV7_CYCLE_COUNTER)
483 ret = pmnc & ARMV7_FLAG_C;
484 else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST))
485 ret = pmnc & ARMV7_FLAG_P(counter);
486 else
487 pr_err("CPU%u checking wrong counter %d overflow status\n",
488 smp_processor_id(), counter);
489
490 return ret;
491}
492
493static inline int armv7_pmnc_select_counter(unsigned int idx)
494{
495 u32 val;
496
497 if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) {
498 pr_err("CPU%u selecting wrong PMNC counter"
499 " %d\n", smp_processor_id(), idx);
500 return -1;
501 }
502
503 val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK;
504 asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val));
505
506 return idx;
507}
508
509static inline u32 armv7pmu_read_counter(int idx)
510{
511 unsigned long value = 0;
512
513 if (idx == ARMV7_CYCLE_COUNTER)
514 asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
515 else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
516 if (armv7_pmnc_select_counter(idx) == idx)
517 asm volatile("mrc p15, 0, %0, c9, c13, 2"
518 : "=r" (value));
519 } else
520 pr_err("CPU%u reading wrong counter %d\n",
521 smp_processor_id(), idx);
522
523 return value;
524}
525
526static inline void armv7pmu_write_counter(int idx, u32 value)
527{
528 if (idx == ARMV7_CYCLE_COUNTER)
529 asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
530 else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
531 if (armv7_pmnc_select_counter(idx) == idx)
532 asm volatile("mcr p15, 0, %0, c9, c13, 2"
533 : : "r" (value));
534 } else
535 pr_err("CPU%u writing wrong counter %d\n",
536 smp_processor_id(), idx);
537}
538
539static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val)
540{
541 if (armv7_pmnc_select_counter(idx) == idx) {
542 val &= ARMV7_EVTSEL_MASK;
543 asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
544 }
545}
546
547static inline u32 armv7_pmnc_enable_counter(unsigned int idx)
548{
549 u32 val;
550
551 if ((idx != ARMV7_CYCLE_COUNTER) &&
552 ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
553 pr_err("CPU%u enabling wrong PMNC counter"
554 " %d\n", smp_processor_id(), idx);
555 return -1;
556 }
557
558 if (idx == ARMV7_CYCLE_COUNTER)
559 val = ARMV7_CNTENS_C;
560 else
561 val = ARMV7_CNTENS_P(idx);
562
563 asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val));
564
565 return idx;
566}
567
568static inline u32 armv7_pmnc_disable_counter(unsigned int idx)
569{
570 u32 val;
571
572
573 if ((idx != ARMV7_CYCLE_COUNTER) &&
574 ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
575 pr_err("CPU%u disabling wrong PMNC counter"
576 " %d\n", smp_processor_id(), idx);
577 return -1;
578 }
579
580 if (idx == ARMV7_CYCLE_COUNTER)
581 val = ARMV7_CNTENC_C;
582 else
583 val = ARMV7_CNTENC_P(idx);
584
585 asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val));
586
587 return idx;
588}
589
590static inline u32 armv7_pmnc_enable_intens(unsigned int idx)
591{
592 u32 val;
593
594 if ((idx != ARMV7_CYCLE_COUNTER) &&
595 ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
596 pr_err("CPU%u enabling wrong PMNC counter"
597 " interrupt enable %d\n", smp_processor_id(), idx);
598 return -1;
599 }
600
601 if (idx == ARMV7_CYCLE_COUNTER)
602 val = ARMV7_INTENS_C;
603 else
604 val = ARMV7_INTENS_P(idx);
605
606 asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val));
607
608 return idx;
609}
610
611static inline u32 armv7_pmnc_disable_intens(unsigned int idx)
612{
613 u32 val;
614
615 if ((idx != ARMV7_CYCLE_COUNTER) &&
616 ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
617 pr_err("CPU%u disabling wrong PMNC counter"
618 " interrupt enable %d\n", smp_processor_id(), idx);
619 return -1;
620 }
621
622 if (idx == ARMV7_CYCLE_COUNTER)
623 val = ARMV7_INTENC_C;
624 else
625 val = ARMV7_INTENC_P(idx);
626
627 asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val));
628
629 return idx;
630}
631
632static inline u32 armv7_pmnc_getreset_flags(void)
633{
634 u32 val;
635
636 /* Read */
637 asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
638
639 /* Write to clear flags */
640 val &= ARMV7_FLAG_MASK;
641 asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val));
642
643 return val;
644}
645
646#ifdef DEBUG
647static void armv7_pmnc_dump_regs(void)
648{
649 u32 val;
650 unsigned int cnt;
651
652 printk(KERN_INFO "PMNC registers dump:\n");
653
654 asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val));
655 printk(KERN_INFO "PMNC =0x%08x\n", val);
656
657 asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val));
658 printk(KERN_INFO "CNTENS=0x%08x\n", val);
659
660 asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val));
661 printk(KERN_INFO "INTENS=0x%08x\n", val);
662
663 asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
664 printk(KERN_INFO "FLAGS =0x%08x\n", val);
665
666 asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val));
667 printk(KERN_INFO "SELECT=0x%08x\n", val);
668
669 asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
670 printk(KERN_INFO "CCNT =0x%08x\n", val);
671
672 for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) {
673 armv7_pmnc_select_counter(cnt);
674 asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val));
675 printk(KERN_INFO "CNT[%d] count =0x%08x\n",
676 cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
677 asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val));
678 printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n",
679 cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
680 }
681}
682#endif
683
684static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx)
685{
686 unsigned long flags;
687
688 /*
689 * Enable counter and interrupt, and set the counter to count
690 * the event that we're interested in.
691 */
692 spin_lock_irqsave(&pmu_lock, flags);
693
694 /*
695 * Disable counter
696 */
697 armv7_pmnc_disable_counter(idx);
698
699 /*
700 * Set event (if destined for PMNx counters)
701 * We don't need to set the event if it's a cycle count
702 */
703 if (idx != ARMV7_CYCLE_COUNTER)
704 armv7_pmnc_write_evtsel(idx, hwc->config_base);
705
706 /*
707 * Enable interrupt for this counter
708 */
709 armv7_pmnc_enable_intens(idx);
710
711 /*
712 * Enable counter
713 */
714 armv7_pmnc_enable_counter(idx);
715
716 spin_unlock_irqrestore(&pmu_lock, flags);
717}
718
719static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx)
720{
721 unsigned long flags;
722
723 /*
724 * Disable counter and interrupt
725 */
726 spin_lock_irqsave(&pmu_lock, flags);
727
728 /*
729 * Disable counter
730 */
731 armv7_pmnc_disable_counter(idx);
732
733 /*
734 * Disable interrupt for this counter
735 */
736 armv7_pmnc_disable_intens(idx);
737
738 spin_unlock_irqrestore(&pmu_lock, flags);
739}
740
741static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
742{
743 unsigned long pmnc;
744 struct perf_sample_data data;
745 struct cpu_hw_events *cpuc;
746 struct pt_regs *regs;
747 int idx;
748
749 /*
750 * Get and reset the IRQ flags
751 */
752 pmnc = armv7_pmnc_getreset_flags();
753
754 /*
755 * Did an overflow occur?
756 */
757 if (!armv7_pmnc_has_overflowed(pmnc))
758 return IRQ_NONE;
759
760 /*
761 * Handle the counter(s) overflow(s)
762 */
763 regs = get_irq_regs();
764
765 perf_sample_data_init(&data, 0);
766
767 cpuc = &__get_cpu_var(cpu_hw_events);
768 for (idx = 0; idx <= armpmu->num_events; ++idx) {
769 struct perf_event *event = cpuc->events[idx];
770 struct hw_perf_event *hwc;
771
772 if (!test_bit(idx, cpuc->active_mask))
773 continue;
774
775 /*
776 * We have a single interrupt for all counters. Check that
777 * each counter has overflowed before we process it.
778 */
779 if (!armv7_pmnc_counter_has_overflowed(pmnc, idx))
780 continue;
781
782 hwc = &event->hw;
783 armpmu_event_update(event, hwc, idx);
784 data.period = event->hw.last_period;
785 if (!armpmu_event_set_period(event, hwc, idx))
786 continue;
787
788 if (perf_event_overflow(event, 0, &data, regs))
789 armpmu->disable(hwc, idx);
790 }
791
792 /*
793 * Handle the pending perf events.
794 *
795 * Note: this call *must* be run with interrupts disabled. For
796 * platforms that can have the PMU interrupts raised as an NMI, this
797 * will not work.
798 */
799 irq_work_run();
800
801 return IRQ_HANDLED;
802}
803
804static void armv7pmu_start(void)
805{
806 unsigned long flags;
807
808 spin_lock_irqsave(&pmu_lock, flags);
809 /* Enable all counters */
810 armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
811 spin_unlock_irqrestore(&pmu_lock, flags);
812}
813
814static void armv7pmu_stop(void)
815{
816 unsigned long flags;
817
818 spin_lock_irqsave(&pmu_lock, flags);
819 /* Disable all counters */
820 armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
821 spin_unlock_irqrestore(&pmu_lock, flags);
822}
823
824static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc,
825 struct hw_perf_event *event)
826{
827 int idx;
828
829	/* Always place a cycle count event into the cycle counter. */
830 if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) {
831 if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask))
832 return -EAGAIN;
833
834 return ARMV7_CYCLE_COUNTER;
835 } else {
836 /*
837		 * For anything other than a cycle count event, try to use
838		 * one of the event counters
839 */
840 for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) {
841 if (!test_and_set_bit(idx, cpuc->used_mask))
842 return idx;
843 }
844
845 /* The counters are all in use. */
846 return -EAGAIN;
847 }
848}
849
850static struct arm_pmu armv7pmu = {
851 .handle_irq = armv7pmu_handle_irq,
852 .enable = armv7pmu_enable_event,
853 .disable = armv7pmu_disable_event,
854 .read_counter = armv7pmu_read_counter,
855 .write_counter = armv7pmu_write_counter,
856 .get_event_idx = armv7pmu_get_event_idx,
857 .start = armv7pmu_start,
858 .stop = armv7pmu_stop,
859 .raw_event_mask = 0xFF,
860 .max_period = (1LLU << 32) - 1,
861};
862
863static u32 __init armv7_reset_read_pmnc(void)
864{
865 u32 nb_cnt;
866
867 /* Initialize & Reset PMNC: C and P bits */
868 armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C);
869
870	/* Read the number of CNTx counters supported from PMNC */
871 nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK;
872
873 /* Add the CPU cycles counter and return */
874 return nb_cnt + 1;
875}
876
877const struct arm_pmu *__init armv7_a8_pmu_init(void)
878{
879 armv7pmu.id = ARM_PERF_PMU_ID_CA8;
880 armv7pmu.name = "ARMv7 Cortex-A8";
881 armv7pmu.cache_map = &armv7_a8_perf_cache_map;
882 armv7pmu.event_map = &armv7_a8_perf_map;
883 armv7pmu.num_events = armv7_reset_read_pmnc();
884 return &armv7pmu;
885}
886
887const struct arm_pmu *__init armv7_a9_pmu_init(void)
888{
889 armv7pmu.id = ARM_PERF_PMU_ID_CA9;
890 armv7pmu.name = "ARMv7 Cortex-A9";
891 armv7pmu.cache_map = &armv7_a9_perf_cache_map;
892 armv7pmu.event_map = &armv7_a9_perf_map;
893 armv7pmu.num_events = armv7_reset_read_pmnc();
894 return &armv7pmu;
895}
896#else
897const struct arm_pmu *__init armv7_a8_pmu_init(void)
898{
899 return NULL;
900}
901
902const struct arm_pmu *__init armv7_a9_pmu_init(void)
903{
904 return NULL;
905}
906#endif /* CONFIG_CPU_V7 */
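Each backend above exports an __init hook (armv7_a8_pmu_init(), armv7_a9_pmu_init(), and the xscale variants in the next file) that either returns its arm_pmu descriptor or NULL when support for that architecture version is compiled out. As a rough, hypothetical sketch only — this is not the probing code in perf_event.c, and the MIDR part-number constants are illustrative — the core file can select a backend from the CPU ID at boot along these lines:

/*
 * Hypothetical sketch of backend selection; the real probe lives in
 * perf_event.c and the part-number constants here are illustrative.
 */
static const struct arm_pmu *__init sketch_probe_pmu(void)
{
	unsigned long cpuid = read_cpuid_id();

	if ((cpuid & 0xff000000) == 0x41000000) {	/* ARM Ltd. parts */
		switch (cpuid & 0xfff0) {
		case 0xc080:				/* Cortex-A8 */
			return armv7_a8_pmu_init();
		case 0xc090:				/* Cortex-A9 */
			return armv7_a9_pmu_init();
		}
	} else if ((cpuid & 0xff000000) == 0x69000000) {
		/* Intel XScale; choosing xscale1 vs. xscale2 is elided. */
		return xscale1pmu_init();
	}

	return NULL;	/* no supported PMU found */
}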
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
new file mode 100644
index 000000000000..4e9592789d40
--- /dev/null
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -0,0 +1,807 @@
1/*
2 * ARMv5 [xscale] Performance counter handling code.
3 *
4 * Copyright (C) 2010, ARM Ltd., Will Deacon <will.deacon@arm.com>
5 *
6 * Based on the previous xscale OProfile code.
7 *
8 * There are two variants of the xscale PMU that we support:
9 * - xscale1pmu: 2 event counters and a cycle counter
10 * - xscale2pmu: 4 event counters and a cycle counter
11 * The two variants share event definitions, but have different
12 * PMU structures.
13 */
14
15#ifdef CONFIG_CPU_XSCALE
16enum xscale_perf_types {
17 XSCALE_PERFCTR_ICACHE_MISS = 0x00,
18 XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01,
19 XSCALE_PERFCTR_DATA_STALL = 0x02,
20 XSCALE_PERFCTR_ITLB_MISS = 0x03,
21 XSCALE_PERFCTR_DTLB_MISS = 0x04,
22 XSCALE_PERFCTR_BRANCH = 0x05,
23 XSCALE_PERFCTR_BRANCH_MISS = 0x06,
24 XSCALE_PERFCTR_INSTRUCTION = 0x07,
25 XSCALE_PERFCTR_DCACHE_FULL_STALL = 0x08,
26 XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09,
27 XSCALE_PERFCTR_DCACHE_ACCESS = 0x0A,
28 XSCALE_PERFCTR_DCACHE_MISS = 0x0B,
29 XSCALE_PERFCTR_DCACHE_WRITE_BACK = 0x0C,
30 XSCALE_PERFCTR_PC_CHANGED = 0x0D,
31 XSCALE_PERFCTR_BCU_REQUEST = 0x10,
32 XSCALE_PERFCTR_BCU_FULL = 0x11,
33 XSCALE_PERFCTR_BCU_DRAIN = 0x12,
34 XSCALE_PERFCTR_BCU_ECC_NO_ELOG = 0x14,
35 XSCALE_PERFCTR_BCU_1_BIT_ERR = 0x15,
36 XSCALE_PERFCTR_RMW = 0x16,
37 /* XSCALE_PERFCTR_CCNT is not hardware defined */
38 XSCALE_PERFCTR_CCNT = 0xFE,
39 XSCALE_PERFCTR_UNUSED = 0xFF,
40};
41
42enum xscale_counters {
43 XSCALE_CYCLE_COUNTER = 1,
44 XSCALE_COUNTER0,
45 XSCALE_COUNTER1,
46 XSCALE_COUNTER2,
47 XSCALE_COUNTER3,
48};
49
50static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = {
51 [PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT,
52 [PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION,
53 [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
54 [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
55 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH,
56 [PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS,
57 [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
58};
59
60static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
61 [PERF_COUNT_HW_CACHE_OP_MAX]
62 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
63 [C(L1D)] = {
64 [C(OP_READ)] = {
65 [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
66 [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
67 },
68 [C(OP_WRITE)] = {
69 [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
70 [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
71 },
72 [C(OP_PREFETCH)] = {
73 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
74 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
75 },
76 },
77 [C(L1I)] = {
78 [C(OP_READ)] = {
79 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
80 [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS,
81 },
82 [C(OP_WRITE)] = {
83 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
84 [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS,
85 },
86 [C(OP_PREFETCH)] = {
87 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
88 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
89 },
90 },
91 [C(LL)] = {
92 [C(OP_READ)] = {
93 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
94 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
95 },
96 [C(OP_WRITE)] = {
97 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
98 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
99 },
100 [C(OP_PREFETCH)] = {
101 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
102 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
103 },
104 },
105 [C(DTLB)] = {
106 [C(OP_READ)] = {
107 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
108 [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
109 },
110 [C(OP_WRITE)] = {
111 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
112 [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
113 },
114 [C(OP_PREFETCH)] = {
115 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
116 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
117 },
118 },
119 [C(ITLB)] = {
120 [C(OP_READ)] = {
121 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
122 [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
123 },
124 [C(OP_WRITE)] = {
125 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
126 [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
127 },
128 [C(OP_PREFETCH)] = {
129 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
130 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
131 },
132 },
133 [C(BPU)] = {
134 [C(OP_READ)] = {
135 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
136 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
137 },
138 [C(OP_WRITE)] = {
139 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
140 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
141 },
142 [C(OP_PREFETCH)] = {
143 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
144 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
145 },
146 },
147};
148
149#define XSCALE_PMU_ENABLE 0x001
150#define XSCALE_PMN_RESET 0x002
151#define XSCALE_CCNT_RESET 0x004
152#define XSCALE_PMU_RESET	(XSCALE_CCNT_RESET | XSCALE_PMN_RESET)
153#define XSCALE_PMU_CNT64 0x008
154
155#define XSCALE1_OVERFLOWED_MASK 0x700
156#define XSCALE1_CCOUNT_OVERFLOW 0x400
157#define XSCALE1_COUNT0_OVERFLOW 0x100
158#define XSCALE1_COUNT1_OVERFLOW 0x200
159#define XSCALE1_CCOUNT_INT_EN 0x040
160#define XSCALE1_COUNT0_INT_EN 0x010
161#define XSCALE1_COUNT1_INT_EN 0x020
162#define XSCALE1_COUNT0_EVT_SHFT 12
163#define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT)
164#define XSCALE1_COUNT1_EVT_SHFT 20
165#define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT)
166
167static inline u32
168xscale1pmu_read_pmnc(void)
169{
170 u32 val;
171 asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val));
172 return val;
173}
174
175static inline void
176xscale1pmu_write_pmnc(u32 val)
177{
178	/* upper 4 bits and bits 7, 11 are write-as-0 */
179 val &= 0xffff77f;
180 asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val));
181}
182
183static inline int
184xscale1_pmnc_counter_has_overflowed(unsigned long pmnc,
185 enum xscale_counters counter)
186{
187 int ret = 0;
188
189 switch (counter) {
190 case XSCALE_CYCLE_COUNTER:
191 ret = pmnc & XSCALE1_CCOUNT_OVERFLOW;
192 break;
193 case XSCALE_COUNTER0:
194 ret = pmnc & XSCALE1_COUNT0_OVERFLOW;
195 break;
196 case XSCALE_COUNTER1:
197 ret = pmnc & XSCALE1_COUNT1_OVERFLOW;
198 break;
199 default:
200 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
201 }
202
203 return ret;
204}
205
206static irqreturn_t
207xscale1pmu_handle_irq(int irq_num, void *dev)
208{
209 unsigned long pmnc;
210 struct perf_sample_data data;
211 struct cpu_hw_events *cpuc;
212 struct pt_regs *regs;
213 int idx;
214
215 /*
216	 * NOTE: an A stepping erratum means that if an overflow bit is
217	 *       already set when another overflow occurs, the previously
218	 *       set bit is cleared. There is no workaround; this is
219	 *       fixed in the B stepping and later.
220 */
221 pmnc = xscale1pmu_read_pmnc();
222
223 /*
224 * Write the value back to clear the overflow flags. Overflow
225 * flags remain in pmnc for use below. We also disable the PMU
226 * while we process the interrupt.
227 */
228 xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
229
230 if (!(pmnc & XSCALE1_OVERFLOWED_MASK))
231 return IRQ_NONE;
232
233 regs = get_irq_regs();
234
235 perf_sample_data_init(&data, 0);
236
237 cpuc = &__get_cpu_var(cpu_hw_events);
238 for (idx = 0; idx <= armpmu->num_events; ++idx) {
239 struct perf_event *event = cpuc->events[idx];
240 struct hw_perf_event *hwc;
241
242 if (!test_bit(idx, cpuc->active_mask))
243 continue;
244
245 if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx))
246 continue;
247
248 hwc = &event->hw;
249 armpmu_event_update(event, hwc, idx);
250 data.period = event->hw.last_period;
251 if (!armpmu_event_set_period(event, hwc, idx))
252 continue;
253
254 if (perf_event_overflow(event, 0, &data, regs))
255 armpmu->disable(hwc, idx);
256 }
257
258 irq_work_run();
259
260 /*
261 * Re-enable the PMU.
262 */
263 pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE;
264 xscale1pmu_write_pmnc(pmnc);
265
266 return IRQ_HANDLED;
267}
268
269static void
270xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx)
271{
272 unsigned long val, mask, evt, flags;
273
274 switch (idx) {
275 case XSCALE_CYCLE_COUNTER:
276 mask = 0;
277 evt = XSCALE1_CCOUNT_INT_EN;
278 break;
279 case XSCALE_COUNTER0:
280 mask = XSCALE1_COUNT0_EVT_MASK;
281 evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) |
282 XSCALE1_COUNT0_INT_EN;
283 break;
284 case XSCALE_COUNTER1:
285 mask = XSCALE1_COUNT1_EVT_MASK;
286 evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) |
287 XSCALE1_COUNT1_INT_EN;
288 break;
289 default:
290 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
291 return;
292 }
293
294 spin_lock_irqsave(&pmu_lock, flags);
295 val = xscale1pmu_read_pmnc();
296 val &= ~mask;
297 val |= evt;
298 xscale1pmu_write_pmnc(val);
299 spin_unlock_irqrestore(&pmu_lock, flags);
300}
301
302static void
303xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx)
304{
305 unsigned long val, mask, evt, flags;
306
307 switch (idx) {
308 case XSCALE_CYCLE_COUNTER:
309 mask = XSCALE1_CCOUNT_INT_EN;
310 evt = 0;
311 break;
312 case XSCALE_COUNTER0:
313 mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK;
314 evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT;
315 break;
316 case XSCALE_COUNTER1:
317 mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK;
318 evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT;
319 break;
320 default:
321 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
322 return;
323 }
324
325 spin_lock_irqsave(&pmu_lock, flags);
326 val = xscale1pmu_read_pmnc();
327 val &= ~mask;
328 val |= evt;
329 xscale1pmu_write_pmnc(val);
330 spin_unlock_irqrestore(&pmu_lock, flags);
331}
332
333static int
334xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc,
335 struct hw_perf_event *event)
336{
337 if (XSCALE_PERFCTR_CCNT == event->config_base) {
338 if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask))
339 return -EAGAIN;
340
341 return XSCALE_CYCLE_COUNTER;
342 } else {
343 if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask))
344 return XSCALE_COUNTER1;
345
346 if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask))
347 return XSCALE_COUNTER0;
348
349 return -EAGAIN;
350 }
351}
352
353static void
354xscale1pmu_start(void)
355{
356 unsigned long flags, val;
357
358 spin_lock_irqsave(&pmu_lock, flags);
359 val = xscale1pmu_read_pmnc();
360 val |= XSCALE_PMU_ENABLE;
361 xscale1pmu_write_pmnc(val);
362 spin_unlock_irqrestore(&pmu_lock, flags);
363}
364
365static void
366xscale1pmu_stop(void)
367{
368 unsigned long flags, val;
369
370 spin_lock_irqsave(&pmu_lock, flags);
371 val = xscale1pmu_read_pmnc();
372 val &= ~XSCALE_PMU_ENABLE;
373 xscale1pmu_write_pmnc(val);
374 spin_unlock_irqrestore(&pmu_lock, flags);
375}
376
377static inline u32
378xscale1pmu_read_counter(int counter)
379{
380 u32 val = 0;
381
382 switch (counter) {
383 case XSCALE_CYCLE_COUNTER:
384 asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val));
385 break;
386 case XSCALE_COUNTER0:
387 asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val));
388 break;
389 case XSCALE_COUNTER1:
390 asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val));
391 break;
392 }
393
394 return val;
395}
396
397static inline void
398xscale1pmu_write_counter(int counter, u32 val)
399{
400 switch (counter) {
401 case XSCALE_CYCLE_COUNTER:
402 asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val));
403 break;
404 case XSCALE_COUNTER0:
405 asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val));
406 break;
407 case XSCALE_COUNTER1:
408 asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val));
409 break;
410 }
411}
412
413static const struct arm_pmu xscale1pmu = {
414 .id = ARM_PERF_PMU_ID_XSCALE1,
415 .name = "xscale1",
416 .handle_irq = xscale1pmu_handle_irq,
417 .enable = xscale1pmu_enable_event,
418 .disable = xscale1pmu_disable_event,
419 .read_counter = xscale1pmu_read_counter,
420 .write_counter = xscale1pmu_write_counter,
421 .get_event_idx = xscale1pmu_get_event_idx,
422 .start = xscale1pmu_start,
423 .stop = xscale1pmu_stop,
424 .cache_map = &xscale_perf_cache_map,
425 .event_map = &xscale_perf_map,
426 .raw_event_mask = 0xFF,
427 .num_events = 3,
428 .max_period = (1LLU << 32) - 1,
429};
430
431const struct arm_pmu *__init xscale1pmu_init(void)
432{
433 return &xscale1pmu;
434}
435
436#define XSCALE2_OVERFLOWED_MASK 0x01f
437#define XSCALE2_CCOUNT_OVERFLOW 0x001
438#define XSCALE2_COUNT0_OVERFLOW 0x002
439#define XSCALE2_COUNT1_OVERFLOW 0x004
440#define XSCALE2_COUNT2_OVERFLOW 0x008
441#define XSCALE2_COUNT3_OVERFLOW 0x010
442#define XSCALE2_CCOUNT_INT_EN 0x001
443#define XSCALE2_COUNT0_INT_EN 0x002
444#define XSCALE2_COUNT1_INT_EN 0x004
445#define XSCALE2_COUNT2_INT_EN 0x008
446#define XSCALE2_COUNT3_INT_EN 0x010
447#define XSCALE2_COUNT0_EVT_SHFT 0
448#define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT)
449#define XSCALE2_COUNT1_EVT_SHFT 8
450#define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT)
451#define XSCALE2_COUNT2_EVT_SHFT 16
452#define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT)
453#define XSCALE2_COUNT3_EVT_SHFT 24
454#define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT)
455
456static inline u32
457xscale2pmu_read_pmnc(void)
458{
459 u32 val;
460 asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val));
461 /* bits 1-2 and 4-23 are read-unpredictable */
462 return val & 0xff000009;
463}
464
465static inline void
466xscale2pmu_write_pmnc(u32 val)
467{
468 /* bits 4-23 are write-as-0, 24-31 are write ignored */
469 val &= 0xf;
470 asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val));
471}
472
473static inline u32
474xscale2pmu_read_overflow_flags(void)
475{
476 u32 val;
477 asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val));
478 return val;
479}
480
481static inline void
482xscale2pmu_write_overflow_flags(u32 val)
483{
484 asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val));
485}
486
487static inline u32
488xscale2pmu_read_event_select(void)
489{
490 u32 val;
491 asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val));
492 return val;
493}
494
495static inline void
496xscale2pmu_write_event_select(u32 val)
497{
498 asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val));
499}
500
501static inline u32
502xscale2pmu_read_int_enable(void)
503{
504 u32 val;
505 asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val));
506 return val;
507}
508
509static void
510xscale2pmu_write_int_enable(u32 val)
511{
512 asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val));
513}
514
515static inline int
516xscale2_pmnc_counter_has_overflowed(unsigned long of_flags,
517 enum xscale_counters counter)
518{
519 int ret = 0;
520
521 switch (counter) {
522 case XSCALE_CYCLE_COUNTER:
523 ret = of_flags & XSCALE2_CCOUNT_OVERFLOW;
524 break;
525 case XSCALE_COUNTER0:
526 ret = of_flags & XSCALE2_COUNT0_OVERFLOW;
527 break;
528 case XSCALE_COUNTER1:
529 ret = of_flags & XSCALE2_COUNT1_OVERFLOW;
530 break;
531 case XSCALE_COUNTER2:
532 ret = of_flags & XSCALE2_COUNT2_OVERFLOW;
533 break;
534 case XSCALE_COUNTER3:
535 ret = of_flags & XSCALE2_COUNT3_OVERFLOW;
536 break;
537 default:
538 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
539 }
540
541 return ret;
542}
543
544static irqreturn_t
545xscale2pmu_handle_irq(int irq_num, void *dev)
546{
547 unsigned long pmnc, of_flags;
548 struct perf_sample_data data;
549 struct cpu_hw_events *cpuc;
550 struct pt_regs *regs;
551 int idx;
552
553 /* Disable the PMU. */
554 pmnc = xscale2pmu_read_pmnc();
555 xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
556
557 /* Check the overflow flag register. */
558 of_flags = xscale2pmu_read_overflow_flags();
559 if (!(of_flags & XSCALE2_OVERFLOWED_MASK))
560 return IRQ_NONE;
561
562 /* Clear the overflow bits. */
563 xscale2pmu_write_overflow_flags(of_flags);
564
565 regs = get_irq_regs();
566
567 perf_sample_data_init(&data, 0);
568
569 cpuc = &__get_cpu_var(cpu_hw_events);
570 for (idx = 0; idx <= armpmu->num_events; ++idx) {
571 struct perf_event *event = cpuc->events[idx];
572 struct hw_perf_event *hwc;
573
574 if (!test_bit(idx, cpuc->active_mask))
575 continue;
576
577		if (!xscale2_pmnc_counter_has_overflowed(of_flags, idx))
578 continue;
579
580 hwc = &event->hw;
581 armpmu_event_update(event, hwc, idx);
582 data.period = event->hw.last_period;
583 if (!armpmu_event_set_period(event, hwc, idx))
584 continue;
585
586 if (perf_event_overflow(event, 0, &data, regs))
587 armpmu->disable(hwc, idx);
588 }
589
590 irq_work_run();
591
592 /*
593 * Re-enable the PMU.
594 */
595 pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE;
596 xscale2pmu_write_pmnc(pmnc);
597
598 return IRQ_HANDLED;
599}
600
601static void
602xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx)
603{
604 unsigned long flags, ien, evtsel;
605
606 ien = xscale2pmu_read_int_enable();
607 evtsel = xscale2pmu_read_event_select();
608
609 switch (idx) {
610 case XSCALE_CYCLE_COUNTER:
611 ien |= XSCALE2_CCOUNT_INT_EN;
612 break;
613 case XSCALE_COUNTER0:
614 ien |= XSCALE2_COUNT0_INT_EN;
615 evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
616 evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT;
617 break;
618 case XSCALE_COUNTER1:
619 ien |= XSCALE2_COUNT1_INT_EN;
620 evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
621 evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT;
622 break;
623 case XSCALE_COUNTER2:
624 ien |= XSCALE2_COUNT2_INT_EN;
625 evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
626 evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT;
627 break;
628 case XSCALE_COUNTER3:
629 ien |= XSCALE2_COUNT3_INT_EN;
630 evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
631 evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT;
632 break;
633 default:
634 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
635 return;
636 }
637
638 spin_lock_irqsave(&pmu_lock, flags);
639 xscale2pmu_write_event_select(evtsel);
640 xscale2pmu_write_int_enable(ien);
641 spin_unlock_irqrestore(&pmu_lock, flags);
642}
643
644static void
645xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
646{
647 unsigned long flags, ien, evtsel;
648
649 ien = xscale2pmu_read_int_enable();
650 evtsel = xscale2pmu_read_event_select();
651
652 switch (idx) {
653 case XSCALE_CYCLE_COUNTER:
654 ien &= ~XSCALE2_CCOUNT_INT_EN;
655 break;
656 case XSCALE_COUNTER0:
657 ien &= ~XSCALE2_COUNT0_INT_EN;
658 evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
659 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT;
660 break;
661 case XSCALE_COUNTER1:
662 ien &= ~XSCALE2_COUNT1_INT_EN;
663 evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
664 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT;
665 break;
666 case XSCALE_COUNTER2:
667 ien &= ~XSCALE2_COUNT2_INT_EN;
668 evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
669 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT;
670 break;
671 case XSCALE_COUNTER3:
672 ien &= ~XSCALE2_COUNT3_INT_EN;
673 evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
674 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT;
675 break;
676 default:
677 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
678 return;
679 }
680
681 spin_lock_irqsave(&pmu_lock, flags);
682 xscale2pmu_write_event_select(evtsel);
683 xscale2pmu_write_int_enable(ien);
684 spin_unlock_irqrestore(&pmu_lock, flags);
685}
686
687static int
688xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc,
689 struct hw_perf_event *event)
690{
691 int idx = xscale1pmu_get_event_idx(cpuc, event);
692 if (idx >= 0)
693 goto out;
694
695 if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask))
696 idx = XSCALE_COUNTER3;
697 else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask))
698 idx = XSCALE_COUNTER2;
699out:
700 return idx;
701}
702
703static void
704xscale2pmu_start(void)
705{
706 unsigned long flags, val;
707
708 spin_lock_irqsave(&pmu_lock, flags);
709 val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
710 val |= XSCALE_PMU_ENABLE;
711 xscale2pmu_write_pmnc(val);
712 spin_unlock_irqrestore(&pmu_lock, flags);
713}
714
715static void
716xscale2pmu_stop(void)
717{
718 unsigned long flags, val;
719
720 spin_lock_irqsave(&pmu_lock, flags);
721 val = xscale2pmu_read_pmnc();
722 val &= ~XSCALE_PMU_ENABLE;
723 xscale2pmu_write_pmnc(val);
724 spin_unlock_irqrestore(&pmu_lock, flags);
725}
726
727static inline u32
728xscale2pmu_read_counter(int counter)
729{
730 u32 val = 0;
731
732 switch (counter) {
733 case XSCALE_CYCLE_COUNTER:
734 asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val));
735 break;
736 case XSCALE_COUNTER0:
737 asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val));
738 break;
739 case XSCALE_COUNTER1:
740 asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val));
741 break;
742 case XSCALE_COUNTER2:
743 asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val));
744 break;
745 case XSCALE_COUNTER3:
746 asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val));
747 break;
748 }
749
750 return val;
751}
752
753static inline void
754xscale2pmu_write_counter(int counter, u32 val)
755{
756 switch (counter) {
757 case XSCALE_CYCLE_COUNTER:
758 asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val));
759 break;
760 case XSCALE_COUNTER0:
761 asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val));
762 break;
763 case XSCALE_COUNTER1:
764 asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val));
765 break;
766 case XSCALE_COUNTER2:
767 asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val));
768 break;
769 case XSCALE_COUNTER3:
770 asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val));
771 break;
772 }
773}
774
775static const struct arm_pmu xscale2pmu = {
776 .id = ARM_PERF_PMU_ID_XSCALE2,
777 .name = "xscale2",
778 .handle_irq = xscale2pmu_handle_irq,
779 .enable = xscale2pmu_enable_event,
780 .disable = xscale2pmu_disable_event,
781 .read_counter = xscale2pmu_read_counter,
782 .write_counter = xscale2pmu_write_counter,
783 .get_event_idx = xscale2pmu_get_event_idx,
784 .start = xscale2pmu_start,
785 .stop = xscale2pmu_stop,
786 .cache_map = &xscale_perf_cache_map,
787 .event_map = &xscale_perf_map,
788 .raw_event_mask = 0xFF,
789 .num_events = 5,
790 .max_period = (1LLU << 32) - 1,
791};
792
793const struct arm_pmu *__init xscale2pmu_init(void)
794{
795 return &xscale2pmu;
796}
797#else
798const struct arm_pmu *__init xscale1pmu_init(void)
799{
800 return NULL;
801}
802
803const struct arm_pmu *__init xscale2pmu_init(void)
804{
805 return NULL;
806}
807#endif /* CONFIG_CPU_XSCALE */
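For completeness, a minimal, hypothetical userspace sketch (not part of this patch) of how these backends end up being exercised: opening a hardware cycle-counter event with the perf_event_open() syscall drives the enable/read/disable callbacks of whichever arm_pmu the kernel selected at boot.

/*
 * Count CPU cycles for a small region of the calling thread using
 * perf_event_open(). Illustrative example only.
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_HARDWARE;
	attr.size = sizeof(attr);
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.disabled = 1;
	attr.exclude_kernel = 1;

	/* Monitor the calling thread on any CPU. */
	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		return 1;

	ioctl(fd, PERF_EVENT_IOC_RESET, 0);
	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... workload under test ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("cycles: %llu\n", (unsigned long long)count);

	close(fd);
	return 0;
}

On a kernel where none of the *_pmu_init() hooks return a PMU (the stub paths above), such a perf_event_open() call would be expected to fail rather than count.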