diff options
| -rw-r--r-- | arch/arc/boot/dts/angel4.dts | 4 | ||||
| -rw-r--r-- | arch/arc/include/asm/perf_event.h | 204 | ||||
| -rw-r--r-- | arch/arc/kernel/Makefile | 1 | ||||
| -rw-r--r-- | arch/arc/kernel/perf_event.c | 322 |
4 files changed, 530 insertions, 1 deletions
diff --git a/arch/arc/boot/dts/angel4.dts b/arch/arc/boot/dts/angel4.dts index 4fb2d6f655bd..bcf662d21a57 100644 --- a/arch/arc/boot/dts/angel4.dts +++ b/arch/arc/boot/dts/angel4.dts | |||
| @@ -67,5 +67,9 @@ | |||
| 67 | reg = <1>; | 67 | reg = <1>; |
| 68 | }; | 68 | }; |
| 69 | }; | 69 | }; |
| 70 | |||
| 71 | arcpmu0: pmu { | ||
| 72 | compatible = "snps,arc700-pmu"; | ||
| 73 | }; | ||
| 70 | }; | 74 | }; |
| 71 | }; | 75 | }; |
diff --git a/arch/arc/include/asm/perf_event.h b/arch/arc/include/asm/perf_event.h index 115ad96480e6..cbf755e32a03 100644 --- a/arch/arc/include/asm/perf_event.h +++ b/arch/arc/include/asm/perf_event.h | |||
| @@ -1,5 +1,7 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) 2011-2012 Synopsys, Inc. (www.synopsys.com) | 2 | * Linux performance counter support for ARC |
| 3 | * | ||
| 4 | * Copyright (C) 2011-2013 Synopsys, Inc. (www.synopsys.com) | ||
| 3 | * | 5 | * |
| 4 | * This program is free software; you can redistribute it and/or modify | 6 | * This program is free software; you can redistribute it and/or modify |
| 5 | * it under the terms of the GNU General Public License version 2 as | 7 | * it under the terms of the GNU General Public License version 2 as |
| @@ -10,4 +12,204 @@ | |||
| 10 | #ifndef __ASM_PERF_EVENT_H | 12 | #ifndef __ASM_PERF_EVENT_H |
| 11 | #define __ASM_PERF_EVENT_H | 13 | #define __ASM_PERF_EVENT_H |
| 12 | 14 | ||
| 15 | /* real maximum varies per CPU, this is the maximum supported by the driver */ | ||
| 16 | #define ARC_PMU_MAX_HWEVENTS 64 | ||
| 17 | |||
| 18 | #define ARC_REG_CC_BUILD 0xF6 | ||
| 19 | #define ARC_REG_CC_INDEX 0x240 | ||
| 20 | #define ARC_REG_CC_NAME0 0x241 | ||
| 21 | #define ARC_REG_CC_NAME1 0x242 | ||
| 22 | |||
| 23 | #define ARC_REG_PCT_BUILD 0xF5 | ||
| 24 | #define ARC_REG_PCT_COUNTL 0x250 | ||
| 25 | #define ARC_REG_PCT_COUNTH 0x251 | ||
| 26 | #define ARC_REG_PCT_SNAPL 0x252 | ||
| 27 | #define ARC_REG_PCT_SNAPH 0x253 | ||
| 28 | #define ARC_REG_PCT_CONFIG 0x254 | ||
| 29 | #define ARC_REG_PCT_CONTROL 0x255 | ||
| 30 | #define ARC_REG_PCT_INDEX 0x256 | ||
| 31 | |||
| 32 | #define ARC_REG_PCT_CONTROL_CC (1 << 16) /* clear counts */ | ||
| 33 | #define ARC_REG_PCT_CONTROL_SN (1 << 17) /* snapshot */ | ||
| 34 | |||
/*
 * Bitfield view of the ARC_REG_PCT_BUILD build-configuration register.
 * The field order is reversed when CONFIG_CPU_BIG_ENDIAN is set.
 *
 * Fields consumed by the driver:
 *   v - zero means the core has no performance counters
 *   c - number of hardware counters
 *   s - counter width code; a counter is 32 + 16 * s bits wide
 * Fields r and m are not read by the driver.
 * NOTE(review): r is presumably reserved - confirm against the ARC700 PRM.
 */
struct arc_reg_pct_build {
#ifdef CONFIG_CPU_BIG_ENDIAN
	unsigned int m:8;
	unsigned int c:8;
	unsigned int r:6;
	unsigned int s:2;
	unsigned int v:8;
#else
	unsigned int v:8;
	unsigned int s:2;
	unsigned int r:6;
	unsigned int c:8;
	unsigned int m:8;
#endif
};
| 42 | |||
/*
 * Bitfield view of the ARC_REG_CC_BUILD build-configuration register.
 * The field order is reversed when CONFIG_CPU_BIG_ENDIAN is set.
 *
 * Fields consumed by the driver:
 *   v - zero means no countable conditions are implemented
 *   c - number of countable conditions
 * Field r is not read by the driver.
 */
struct arc_reg_cc_build {
#ifdef CONFIG_CPU_BIG_ENDIAN
	unsigned int c:16;
	unsigned int r:8;
	unsigned int v:8;
#else
	unsigned int v:8;
	unsigned int r:8;
	unsigned int c:16;
#endif
};
| 50 | |||
| 51 | #define PERF_COUNT_ARC_DCLM (PERF_COUNT_HW_MAX + 0) | ||
| 52 | #define PERF_COUNT_ARC_DCSM (PERF_COUNT_HW_MAX + 1) | ||
| 53 | #define PERF_COUNT_ARC_ICM (PERF_COUNT_HW_MAX + 2) | ||
| 54 | #define PERF_COUNT_ARC_BPOK (PERF_COUNT_HW_MAX + 3) | ||
| 55 | #define PERF_COUNT_ARC_EDTLB (PERF_COUNT_HW_MAX + 4) | ||
| 56 | #define PERF_COUNT_ARC_EITLB (PERF_COUNT_HW_MAX + 5) | ||
| 57 | #define PERF_COUNT_ARC_HW_MAX (PERF_COUNT_HW_MAX + 6) | ||
| 58 | |||
| 59 | /* | ||
| 60 | * The "generalized" performance events seem to really be a copy | ||
| 61 | * of the available events on x86 processors; the mapping to ARC | ||
| 62 | * events is not always possible 1-to-1. Fortunately, there doesn't | ||
| 63 | * seem to be an exact definition for these events, so we can cheat | ||
| 64 | * a bit where necessary. | ||
| 65 | * | ||
| 66 | * In particular, the following PERF events may behave a bit differently | ||
| 67 | * compared to other architectures: | ||
| 68 | * | ||
| 69 | * PERF_COUNT_HW_CPU_CYCLES | ||
| 70 | * Cycles not in halted state | ||
| 71 | * | ||
| 72 | * PERF_COUNT_HW_REF_CPU_CYCLES | ||
| 73 | * Reference cycles not in halted state, same as PERF_COUNT_HW_CPU_CYCLES | ||
| 74 | * for now as we don't do Dynamic Voltage/Frequency Scaling (yet) | ||
| 75 | * | ||
| 76 | * PERF_COUNT_HW_BUS_CYCLES | ||
| 77 | * Unclear what this means, Intel uses 0x013c, which according to | ||
| 78 | * their datasheet means "unhalted reference cycles". It sounds similar | ||
| 79 | * to PERF_COUNT_HW_REF_CPU_CYCLES, and we use the same counter for it. | ||
| 80 | * | ||
| 81 | * PERF_COUNT_HW_STALLED_CYCLES_BACKEND | ||
| 82 | * PERF_COUNT_HW_STALLED_CYCLES_FRONTEND | ||
| 83 | * The ARC 700 can either measure stalls per pipeline stage, or all stalls | ||
| 84 | * combined; for now we assign all stalls to STALLED_CYCLES_BACKEND | ||
| 85 | * and all pipeline flushes (e.g. caused by mispredicts, etc.) to | ||
| 86 | * STALLED_CYCLES_FRONTEND. | ||
| 87 | * | ||
| 88 | * We could start multiple performance counters and combine everything | ||
| 89 | * afterwards, but that makes it complicated. | ||
| 90 | * | ||
| 91 | * Note that I$ cache misses aren't counted by either of the two! | ||
| 92 | */ | ||
| 93 | |||
| 94 | static const char * const arc_pmu_ev_hw_map[] = { | ||
| 95 | [PERF_COUNT_HW_CPU_CYCLES] = "crun", | ||
| 96 | [PERF_COUNT_HW_REF_CPU_CYCLES] = "crun", | ||
| 97 | [PERF_COUNT_HW_BUS_CYCLES] = "crun", | ||
| 98 | [PERF_COUNT_HW_INSTRUCTIONS] = "iall", | ||
| 99 | [PERF_COUNT_HW_BRANCH_MISSES] = "bpfail", | ||
| 100 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp", | ||
| 101 | [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = "bflush", | ||
| 102 | [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = "bstall", | ||
| 103 | [PERF_COUNT_ARC_DCLM] = "dclm", | ||
| 104 | [PERF_COUNT_ARC_DCSM] = "dcsm", | ||
| 105 | [PERF_COUNT_ARC_ICM] = "icm", | ||
| 106 | [PERF_COUNT_ARC_BPOK] = "bpok", | ||
| 107 | [PERF_COUNT_ARC_EDTLB] = "edtlb", | ||
| 108 | [PERF_COUNT_ARC_EITLB] = "eitlb", | ||
| 109 | }; | ||
| 110 | |||
| 111 | #define C(_x) PERF_COUNT_HW_CACHE_##_x | ||
| 112 | #define CACHE_OP_UNSUPPORTED 0xffff | ||
| 113 | |||
| 114 | static const unsigned arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { | ||
| 115 | [C(L1D)] = { | ||
| 116 | [C(OP_READ)] = { | ||
| 117 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
| 118 | [C(RESULT_MISS)] = PERF_COUNT_ARC_DCLM, | ||
| 119 | }, | ||
| 120 | [C(OP_WRITE)] = { | ||
| 121 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
| 122 | [C(RESULT_MISS)] = PERF_COUNT_ARC_DCSM, | ||
| 123 | }, | ||
| 124 | [C(OP_PREFETCH)] = { | ||
| 125 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
| 126 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
| 127 | }, | ||
| 128 | }, | ||
| 129 | [C(L1I)] = { | ||
| 130 | [C(OP_READ)] = { | ||
| 131 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
| 132 | [C(RESULT_MISS)] = PERF_COUNT_ARC_ICM, | ||
| 133 | }, | ||
| 134 | [C(OP_WRITE)] = { | ||
| 135 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
| 136 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
| 137 | }, | ||
| 138 | [C(OP_PREFETCH)] = { | ||
| 139 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
| 140 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
| 141 | }, | ||
| 142 | }, | ||
| 143 | [C(LL)] = { | ||
| 144 | [C(OP_READ)] = { | ||
| 145 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
| 146 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
| 147 | }, | ||
| 148 | [C(OP_WRITE)] = { | ||
| 149 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
| 150 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
| 151 | }, | ||
| 152 | [C(OP_PREFETCH)] = { | ||
| 153 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
| 154 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
| 155 | }, | ||
| 156 | }, | ||
| 157 | [C(DTLB)] = { | ||
| 158 | [C(OP_READ)] = { | ||
| 159 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
| 160 | [C(RESULT_MISS)] = PERF_COUNT_ARC_EDTLB, | ||
| 161 | }, | ||
| 162 | [C(OP_WRITE)] = { | ||
| 163 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
| 164 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
| 165 | }, | ||
| 166 | [C(OP_PREFETCH)] = { | ||
| 167 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
| 168 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
| 169 | }, | ||
| 170 | }, | ||
| 171 | [C(ITLB)] = { | ||
| 172 | [C(OP_READ)] = { | ||
| 173 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
| 174 | [C(RESULT_MISS)] = PERF_COUNT_ARC_EITLB, | ||
| 175 | }, | ||
| 176 | [C(OP_WRITE)] = { | ||
| 177 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
| 178 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
| 179 | }, | ||
| 180 | [C(OP_PREFETCH)] = { | ||
| 181 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
| 182 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
| 183 | }, | ||
| 184 | }, | ||
| 185 | [C(BPU)] = { | ||
| 186 | [C(OP_READ)] = { | ||
| 187 | [C(RESULT_ACCESS)] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS, | ||
| 188 | [C(RESULT_MISS)] = PERF_COUNT_HW_BRANCH_MISSES, | ||
| 189 | }, | ||
| 190 | [C(OP_WRITE)] = { | ||
| 191 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
| 192 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
| 193 | }, | ||
| 194 | [C(OP_PREFETCH)] = { | ||
| 195 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
| 196 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
| 197 | }, | ||
| 198 | }, | ||
| 199 | [C(NODE)] = { | ||
| 200 | [C(OP_READ)] = { | ||
| 201 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
| 202 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
| 203 | }, | ||
| 204 | [C(OP_WRITE)] = { | ||
| 205 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
| 206 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
| 207 | }, | ||
| 208 | [C(OP_PREFETCH)] = { | ||
| 209 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
| 210 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
| 211 | }, | ||
| 212 | }, | ||
| 213 | }; | ||
| 214 | |||
| 13 | #endif /* __ASM_PERF_EVENT_H */ | 215 | #endif /* __ASM_PERF_EVENT_H */ |
diff --git a/arch/arc/kernel/Makefile b/arch/arc/kernel/Makefile index c242ef07ba70..8004b4fa6461 100644 --- a/arch/arc/kernel/Makefile +++ b/arch/arc/kernel/Makefile | |||
| @@ -19,6 +19,7 @@ obj-$(CONFIG_KPROBES) += kprobes.o | |||
| 19 | obj-$(CONFIG_ARC_MISALIGN_ACCESS) += unaligned.o | 19 | obj-$(CONFIG_ARC_MISALIGN_ACCESS) += unaligned.o |
| 20 | obj-$(CONFIG_KGDB) += kgdb.o | 20 | obj-$(CONFIG_KGDB) += kgdb.o |
| 21 | obj-$(CONFIG_ARC_METAWARE_HLINK) += arc_hostlink.o | 21 | obj-$(CONFIG_ARC_METAWARE_HLINK) += arc_hostlink.o |
| 22 | obj-$(CONFIG_PERF_EVENTS) += perf_event.o | ||
| 22 | 23 | ||
| 23 | obj-$(CONFIG_ARC_FPU_SAVE_RESTORE) += fpu.o | 24 | obj-$(CONFIG_ARC_FPU_SAVE_RESTORE) += fpu.o |
| 24 | CFLAGS_fpu.o += -mdpfp | 25 | CFLAGS_fpu.o += -mdpfp |
diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c new file mode 100644 index 000000000000..759e5f3e9029 --- /dev/null +++ b/arch/arc/kernel/perf_event.c | |||
| @@ -0,0 +1,322 @@ | |||
| 1 | /* | ||
| 2 | * Linux performance counter support for ARC700 series | ||
| 3 | * | ||
| 4 | * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com) | ||
| 5 | * | ||
| 6 | * This code is inspired by the perf support of various other architectures. | ||
| 7 | * | ||
| 8 | * This program is free software; you can redistribute it and/or modify | ||
| 9 | * it under the terms of the GNU General Public License version 2 as | ||
| 10 | * published by the Free Software Foundation. | ||
| 11 | * | ||
| 12 | */ | ||
| 13 | #include <linux/errno.h> | ||
| 14 | #include <linux/module.h> | ||
| 15 | #include <linux/of.h> | ||
| 16 | #include <linux/perf_event.h> | ||
| 17 | #include <linux/platform_device.h> | ||
| 18 | #include <asm/arcregs.h> | ||
| 19 | |||
| 20 | struct arc_pmu { | ||
| 21 | struct pmu pmu; | ||
| 22 | int counter_size; /* in bits */ | ||
| 23 | int n_counters; | ||
| 24 | unsigned long used_mask[BITS_TO_LONGS(ARC_PMU_MAX_HWEVENTS)]; | ||
| 25 | int ev_hw_idx[PERF_COUNT_ARC_HW_MAX]; | ||
| 26 | }; | ||
| 27 | |||
| 28 | /* read counter #idx; note that counter# != event# on ARC! */ | ||
| 29 | static uint64_t arc_pmu_read_counter(int idx) | ||
| 30 | { | ||
| 31 | uint32_t tmp; | ||
| 32 | uint64_t result; | ||
| 33 | |||
| 34 | /* | ||
| 35 | * ARC supports making 'snapshots' of the counters, so we don't | ||
| 36 | * need to care about counters wrapping to 0 underneath our feet | ||
| 37 | */ | ||
| 38 | write_aux_reg(ARC_REG_PCT_INDEX, idx); | ||
| 39 | tmp = read_aux_reg(ARC_REG_PCT_CONTROL); | ||
| 40 | write_aux_reg(ARC_REG_PCT_CONTROL, tmp | ARC_REG_PCT_CONTROL_SN); | ||
| 41 | result = (uint64_t) (read_aux_reg(ARC_REG_PCT_SNAPH)) << 32; | ||
| 42 | result |= read_aux_reg(ARC_REG_PCT_SNAPL); | ||
| 43 | |||
| 44 | return result; | ||
| 45 | } | ||
| 46 | |||
| 47 | static void arc_perf_event_update(struct perf_event *event, | ||
| 48 | struct hw_perf_event *hwc, int idx) | ||
| 49 | { | ||
| 50 | struct arc_pmu *arc_pmu = container_of(event->pmu, struct arc_pmu, pmu); | ||
| 51 | uint64_t prev_raw_count, new_raw_count; | ||
| 52 | int64_t delta; | ||
| 53 | |||
| 54 | do { | ||
| 55 | prev_raw_count = local64_read(&hwc->prev_count); | ||
| 56 | new_raw_count = arc_pmu_read_counter(idx); | ||
| 57 | } while (local64_cmpxchg(&hwc->prev_count, prev_raw_count, | ||
| 58 | new_raw_count) != prev_raw_count); | ||
| 59 | |||
| 60 | delta = (new_raw_count - prev_raw_count) & | ||
| 61 | ((1ULL << arc_pmu->counter_size) - 1ULL); | ||
| 62 | |||
| 63 | local64_add(delta, &event->count); | ||
| 64 | local64_sub(delta, &hwc->period_left); | ||
| 65 | } | ||
| 66 | |||
| 67 | static void arc_pmu_read(struct perf_event *event) | ||
| 68 | { | ||
| 69 | arc_perf_event_update(event, &event->hw, event->hw.idx); | ||
| 70 | } | ||
| 71 | |||
| 72 | static int arc_pmu_cache_event(u64 config) | ||
| 73 | { | ||
| 74 | unsigned int cache_type, cache_op, cache_result; | ||
| 75 | int ret; | ||
| 76 | |||
| 77 | cache_type = (config >> 0) & 0xff; | ||
| 78 | cache_op = (config >> 8) & 0xff; | ||
| 79 | cache_result = (config >> 16) & 0xff; | ||
| 80 | if (cache_type >= PERF_COUNT_HW_CACHE_MAX) | ||
| 81 | return -EINVAL; | ||
| 82 | if (cache_type >= PERF_COUNT_HW_CACHE_OP_MAX) | ||
| 83 | return -EINVAL; | ||
| 84 | if (cache_type >= PERF_COUNT_HW_CACHE_RESULT_MAX) | ||
| 85 | return -EINVAL; | ||
| 86 | |||
| 87 | ret = arc_pmu_cache_map[cache_type][cache_op][cache_result]; | ||
| 88 | |||
| 89 | if (ret == CACHE_OP_UNSUPPORTED) | ||
| 90 | return -ENOENT; | ||
| 91 | |||
| 92 | return ret; | ||
| 93 | } | ||
| 94 | |||
| 95 | /* initializes hw_perf_event structure if event is supported */ | ||
| 96 | static int arc_pmu_event_init(struct perf_event *event) | ||
| 97 | { | ||
| 98 | struct arc_pmu *arc_pmu = container_of(event->pmu, struct arc_pmu, pmu); | ||
| 99 | struct hw_perf_event *hwc = &event->hw; | ||
| 100 | int ret; | ||
| 101 | |||
| 102 | switch (event->attr.type) { | ||
| 103 | case PERF_TYPE_HARDWARE: | ||
| 104 | if (event->attr.config >= PERF_COUNT_HW_MAX) | ||
| 105 | return -ENOENT; | ||
| 106 | if (arc_pmu->ev_hw_idx[event->attr.config] < 0) | ||
| 107 | return -ENOENT; | ||
| 108 | hwc->config = arc_pmu->ev_hw_idx[event->attr.config]; | ||
| 109 | pr_debug("initializing event %d with cfg %d\n", | ||
| 110 | (int) event->attr.config, (int) hwc->config); | ||
| 111 | return 0; | ||
| 112 | case PERF_TYPE_HW_CACHE: | ||
| 113 | ret = arc_pmu_cache_event(event->attr.config); | ||
| 114 | if (ret < 0) | ||
| 115 | return ret; | ||
| 116 | hwc->config = arc_pmu->ev_hw_idx[ret]; | ||
| 117 | return 0; | ||
| 118 | default: | ||
| 119 | return -ENOENT; | ||
| 120 | } | ||
| 121 | } | ||
| 122 | |||
| 123 | /* starts all counters */ | ||
| 124 | static void arc_pmu_enable(struct pmu *pmu) | ||
| 125 | { | ||
| 126 | uint32_t tmp; | ||
| 127 | tmp = read_aux_reg(ARC_REG_PCT_CONTROL); | ||
| 128 | write_aux_reg(ARC_REG_PCT_CONTROL, (tmp & 0xffff0000) | 0x1); | ||
| 129 | } | ||
| 130 | |||
| 131 | /* stops all counters */ | ||
| 132 | static void arc_pmu_disable(struct pmu *pmu) | ||
| 133 | { | ||
| 134 | uint32_t tmp; | ||
| 135 | tmp = read_aux_reg(ARC_REG_PCT_CONTROL); | ||
| 136 | write_aux_reg(ARC_REG_PCT_CONTROL, (tmp & 0xffff0000) | 0x0); | ||
| 137 | } | ||
| 138 | |||
| 139 | /* | ||
| 140 | * Assigns hardware counter to hardware condition. | ||
| 141 | * Note that there is no separate start/stop mechanism; | ||
| 142 | * stopping is achieved by assigning the 'never' condition | ||
| 143 | */ | ||
| 144 | static void arc_pmu_start(struct perf_event *event, int flags) | ||
| 145 | { | ||
| 146 | struct hw_perf_event *hwc = &event->hw; | ||
| 147 | int idx = hwc->idx; | ||
| 148 | |||
| 149 | if (WARN_ON_ONCE(idx == -1)) | ||
| 150 | return; | ||
| 151 | |||
| 152 | if (flags & PERF_EF_RELOAD) | ||
| 153 | WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); | ||
| 154 | |||
| 155 | event->hw.state = 0; | ||
| 156 | |||
| 157 | /* enable ARC pmu here */ | ||
| 158 | write_aux_reg(ARC_REG_PCT_INDEX, idx); | ||
| 159 | write_aux_reg(ARC_REG_PCT_CONFIG, hwc->config); | ||
| 160 | } | ||
| 161 | |||
| 162 | static void arc_pmu_stop(struct perf_event *event, int flags) | ||
| 163 | { | ||
| 164 | struct hw_perf_event *hwc = &event->hw; | ||
| 165 | int idx = hwc->idx; | ||
| 166 | |||
| 167 | if (!(event->hw.state & PERF_HES_STOPPED)) { | ||
| 168 | /* stop ARC pmu here */ | ||
| 169 | write_aux_reg(ARC_REG_PCT_INDEX, idx); | ||
| 170 | |||
| 171 | /* condition code #0 is always "never" */ | ||
| 172 | write_aux_reg(ARC_REG_PCT_CONFIG, 0); | ||
| 173 | |||
| 174 | event->hw.state |= PERF_HES_STOPPED; | ||
| 175 | } | ||
| 176 | |||
| 177 | if ((flags & PERF_EF_UPDATE) && | ||
| 178 | !(event->hw.state & PERF_HES_UPTODATE)) { | ||
| 179 | arc_perf_event_update(event, &event->hw, idx); | ||
| 180 | event->hw.state |= PERF_HES_UPTODATE; | ||
| 181 | } | ||
| 182 | } | ||
| 183 | |||
| 184 | static void arc_pmu_del(struct perf_event *event, int flags) | ||
| 185 | { | ||
| 186 | struct arc_pmu *arc_pmu = container_of(event->pmu, struct arc_pmu, pmu); | ||
| 187 | |||
| 188 | arc_pmu_stop(event, PERF_EF_UPDATE); | ||
| 189 | __clear_bit(event->hw.idx, arc_pmu->used_mask); | ||
| 190 | |||
| 191 | perf_event_update_userpage(event); | ||
| 192 | } | ||
| 193 | |||
| 194 | /* allocate hardware counter and optionally start counting */ | ||
| 195 | static int arc_pmu_add(struct perf_event *event, int flags) | ||
| 196 | { | ||
| 197 | struct arc_pmu *arc_pmu = container_of(event->pmu, struct arc_pmu, pmu); | ||
| 198 | struct hw_perf_event *hwc = &event->hw; | ||
| 199 | int idx = hwc->idx; | ||
| 200 | |||
| 201 | if (__test_and_set_bit(idx, arc_pmu->used_mask)) { | ||
| 202 | idx = find_first_zero_bit(arc_pmu->used_mask, | ||
| 203 | arc_pmu->n_counters); | ||
| 204 | if (idx == arc_pmu->n_counters) | ||
| 205 | return -EAGAIN; | ||
| 206 | |||
| 207 | __set_bit(idx, arc_pmu->used_mask); | ||
| 208 | hwc->idx = idx; | ||
| 209 | } | ||
| 210 | |||
| 211 | write_aux_reg(ARC_REG_PCT_INDEX, idx); | ||
| 212 | write_aux_reg(ARC_REG_PCT_CONFIG, 0); | ||
| 213 | write_aux_reg(ARC_REG_PCT_COUNTL, 0); | ||
| 214 | write_aux_reg(ARC_REG_PCT_COUNTH, 0); | ||
| 215 | local64_set(&hwc->prev_count, 0); | ||
| 216 | |||
| 217 | hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; | ||
| 218 | if (flags & PERF_EF_START) | ||
| 219 | arc_pmu_start(event, PERF_EF_RELOAD); | ||
| 220 | |||
| 221 | perf_event_update_userpage(event); | ||
| 222 | |||
| 223 | return 0; | ||
| 224 | } | ||
| 225 | |||
| 226 | static int arc_pmu_device_probe(struct platform_device *pdev) | ||
| 227 | { | ||
| 228 | struct arc_pmu *arc_pmu; | ||
| 229 | struct arc_reg_pct_build pct_bcr; | ||
| 230 | struct arc_reg_cc_build cc_bcr; | ||
| 231 | int i, j, ret; | ||
| 232 | |||
| 233 | union cc_name { | ||
| 234 | struct { | ||
| 235 | uint32_t word0, word1; | ||
| 236 | char sentinel; | ||
| 237 | } indiv; | ||
| 238 | char str[9]; | ||
| 239 | } cc_name; | ||
| 240 | |||
| 241 | |||
| 242 | READ_BCR(ARC_REG_PCT_BUILD, pct_bcr); | ||
| 243 | if (!pct_bcr.v) { | ||
| 244 | pr_err("This core does not have performance counters!\n"); | ||
| 245 | return -ENODEV; | ||
| 246 | } | ||
| 247 | |||
| 248 | arc_pmu = devm_kzalloc(&pdev->dev, sizeof(struct arc_pmu), | ||
| 249 | GFP_KERNEL); | ||
| 250 | if (!arc_pmu) | ||
| 251 | return -ENOMEM; | ||
| 252 | |||
| 253 | arc_pmu->n_counters = pct_bcr.c; | ||
| 254 | BUG_ON(arc_pmu->n_counters > ARC_PMU_MAX_HWEVENTS); | ||
| 255 | |||
| 256 | arc_pmu->counter_size = 32 + (pct_bcr.s << 4); | ||
| 257 | pr_info("ARC PMU found with %d counters of size %d bits\n", | ||
| 258 | arc_pmu->n_counters, arc_pmu->counter_size); | ||
| 259 | |||
| 260 | READ_BCR(ARC_REG_CC_BUILD, cc_bcr); | ||
| 261 | |||
| 262 | if (!cc_bcr.v) | ||
| 263 | pr_err("Strange! Performance counters exist, but no countable conditions?\n"); | ||
| 264 | |||
| 265 | pr_info("ARC PMU has %d countable conditions\n", cc_bcr.c); | ||
| 266 | |||
| 267 | cc_name.str[8] = 0; | ||
| 268 | for (i = 0; i < PERF_COUNT_HW_MAX; i++) | ||
| 269 | arc_pmu->ev_hw_idx[i] = -1; | ||
| 270 | |||
| 271 | for (j = 0; j < cc_bcr.c; j++) { | ||
| 272 | write_aux_reg(ARC_REG_CC_INDEX, j); | ||
| 273 | cc_name.indiv.word0 = read_aux_reg(ARC_REG_CC_NAME0); | ||
| 274 | cc_name.indiv.word1 = read_aux_reg(ARC_REG_CC_NAME1); | ||
| 275 | for (i = 0; i < ARRAY_SIZE(arc_pmu_ev_hw_map); i++) { | ||
| 276 | if (arc_pmu_ev_hw_map[i] && | ||
| 277 | !strcmp(arc_pmu_ev_hw_map[i], cc_name.str) && | ||
| 278 | strlen(arc_pmu_ev_hw_map[i])) { | ||
| 279 | pr_debug("mapping %d to idx %d with name %s\n", | ||
| 280 | i, j, cc_name.str); | ||
| 281 | arc_pmu->ev_hw_idx[i] = j; | ||
| 282 | } | ||
| 283 | } | ||
| 284 | } | ||
| 285 | |||
| 286 | arc_pmu->pmu = (struct pmu) { | ||
| 287 | .pmu_enable = arc_pmu_enable, | ||
| 288 | .pmu_disable = arc_pmu_disable, | ||
| 289 | .event_init = arc_pmu_event_init, | ||
| 290 | .add = arc_pmu_add, | ||
| 291 | .del = arc_pmu_del, | ||
| 292 | .start = arc_pmu_start, | ||
| 293 | .stop = arc_pmu_stop, | ||
| 294 | .read = arc_pmu_read, | ||
| 295 | }; | ||
| 296 | |||
| 297 | ret = perf_pmu_register(&arc_pmu->pmu, pdev->name, PERF_TYPE_RAW); | ||
| 298 | |||
| 299 | return ret; | ||
| 300 | } | ||
| 301 | |||
#ifdef CONFIG_OF
/* device tree "compatible" strings handled by this driver */
static const struct of_device_id arc_pmu_match[] = {
	{
		.compatible = "snps,arc700-pmu",
	},
	{ /* end of table */ },
};
MODULE_DEVICE_TABLE(of, arc_pmu_match);
#endif
| 309 | |||
| 310 | static struct platform_driver arc_pmu_driver = { | ||
| 311 | .driver = { | ||
| 312 | .name = "arc700-pmu", | ||
| 313 | .of_match_table = of_match_ptr(arc_pmu_match), | ||
| 314 | }, | ||
| 315 | .probe = arc_pmu_device_probe, | ||
| 316 | }; | ||
| 317 | |||
| 318 | module_platform_driver(arc_pmu_driver); | ||
| 319 | |||
| 320 | MODULE_LICENSE("GPL"); | ||
| 321 | MODULE_AUTHOR("Mischa Jonker <mjonker@synopsys.com>"); | ||
| 322 | MODULE_DESCRIPTION("ARC PMU driver"); | ||
