| author | Vineet Gupta <vgupta@synopsys.com> | 2015-04-15 10:14:07 -0400 |
|---|---|---|
| committer | Vineet Gupta <vgupta@synopsys.com> | 2015-04-20 08:57:30 -0400 |
| commit | bde80c237e49983e2b26dfa9925325a070b71de7 (patch) | |
| tree | 8b9f0ff0b9e0553665dfab6e35e14afc7212387b | |
| parent | 03c94fcf954d6bc5e23460e200d23a2c0fe5cd2e (diff) | |
ARC: perf: Add some comments/debug stuff
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
| -rw-r--r-- | arch/arc/include/asm/perf_event.h | 53 |
| -rw-r--r-- | arch/arc/kernel/perf_event.c | 18 |
2 files changed, 37 insertions, 34 deletions
diff --git a/arch/arc/include/asm/perf_event.h b/arch/arc/include/asm/perf_event.h
index cbf755e32a03..1c45667c1367 100644
--- a/arch/arc/include/asm/perf_event.h
+++ b/arch/arc/include/asm/perf_event.h
@@ -57,26 +57,7 @@ struct arc_reg_cc_build {
 #define PERF_COUNT_ARC_HW_MAX	(PERF_COUNT_HW_MAX + 6)
 
 /*
- * The "generalized" performance events seem to really be a copy
- * of the available events on x86 processors; the mapping to ARC
- * events is not always possible 1-to-1. Fortunately, there doesn't
- * seem to be an exact definition for these events, so we can cheat
- * a bit where necessary.
- *
- * In particular, the following PERF events may behave a bit differently
- * compared to other architectures:
- *
- * PERF_COUNT_HW_CPU_CYCLES
- *	Cycles not in halted state
- *
- * PERF_COUNT_HW_REF_CPU_CYCLES
- *	Reference cycles not in halted state, same as PERF_COUNT_HW_CPU_CYCLES
- *	for now as we don't do Dynamic Voltage/Frequency Scaling (yet)
- *
- * PERF_COUNT_HW_BUS_CYCLES
- *	Unclear what this means, Intel uses 0x013c, which according to
- *	their datasheet means "unhalted reference cycles". It sounds similar
- *	to PERF_COUNT_HW_REF_CPU_CYCLES, and we use the same counter for it.
+ * Some ARC pct quirks:
  *
  * PERF_COUNT_HW_STALLED_CYCLES_BACKEND
  * PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
@@ -91,21 +72,35 @@ struct arc_reg_cc_build {
  * Note that I$ cache misses aren't counted by either of the two!
  */
 
+/*
+ * ARC PCT has hardware conditions with fixed "names" but variable "indexes"
+ * (based on a specific RTL build)
+ * Below is the static map between perf generic/arc specific event_id and
+ * h/w condition names.
+ * At the time of probe, we loop thru each index and find it's name to
+ * complete the mapping of perf event_id to h/w index as latter is needed
+ * to program the counter really
+ */
 static const char * const arc_pmu_ev_hw_map[] = {
+	/* count cycles */
 	[PERF_COUNT_HW_CPU_CYCLES] = "crun",
 	[PERF_COUNT_HW_REF_CPU_CYCLES] = "crun",
 	[PERF_COUNT_HW_BUS_CYCLES] = "crun",
-	[PERF_COUNT_HW_INSTRUCTIONS] = "iall",
-	[PERF_COUNT_HW_BRANCH_MISSES] = "bpfail",
-	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp",
+
 	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = "bflush",
 	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = "bstall",
-	[PERF_COUNT_ARC_DCLM] = "dclm",
-	[PERF_COUNT_ARC_DCSM] = "dcsm",
-	[PERF_COUNT_ARC_ICM] = "icm",
-	[PERF_COUNT_ARC_BPOK] = "bpok",
-	[PERF_COUNT_ARC_EDTLB] = "edtlb",
-	[PERF_COUNT_ARC_EITLB] = "eitlb",
+
+	/* counts condition */
+	[PERF_COUNT_HW_INSTRUCTIONS] = "iall",
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp",
+	[PERF_COUNT_ARC_BPOK] = "bpok",		/* NP-NT, PT-T, PNT-NT */
+	[PERF_COUNT_HW_BRANCH_MISSES] = "bpfail",	/* NP-T, PT-NT, PNT-T */
+
+	[PERF_COUNT_ARC_DCLM] = "dclm",		/* D-cache Load Miss */
+	[PERF_COUNT_ARC_DCSM] = "dcsm",		/* D-cache Store Miss */
+	[PERF_COUNT_ARC_ICM] = "icm",		/* I-cache Miss */
+	[PERF_COUNT_ARC_EDTLB] = "edtlb",	/* D-TLB Miss */
+	[PERF_COUNT_ARC_EITLB] = "eitlb",	/* I-TLB Miss */
 };
 
 #define C(_x)			PERF_COUNT_HW_CACHE_##_x
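The new comment block in this hunk describes a two-step scheme: a static table ties each perf event_id to a fixed ARC PCT condition *name*, and the probe code then scans the hardware's condition indexes to resolve each name to the *index* that actually gets programmed into a counter, since the index varies per RTL build. For readers who want the idea in isolation, here is a minimal user-space sketch of that resolution step; the `demo_*` names, the pretend condition list and the reduced event set are illustrative assumptions, not the kernel code (the real loop is in arc_pmu_device_probe(), in the last hunk of this patch).

```c
#include <stdio.h>
#include <string.h>

#define NUM_EVENT_IDS	4	/* reduced event set, for illustration only */
#define NUM_HW_CONDS	3	/* pretend this RTL build exposes 3 conditions */

/* Static map: perf event_id -> fixed ARC PCT condition name (subset of the real table) */
static const char * const demo_ev_hw_map[NUM_EVENT_IDS] = {
	[0] = "crun",	/* e.g. PERF_COUNT_HW_CPU_CYCLES */
	[1] = "iall",	/* e.g. PERF_COUNT_HW_INSTRUCTIONS */
	[2] = "ijmp",	/* e.g. PERF_COUNT_HW_BRANCH_INSTRUCTIONS */
	[3] = "bpfail",	/* e.g. PERF_COUNT_HW_BRANCH_MISSES */
};

/* Stand-in for selecting ARC_REG_CC_INDEX and reading ARC_REG_CC_NAME0/1 */
static const char *demo_read_cc_name(int idx)
{
	static const char * const rtl_names[NUM_HW_CONDS] = { "iall", "crun", "bpfail" };
	return rtl_names[idx];
}

int main(void)
{
	int ev_hw_idx[NUM_EVENT_IDS];
	int i, j;

	/* -1 means "no condition with that name in this build" */
	for (i = 0; i < NUM_EVENT_IDS; i++)
		ev_hw_idx[i] = -1;

	/* Probe: walk every h/w condition index, match its name against the map */
	for (j = 0; j < NUM_HW_CONDS; j++) {
		const char *name = demo_read_cc_name(j);

		for (i = 0; i < NUM_EVENT_IDS; i++) {
			if (demo_ev_hw_map[i] && !strcmp(demo_ev_hw_map[i], name))
				ev_hw_idx[i] = j;	/* event i is programmed via index j */
		}
	}

	for (i = 0; i < NUM_EVENT_IDS; i++)
		printf("event_id %d ('%s') -> h/w idx %d\n", i, demo_ev_hw_map[i], ev_hw_idx[i]);

	return 0;
}
```

In this pretend build "ijmp" is absent, so its event_id stays at -1; the real driver rejects such events with -ENOENT in arc_pmu_event_init().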
diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c
index 64261c2711b1..181baeed4495 100644
--- a/arch/arc/kernel/perf_event.c
+++ b/arch/arc/kernel/perf_event.c
@@ -90,6 +90,10 @@ static int arc_pmu_cache_event(u64 config)
 	if (ret == CACHE_OP_UNSUPPORTED)
 		return -ENOENT;
 
+	pr_debug("init cache event: type/op/result %d/%d/%d with h/w %d \'%s\'\n",
+		 cache_type, cache_op, cache_result, ret,
+		 arc_pmu_ev_hw_map[ret]);
+
 	return ret;
 }
 
@@ -106,8 +110,9 @@ static int arc_pmu_event_init(struct perf_event *event)
 		if (arc_pmu->ev_hw_idx[event->attr.config] < 0)
 			return -ENOENT;
 		hwc->config = arc_pmu->ev_hw_idx[event->attr.config];
-		pr_debug("initializing event %d with cfg %d\n",
-			 (int) event->attr.config, (int) hwc->config);
+		pr_debug("init event %d with h/w %d \'%s\'\n",
+			 (int) event->attr.config, (int) hwc->config,
+			 arc_pmu_ev_hw_map[event->attr.config]);
 		return 0;
 	case PERF_TYPE_HW_CACHE:
 		ret = arc_pmu_cache_event(event->attr.config);
@@ -260,19 +265,22 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
 		arc_pmu->n_counters, arc_pmu->counter_size, cc_bcr.c);
 
 	cc_name.str[8] = 0;
-	for (i = 0; i < PERF_COUNT_HW_MAX; i++)
+	for (i = 0; i < PERF_COUNT_ARC_HW_MAX; i++)
 		arc_pmu->ev_hw_idx[i] = -1;
 
+	/* loop thru all available h/w condition indexes */
 	for (j = 0; j < cc_bcr.c; j++) {
 		write_aux_reg(ARC_REG_CC_INDEX, j);
 		cc_name.indiv.word0 = read_aux_reg(ARC_REG_CC_NAME0);
 		cc_name.indiv.word1 = read_aux_reg(ARC_REG_CC_NAME1);
+
+		/* See if it has been mapped to a perf event_id */
 		for (i = 0; i < ARRAY_SIZE(arc_pmu_ev_hw_map); i++) {
 			if (arc_pmu_ev_hw_map[i] &&
 			    !strcmp(arc_pmu_ev_hw_map[i], cc_name.str) &&
 			    strlen(arc_pmu_ev_hw_map[i])) {
-				pr_debug("mapping %d to idx %d with name %s\n",
-					 i, j, cc_name.str);
+				pr_debug("mapping perf event %2d to h/w event \'%8s\' (idx %d)\n",
+					 i, cc_name.str, j);
 				arc_pmu->ev_hw_idx[i] = j;
 			}
 		}
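One detail in the probe hunk above: each condition's 8-character name is read as two 32-bit words (ARC_REG_CC_NAME0/1) into a union that aliases them with a char buffer, and `cc_name.str[8] = 0` forces NUL termination before the strcmp(). Below is a tiny self-contained sketch of that aliasing, with a union layout inferred from the usage visible here; the `demo_` names and fake register contents are illustrative assumptions, not the driver's actual definitions.

```c
#include <stdio.h>
#include <stdint.h>
#include <string.h>

/* Assumed layout, inferred from the usage above: two 32-bit aux-register
 * words overlay an 8-char name plus one byte reserved for the terminator. */
union demo_cc_name {
	struct {
		uint32_t word0;	/* would hold ARC_REG_CC_NAME0 */
		uint32_t word1;	/* would hold ARC_REG_CC_NAME1 */
	} indiv;
	char str[9];
};

int main(void)
{
	union demo_cc_name cc_name;

	/* Fake register contents for one condition index: the bytes of "bpfail" */
	memcpy(&cc_name.indiv.word0, "bpfa", 4);
	memcpy(&cc_name.indiv.word1, "il\0\0", 4);

	cc_name.str[8] = 0;	/* same defensive termination as the probe code */

	printf("condition name: '%s'\n", cc_name.str);	/* prints: condition name: 'bpfail' */
	return 0;
}
```

The demo fills the union byte-by-byte only to keep the example portable; the driver assigns the raw aux-register words directly.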
