diff options
author | Vineet Gupta <vgupta@synopsys.com> | 2015-04-15 10:14:07 -0400 |
---|---|---|
committer | Vineet Gupta <vgupta@synopsys.com> | 2015-04-20 08:57:30 -0400 |
commit | bde80c237e49983e2b26dfa9925325a070b71de7 (patch) | |
tree | 8b9f0ff0b9e0553665dfab6e35e14afc7212387b /arch/arc | |
parent | 03c94fcf954d6bc5e23460e200d23a2c0fe5cd2e (diff) |
ARC: perf: Add some comments/debug stuff
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Diffstat (limited to 'arch/arc')
-rw-r--r-- | arch/arc/include/asm/perf_event.h | 53 | ||||
-rw-r--r-- | arch/arc/kernel/perf_event.c | 18 |
2 files changed, 37 insertions, 34 deletions
diff --git a/arch/arc/include/asm/perf_event.h b/arch/arc/include/asm/perf_event.h index cbf755e32a03..1c45667c1367 100644 --- a/arch/arc/include/asm/perf_event.h +++ b/arch/arc/include/asm/perf_event.h | |||
@@ -57,26 +57,7 @@ struct arc_reg_cc_build { | |||
57 | #define PERF_COUNT_ARC_HW_MAX (PERF_COUNT_HW_MAX + 6) | 57 | #define PERF_COUNT_ARC_HW_MAX (PERF_COUNT_HW_MAX + 6) |
58 | 58 | ||
59 | /* | 59 | /* |
60 | * The "generalized" performance events seem to really be a copy | 60 | * Some ARC pct quirks: |
61 | * of the available events on x86 processors; the mapping to ARC | ||
62 | * events is not always possible 1-to-1. Fortunately, there doesn't | ||
63 | * seem to be an exact definition for these events, so we can cheat | ||
64 | * a bit where necessary. | ||
65 | * | ||
66 | * In particular, the following PERF events may behave a bit differently | ||
67 | * compared to other architectures: | ||
68 | * | ||
69 | * PERF_COUNT_HW_CPU_CYCLES | ||
70 | * Cycles not in halted state | ||
71 | * | ||
72 | * PERF_COUNT_HW_REF_CPU_CYCLES | ||
73 | * Reference cycles not in halted state, same as PERF_COUNT_HW_CPU_CYCLES | ||
74 | * for now as we don't do Dynamic Voltage/Frequency Scaling (yet) | ||
75 | * | ||
76 | * PERF_COUNT_HW_BUS_CYCLES | ||
77 | * Unclear what this means, Intel uses 0x013c, which according to | ||
78 | * their datasheet means "unhalted reference cycles". It sounds similar | ||
79 | * to PERF_COUNT_HW_REF_CPU_CYCLES, and we use the same counter for it. | ||
80 | * | 61 | * |
81 | * PERF_COUNT_HW_STALLED_CYCLES_BACKEND | 62 | * PERF_COUNT_HW_STALLED_CYCLES_BACKEND |
82 | * PERF_COUNT_HW_STALLED_CYCLES_FRONTEND | 63 | * PERF_COUNT_HW_STALLED_CYCLES_FRONTEND |
@@ -91,21 +72,35 @@ struct arc_reg_cc_build { | |||
91 | * Note that I$ cache misses aren't counted by either of the two! | 72 | * Note that I$ cache misses aren't counted by either of the two! |
92 | */ | 73 | */ |
93 | 74 | ||
75 | /* | ||
76 | * ARC PCT has hardware conditions with fixed "names" but variable "indexes" | ||
77 | * (based on a specific RTL build) | ||
78 | * Below is the static map between perf generic/arc specific event_id and | ||
79 | * h/w condition names. | ||
80 | * At the time of probe, we loop thru each index and find its name to | ||
81 | * complete the mapping of perf event_id to h/w index as latter is needed | ||
82 | * to program the counter really | ||
83 | */ | ||
94 | static const char * const arc_pmu_ev_hw_map[] = { | 84 | static const char * const arc_pmu_ev_hw_map[] = { |
85 | /* count cycles */ | ||
95 | [PERF_COUNT_HW_CPU_CYCLES] = "crun", | 86 | [PERF_COUNT_HW_CPU_CYCLES] = "crun", |
96 | [PERF_COUNT_HW_REF_CPU_CYCLES] = "crun", | 87 | [PERF_COUNT_HW_REF_CPU_CYCLES] = "crun", |
97 | [PERF_COUNT_HW_BUS_CYCLES] = "crun", | 88 | [PERF_COUNT_HW_BUS_CYCLES] = "crun", |
98 | [PERF_COUNT_HW_INSTRUCTIONS] = "iall", | 89 | |
99 | [PERF_COUNT_HW_BRANCH_MISSES] = "bpfail", | ||
100 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp", | ||
101 | [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = "bflush", | 90 | [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = "bflush", |
102 | [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = "bstall", | 91 | [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = "bstall", |
103 | [PERF_COUNT_ARC_DCLM] = "dclm", | 92 | |
104 | [PERF_COUNT_ARC_DCSM] = "dcsm", | 93 | /* counts condition */ |
105 | [PERF_COUNT_ARC_ICM] = "icm", | 94 | [PERF_COUNT_HW_INSTRUCTIONS] = "iall", |
106 | [PERF_COUNT_ARC_BPOK] = "bpok", | 95 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp", |
107 | [PERF_COUNT_ARC_EDTLB] = "edtlb", | 96 | [PERF_COUNT_ARC_BPOK] = "bpok", /* NP-NT, PT-T, PNT-NT */ |
108 | [PERF_COUNT_ARC_EITLB] = "eitlb", | 97 | [PERF_COUNT_HW_BRANCH_MISSES] = "bpfail", /* NP-T, PT-NT, PNT-T */ |
98 | |||
99 | [PERF_COUNT_ARC_DCLM] = "dclm", /* D-cache Load Miss */ | ||
100 | [PERF_COUNT_ARC_DCSM] = "dcsm", /* D-cache Store Miss */ | ||
101 | [PERF_COUNT_ARC_ICM] = "icm", /* I-cache Miss */ | ||
102 | [PERF_COUNT_ARC_EDTLB] = "edtlb", /* D-TLB Miss */ | ||
103 | [PERF_COUNT_ARC_EITLB] = "eitlb", /* I-TLB Miss */ | ||
109 | }; | 104 | }; |
110 | 105 | ||
111 | #define C(_x) PERF_COUNT_HW_CACHE_##_x | 106 | #define C(_x) PERF_COUNT_HW_CACHE_##_x |
diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index 64261c2711b1..181baeed4495 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c | |||
@@ -90,6 +90,10 @@ static int arc_pmu_cache_event(u64 config) | |||
90 | if (ret == CACHE_OP_UNSUPPORTED) | 90 | if (ret == CACHE_OP_UNSUPPORTED) |
91 | return -ENOENT; | 91 | return -ENOENT; |
92 | 92 | ||
93 | pr_debug("init cache event: type/op/result %d/%d/%d with h/w %d \'%s\'\n", | ||
94 | cache_type, cache_op, cache_result, ret, | ||
95 | arc_pmu_ev_hw_map[ret]); | ||
96 | |||
93 | return ret; | 97 | return ret; |
94 | } | 98 | } |
95 | 99 | ||
@@ -106,8 +110,9 @@ static int arc_pmu_event_init(struct perf_event *event) | |||
106 | if (arc_pmu->ev_hw_idx[event->attr.config] < 0) | 110 | if (arc_pmu->ev_hw_idx[event->attr.config] < 0) |
107 | return -ENOENT; | 111 | return -ENOENT; |
108 | hwc->config = arc_pmu->ev_hw_idx[event->attr.config]; | 112 | hwc->config = arc_pmu->ev_hw_idx[event->attr.config]; |
109 | pr_debug("initializing event %d with cfg %d\n", | 113 | pr_debug("init event %d with h/w %d \'%s\'\n", |
110 | (int) event->attr.config, (int) hwc->config); | 114 | (int) event->attr.config, (int) hwc->config, |
115 | arc_pmu_ev_hw_map[event->attr.config]); | ||
111 | return 0; | 116 | return 0; |
112 | case PERF_TYPE_HW_CACHE: | 117 | case PERF_TYPE_HW_CACHE: |
113 | ret = arc_pmu_cache_event(event->attr.config); | 118 | ret = arc_pmu_cache_event(event->attr.config); |
@@ -260,19 +265,22 @@ static int arc_pmu_device_probe(struct platform_device *pdev) | |||
260 | arc_pmu->n_counters, arc_pmu->counter_size, cc_bcr.c); | 265 | arc_pmu->n_counters, arc_pmu->counter_size, cc_bcr.c); |
261 | 266 | ||
262 | cc_name.str[8] = 0; | 267 | cc_name.str[8] = 0; |
263 | for (i = 0; i < PERF_COUNT_HW_MAX; i++) | 268 | for (i = 0; i < PERF_COUNT_ARC_HW_MAX; i++) |
264 | arc_pmu->ev_hw_idx[i] = -1; | 269 | arc_pmu->ev_hw_idx[i] = -1; |
265 | 270 | ||
271 | /* loop thru all available h/w condition indexes */ | ||
266 | for (j = 0; j < cc_bcr.c; j++) { | 272 | for (j = 0; j < cc_bcr.c; j++) { |
267 | write_aux_reg(ARC_REG_CC_INDEX, j); | 273 | write_aux_reg(ARC_REG_CC_INDEX, j); |
268 | cc_name.indiv.word0 = read_aux_reg(ARC_REG_CC_NAME0); | 274 | cc_name.indiv.word0 = read_aux_reg(ARC_REG_CC_NAME0); |
269 | cc_name.indiv.word1 = read_aux_reg(ARC_REG_CC_NAME1); | 275 | cc_name.indiv.word1 = read_aux_reg(ARC_REG_CC_NAME1); |
276 | |||
277 | /* See if it has been mapped to a perf event_id */ | ||
270 | for (i = 0; i < ARRAY_SIZE(arc_pmu_ev_hw_map); i++) { | 278 | for (i = 0; i < ARRAY_SIZE(arc_pmu_ev_hw_map); i++) { |
271 | if (arc_pmu_ev_hw_map[i] && | 279 | if (arc_pmu_ev_hw_map[i] && |
272 | !strcmp(arc_pmu_ev_hw_map[i], cc_name.str) && | 280 | !strcmp(arc_pmu_ev_hw_map[i], cc_name.str) && |
273 | strlen(arc_pmu_ev_hw_map[i])) { | 281 | strlen(arc_pmu_ev_hw_map[i])) { |
274 | pr_debug("mapping %d to idx %d with name %s\n", | 282 | pr_debug("mapping perf event %2d to h/w event \'%8s\' (idx %d)\n", |
275 | i, j, cc_name.str); | 283 | i, cc_name.str, j); |
276 | arc_pmu->ev_hw_idx[i] = j; | 284 | arc_pmu->ev_hw_idx[i] = j; |
277 | } | 285 | } |
278 | } | 286 | } |