diff options
author | Ingo Molnar <mingo@elte.hu> | 2009-06-05 14:22:46 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-06-06 07:14:47 -0400 |
commit | 8326f44da090d6d304d29b9fdc7fb3e20889e329 (patch) | |
tree | a15b2a2155c64a327b3cdf1da0997755d49390eb | |
parent | a21ca2cac582886a3e95c8bb84ff7c52d4d15e54 (diff) |
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOENT.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | Documentation/perf_counter/util/parse-events.c | 104 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_counter.c | 201 | ||||
-rw-r--r-- | include/linux/perf_counter.h | 34 | ||||
-rw-r--r-- | kernel/perf_counter.c | 1 |
4 files changed, 329 insertions, 11 deletions
diff --git a/Documentation/perf_counter/util/parse-events.c b/Documentation/perf_counter/util/parse-events.c index eb56bd996573..de9a77c47151 100644 --- a/Documentation/perf_counter/util/parse-events.c +++ b/Documentation/perf_counter/util/parse-events.c | |||
@@ -6,6 +6,8 @@ | |||
6 | #include "exec_cmd.h" | 6 | #include "exec_cmd.h" |
7 | #include "string.h" | 7 | #include "string.h" |
8 | 8 | ||
9 | extern char *strcasestr(const char *haystack, const char *needle); | ||
10 | |||
9 | int nr_counters; | 11 | int nr_counters; |
10 | 12 | ||
11 | struct perf_counter_attr attrs[MAX_COUNTERS]; | 13 | struct perf_counter_attr attrs[MAX_COUNTERS]; |
@@ -17,6 +19,7 @@ struct event_symbol { | |||
17 | }; | 19 | }; |
18 | 20 | ||
19 | #define C(x, y) .type = PERF_TYPE_##x, .config = PERF_COUNT_##y | 21 | #define C(x, y) .type = PERF_TYPE_##x, .config = PERF_COUNT_##y |
22 | #define CR(x, y) .type = PERF_TYPE_##x, .config = y | ||
20 | 23 | ||
21 | static struct event_symbol event_symbols[] = { | 24 | static struct event_symbol event_symbols[] = { |
22 | { C(HARDWARE, CPU_CYCLES), "cpu-cycles", }, | 25 | { C(HARDWARE, CPU_CYCLES), "cpu-cycles", }, |
@@ -69,6 +72,28 @@ static char *sw_event_names[] = { | |||
69 | "major faults", | 72 | "major faults", |
70 | }; | 73 | }; |
71 | 74 | ||
75 | #define MAX_ALIASES 8 | ||
76 | |||
77 | static char *hw_cache [][MAX_ALIASES] = { | ||
78 | { "l1-d" , "l1d" , "l1", "l1-data-cache" }, | ||
79 | { "l1-i" , "l1i" , "l1-instruction-cache" }, | ||
80 | { "l2" , }, | ||
81 | { "dtlb", }, | ||
82 | { "itlb", }, | ||
83 | { "bpu" , "btb", "branch-cache", NULL }, | ||
84 | }; | ||
85 | |||
86 | static char *hw_cache_op [][MAX_ALIASES] = { | ||
87 | { "read" , "load" }, | ||
88 | { "write" , "store" }, | ||
89 | { "prefetch" , "speculative-read", "speculative-load" }, | ||
90 | }; | ||
91 | |||
92 | static char *hw_cache_result [][MAX_ALIASES] = { | ||
93 | { "access", "ops" }, | ||
94 | { "miss", }, | ||
95 | }; | ||
96 | |||
72 | char *event_name(int counter) | 97 | char *event_name(int counter) |
73 | { | 98 | { |
74 | __u64 config = attrs[counter].config; | 99 | __u64 config = attrs[counter].config; |
@@ -86,6 +111,30 @@ char *event_name(int counter) | |||
86 | return hw_event_names[config]; | 111 | return hw_event_names[config]; |
87 | return "unknown-hardware"; | 112 | return "unknown-hardware"; |
88 | 113 | ||
114 | case PERF_TYPE_HW_CACHE: { | ||
115 | __u8 cache_type, cache_op, cache_result; | ||
116 | static char name[100]; | ||
117 | |||
118 | cache_type = (config >> 0) & 0xff; | ||
119 | if (cache_type > PERF_COUNT_HW_CACHE_MAX) | ||
120 | return "unknown-ext-hardware-cache-type"; | ||
121 | |||
122 | cache_op = (config >> 8) & 0xff; | ||
123 | if (cache_type > PERF_COUNT_HW_CACHE_OP_MAX) | ||
124 | return "unknown-ext-hardware-cache-op-type"; | ||
125 | |||
126 | cache_result = (config >> 16) & 0xff; | ||
127 | if (cache_type > PERF_COUNT_HW_CACHE_RESULT_MAX) | ||
128 | return "unknown-ext-hardware-cache-result-type"; | ||
129 | |||
130 | sprintf(name, "%s:%s:%s", | ||
131 | hw_cache[cache_type][0], | ||
132 | hw_cache_op[cache_op][0], | ||
133 | hw_cache_result[cache_result][0]); | ||
134 | |||
135 | return name; | ||
136 | } | ||
137 | |||
89 | case PERF_TYPE_SOFTWARE: | 138 | case PERF_TYPE_SOFTWARE: |
90 | if (config < PERF_SW_EVENTS_MAX) | 139 | if (config < PERF_SW_EVENTS_MAX) |
91 | return sw_event_names[config]; | 140 | return sw_event_names[config]; |
@@ -98,11 +147,60 @@ char *event_name(int counter) | |||
98 | return "unknown"; | 147 | return "unknown"; |
99 | } | 148 | } |
100 | 149 | ||
150 | static int parse_aliases(const char *str, char *names[][MAX_ALIASES], int size) | ||
151 | { | ||
152 | int i, j; | ||
153 | |||
154 | for (i = 0; i < size; i++) { | ||
155 | for (j = 0; j < MAX_ALIASES; j++) { | ||
156 | if (!names[i][j]) | ||
157 | break; | ||
158 | if (strcasestr(str, names[i][j])) | ||
159 | return i; | ||
160 | } | ||
161 | } | ||
162 | |||
163 | return 0; | ||
164 | } | ||
165 | |||
166 | static int parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr) | ||
167 | { | ||
168 | __u8 cache_type = -1, cache_op = 0, cache_result = 0; | ||
169 | |||
170 | cache_type = parse_aliases(str, hw_cache, PERF_COUNT_HW_CACHE_MAX); | ||
171 | /* | ||
172 | * No fallback - if we cannot get a clear cache type | ||
173 | * then bail out: | ||
174 | */ | ||
175 | if (cache_type == -1) | ||
176 | return -EINVAL; | ||
177 | |||
178 | cache_op = parse_aliases(str, hw_cache_op, PERF_COUNT_HW_CACHE_OP_MAX); | ||
179 | /* | ||
180 | * Fall back to reads: | ||
181 | */ | ||
182 | if (cache_type == -1) | ||
183 | cache_type = PERF_COUNT_HW_CACHE_OP_READ; | ||
184 | |||
185 | cache_result = parse_aliases(str, hw_cache_result, | ||
186 | PERF_COUNT_HW_CACHE_RESULT_MAX); | ||
187 | /* | ||
188 | * Fall back to accesses: | ||
189 | */ | ||
190 | if (cache_result == -1) | ||
191 | cache_result = PERF_COUNT_HW_CACHE_RESULT_ACCESS; | ||
192 | |||
193 | attr->config = cache_type | (cache_op << 8) | (cache_result << 16); | ||
194 | attr->type = PERF_TYPE_HW_CACHE; | ||
195 | |||
196 | return 0; | ||
197 | } | ||
198 | |||
101 | /* | 199 | /* |
102 | * Each event can have multiple symbolic names. | 200 | * Each event can have multiple symbolic names. |
103 | * Symbolic names are (almost) exactly matched. | 201 | * Symbolic names are (almost) exactly matched. |
104 | */ | 202 | */ |
105 | static int match_event_symbols(const char *str, struct perf_counter_attr *attr) | 203 | static int parse_event_symbols(const char *str, struct perf_counter_attr *attr) |
106 | { | 204 | { |
107 | __u64 config, id; | 205 | __u64 config, id; |
108 | int type; | 206 | int type; |
@@ -147,7 +245,7 @@ static int match_event_symbols(const char *str, struct perf_counter_attr *attr) | |||
147 | } | 245 | } |
148 | } | 246 | } |
149 | 247 | ||
150 | return -EINVAL; | 248 | return parse_generic_hw_symbols(str, attr); |
151 | } | 249 | } |
152 | 250 | ||
153 | int parse_events(const struct option *opt, const char *str, int unset) | 251 | int parse_events(const struct option *opt, const char *str, int unset) |
@@ -160,7 +258,7 @@ again: | |||
160 | if (nr_counters == MAX_COUNTERS) | 258 | if (nr_counters == MAX_COUNTERS) |
161 | return -1; | 259 | return -1; |
162 | 260 | ||
163 | ret = match_event_symbols(str, &attr); | 261 | ret = parse_event_symbols(str, &attr); |
164 | if (ret < 0) | 262 | if (ret < 0) |
165 | return ret; | 263 | return ret; |
166 | 264 | ||
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 430e048f2854..e86679fa5215 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c | |||
@@ -83,6 +83,128 @@ static u64 intel_pmu_event_map(int event) | |||
83 | return intel_perfmon_event_map[event]; | 83 | return intel_perfmon_event_map[event]; |
84 | } | 84 | } |
85 | 85 | ||
86 | /* | ||
87 | * Generalized hw caching related event table, filled | ||
88 | * in on a per model basis. A value of 0 means | ||
89 | * 'not supported', -1 means 'event makes no sense on | ||
90 | * this CPU', any other value means the raw event | ||
91 | * ID. | ||
92 | */ | ||
93 | |||
94 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
95 | |||
96 | static u64 __read_mostly hw_cache_event_ids | ||
97 | [PERF_COUNT_HW_CACHE_MAX] | ||
98 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
99 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; | ||
100 | |||
101 | static const u64 nehalem_hw_cache_event_ids | ||
102 | [PERF_COUNT_HW_CACHE_MAX] | ||
103 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
104 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
105 | { | ||
106 | [ C(L1D) ] = { | ||
107 | [ C(OP_READ) ] = { | ||
108 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ | ||
109 | [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ | ||
110 | }, | ||
111 | [ C(OP_WRITE) ] = { | ||
112 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ | ||
113 | [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ | ||
114 | }, | ||
115 | [ C(OP_PREFETCH) ] = { | ||
116 | [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ | ||
117 | [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ | ||
118 | }, | ||
119 | }, | ||
120 | [ C(L1I ) ] = { | ||
121 | [ C(OP_READ) ] = { | ||
122 | [ C(RESULT_ACCESS) ] = 0x0480, /* L1I.READS */ | ||
123 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ | ||
124 | }, | ||
125 | [ C(OP_WRITE) ] = { | ||
126 | [ C(RESULT_ACCESS) ] = -1, | ||
127 | [ C(RESULT_MISS) ] = -1, | ||
128 | }, | ||
129 | [ C(OP_PREFETCH) ] = { | ||
130 | [ C(RESULT_ACCESS) ] = 0x0, | ||
131 | [ C(RESULT_MISS) ] = 0x0, | ||
132 | }, | ||
133 | }, | ||
134 | [ C(L2 ) ] = { | ||
135 | [ C(OP_READ) ] = { | ||
136 | [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ | ||
137 | [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ | ||
138 | }, | ||
139 | [ C(OP_WRITE) ] = { | ||
140 | [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ | ||
141 | [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ | ||
142 | }, | ||
143 | [ C(OP_PREFETCH) ] = { | ||
144 | [ C(RESULT_ACCESS) ] = 0xc024, /* L2_RQSTS.PREFETCHES */ | ||
145 | [ C(RESULT_MISS) ] = 0x8024, /* L2_RQSTS.PREFETCH_MISS */ | ||
146 | }, | ||
147 | }, | ||
148 | [ C(DTLB) ] = { | ||
149 | [ C(OP_READ) ] = { | ||
150 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ | ||
151 | [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ | ||
152 | }, | ||
153 | [ C(OP_WRITE) ] = { | ||
154 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ | ||
155 | [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ | ||
156 | }, | ||
157 | [ C(OP_PREFETCH) ] = { | ||
158 | [ C(RESULT_ACCESS) ] = 0x0, | ||
159 | [ C(RESULT_MISS) ] = 0x0, | ||
160 | }, | ||
161 | }, | ||
162 | [ C(ITLB) ] = { | ||
163 | [ C(OP_READ) ] = { | ||
164 | [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ | ||
165 | [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISS_RETIRED */ | ||
166 | }, | ||
167 | [ C(OP_WRITE) ] = { | ||
168 | [ C(RESULT_ACCESS) ] = -1, | ||
169 | [ C(RESULT_MISS) ] = -1, | ||
170 | }, | ||
171 | [ C(OP_PREFETCH) ] = { | ||
172 | [ C(RESULT_ACCESS) ] = -1, | ||
173 | [ C(RESULT_MISS) ] = -1, | ||
174 | }, | ||
175 | }, | ||
176 | [ C(BPU ) ] = { | ||
177 | [ C(OP_READ) ] = { | ||
178 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ | ||
179 | [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ | ||
180 | }, | ||
181 | [ C(OP_WRITE) ] = { | ||
182 | [ C(RESULT_ACCESS) ] = -1, | ||
183 | [ C(RESULT_MISS) ] = -1, | ||
184 | }, | ||
185 | [ C(OP_PREFETCH) ] = { | ||
186 | [ C(RESULT_ACCESS) ] = -1, | ||
187 | [ C(RESULT_MISS) ] = -1, | ||
188 | }, | ||
189 | }, | ||
190 | }; | ||
191 | |||
192 | static const u64 core2_hw_cache_event_ids | ||
193 | [PERF_COUNT_HW_CACHE_MAX] | ||
194 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
195 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
196 | { | ||
197 | /* To be filled in */ | ||
198 | }; | ||
199 | |||
200 | static const u64 atom_hw_cache_event_ids | ||
201 | [PERF_COUNT_HW_CACHE_MAX] | ||
202 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
203 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
204 | { | ||
205 | /* To be filled in */ | ||
206 | }; | ||
207 | |||
86 | static u64 intel_pmu_raw_event(u64 event) | 208 | static u64 intel_pmu_raw_event(u64 event) |
87 | { | 209 | { |
88 | #define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL | 210 | #define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL |
@@ -246,6 +368,39 @@ static inline int x86_pmu_initialized(void) | |||
246 | return x86_pmu.handle_irq != NULL; | 368 | return x86_pmu.handle_irq != NULL; |
247 | } | 369 | } |
248 | 370 | ||
371 | static inline int | ||
372 | set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr) | ||
373 | { | ||
374 | unsigned int cache_type, cache_op, cache_result; | ||
375 | u64 config, val; | ||
376 | |||
377 | config = attr->config; | ||
378 | |||
379 | cache_type = (config >> 0) & 0xff; | ||
380 | if (cache_type >= PERF_COUNT_HW_CACHE_MAX) | ||
381 | return -EINVAL; | ||
382 | |||
383 | cache_op = (config >> 8) & 0xff; | ||
384 | if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) | ||
385 | return -EINVAL; | ||
386 | |||
387 | cache_result = (config >> 16) & 0xff; | ||
388 | if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) | ||
389 | return -EINVAL; | ||
390 | |||
391 | val = hw_cache_event_ids[cache_type][cache_op][cache_result]; | ||
392 | |||
393 | if (val == 0) | ||
394 | return -ENOENT; | ||
395 | |||
396 | if (val == -1) | ||
397 | return -EINVAL; | ||
398 | |||
399 | hwc->config |= val; | ||
400 | |||
401 | return 0; | ||
402 | } | ||
403 | |||
249 | /* | 404 | /* |
250 | * Setup the hardware configuration for a given attr_type | 405 | * Setup the hardware configuration for a given attr_type |
251 | */ | 406 | */ |
@@ -288,22 +443,25 @@ static int __hw_perf_counter_init(struct perf_counter *counter) | |||
288 | hwc->sample_period = x86_pmu.max_period; | 443 | hwc->sample_period = x86_pmu.max_period; |
289 | 444 | ||
290 | atomic64_set(&hwc->period_left, hwc->sample_period); | 445 | atomic64_set(&hwc->period_left, hwc->sample_period); |
446 | counter->destroy = hw_perf_counter_destroy; | ||
291 | 447 | ||
292 | /* | 448 | /* |
293 | * Raw event type provide the config in the event structure | 449 | * Raw event type provide the config in the event structure |
294 | */ | 450 | */ |
295 | if (attr->type == PERF_TYPE_RAW) { | 451 | if (attr->type == PERF_TYPE_RAW) { |
296 | hwc->config |= x86_pmu.raw_event(attr->config); | 452 | hwc->config |= x86_pmu.raw_event(attr->config); |
297 | } else { | 453 | return 0; |
298 | if (attr->config >= x86_pmu.max_events) | ||
299 | return -EINVAL; | ||
300 | /* | ||
301 | * The generic map: | ||
302 | */ | ||
303 | hwc->config |= x86_pmu.event_map(attr->config); | ||
304 | } | 454 | } |
305 | 455 | ||
306 | counter->destroy = hw_perf_counter_destroy; | 456 | if (attr->type == PERF_TYPE_HW_CACHE) |
457 | return set_ext_hw_attr(hwc, attr); | ||
458 | |||
459 | if (attr->config >= x86_pmu.max_events) | ||
460 | return -EINVAL; | ||
461 | /* | ||
462 | * The generic map: | ||
463 | */ | ||
464 | hwc->config |= x86_pmu.event_map(attr->config); | ||
307 | 465 | ||
308 | return 0; | 466 | return 0; |
309 | } | 467 | } |
@@ -989,6 +1147,33 @@ static int intel_pmu_init(void) | |||
989 | 1147 | ||
990 | rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); | 1148 | rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); |
991 | 1149 | ||
1150 | /* | ||
1151 | * Nehalem: | ||
1152 | */ | ||
1153 | switch (boot_cpu_data.x86_model) { | ||
1154 | case 17: | ||
1155 | memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, | ||
1156 | sizeof(u64)*PERF_COUNT_HW_CACHE_MAX* | ||
1157 | PERF_COUNT_HW_CACHE_OP_MAX*PERF_COUNT_HW_CACHE_RESULT_MAX); | ||
1158 | |||
1159 | pr_info("... installed Core2 event tables\n"); | ||
1160 | break; | ||
1161 | default: | ||
1162 | case 26: | ||
1163 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, | ||
1164 | sizeof(u64)*PERF_COUNT_HW_CACHE_MAX* | ||
1165 | PERF_COUNT_HW_CACHE_OP_MAX*PERF_COUNT_HW_CACHE_RESULT_MAX); | ||
1166 | |||
1167 | pr_info("... installed Nehalem/Corei7 event tables\n"); | ||
1168 | break; | ||
1169 | case 28: | ||
1170 | memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, | ||
1171 | sizeof(u64)*PERF_COUNT_HW_CACHE_MAX* | ||
1172 | PERF_COUNT_HW_CACHE_OP_MAX*PERF_COUNT_HW_CACHE_RESULT_MAX); | ||
1173 | |||
1174 | pr_info("... installed Atom event tables\n"); | ||
1175 | break; | ||
1176 | } | ||
992 | return 0; | 1177 | return 0; |
993 | } | 1178 | } |
994 | 1179 | ||
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index f794c69b34c9..3586df840f69 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h | |||
@@ -28,6 +28,7 @@ enum perf_event_types { | |||
28 | PERF_TYPE_HARDWARE = 0, | 28 | PERF_TYPE_HARDWARE = 0, |
29 | PERF_TYPE_SOFTWARE = 1, | 29 | PERF_TYPE_SOFTWARE = 1, |
30 | PERF_TYPE_TRACEPOINT = 2, | 30 | PERF_TYPE_TRACEPOINT = 2, |
31 | PERF_TYPE_HW_CACHE = 3, | ||
31 | 32 | ||
32 | /* | 33 | /* |
33 | * available TYPE space, raw is the max value. | 34 | * available TYPE space, raw is the max value. |
@@ -56,6 +57,39 @@ enum attr_ids { | |||
56 | }; | 57 | }; |
57 | 58 | ||
58 | /* | 59 | /* |
60 | * Generalized hardware cache counters: | ||
61 | * | ||
62 | * { L1-D, L1-I, L2, DTLB, ITLB, BPU } x | ||
63 | * { read, write, prefetch } x | ||
64 | * { accesses, misses } | ||
65 | */ | ||
/* Which cache-like unit is being measured. */
enum hw_cache_id {
	PERF_COUNT_HW_CACHE_L1D		= 0,
	PERF_COUNT_HW_CACHE_L1I		= 1,
	PERF_COUNT_HW_CACHE_L2		= 2,
	PERF_COUNT_HW_CACHE_DTLB	= 3,
	PERF_COUNT_HW_CACHE_ITLB	= 4,
	PERF_COUNT_HW_CACHE_BPU		= 5,

	PERF_COUNT_HW_CACHE_MAX,	/* number of units, not a unit itself */
};

/* How the unit is being accessed. */
enum hw_cache_op_id {
	PERF_COUNT_HW_CACHE_OP_READ	= 0,
	PERF_COUNT_HW_CACHE_OP_WRITE	= 1,
	PERF_COUNT_HW_CACHE_OP_PREFETCH	= 2,

	PERF_COUNT_HW_CACHE_OP_MAX,	/* number of operations */
};

/* Which outcome of the access is counted. */
enum hw_cache_op_result_id {
	PERF_COUNT_HW_CACHE_RESULT_ACCESS	= 0,
	PERF_COUNT_HW_CACHE_RESULT_MISS		= 1,

	PERF_COUNT_HW_CACHE_RESULT_MAX,	/* number of results */
};
91 | |||
92 | /* | ||
59 | * Special "software" counters provided by the kernel, even if the hardware | 93 | * Special "software" counters provided by the kernel, even if the hardware |
60 | * does not support performance counters. These counters measure various | 94 | * does not support performance counters. These counters measure various |
61 | * physical and sw events of the kernel (and allow the profiling of them as | 95 | * physical and sw events of the kernel (and allow the profiling of them as |
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 75ae76796df1..5eacaaf3f9cd 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c | |||
@@ -3501,6 +3501,7 @@ perf_counter_alloc(struct perf_counter_attr *attr, | |||
3501 | 3501 | ||
3502 | switch (attr->type) { | 3502 | switch (attr->type) { |
3503 | case PERF_TYPE_HARDWARE: | 3503 | case PERF_TYPE_HARDWARE: |
3504 | case PERF_TYPE_HW_CACHE: | ||
3504 | pmu = hw_perf_counter_init(counter); | 3505 | pmu = hw_perf_counter_init(counter); |
3505 | break; | 3506 | break; |
3506 | 3507 | ||