 arch/x86/kernel/cpu/perf_event_intel_cqm.c | 195
 include/linux/perf_event.h                 |   1
 include/uapi/linux/perf_event.h            |   1
 kernel/events/core.c                       |   2
 4 files changed, 178 insertions(+), 21 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
index b5d9d746dbc0..8003d87afd89 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
@@ -182,23 +182,124 @@ fail:
 
 /*
  * Determine if @a and @b measure the same set of tasks.
+ *
+ * If @a and @b measure the same set of tasks then we want to share a
+ * single RMID.
  */
 static bool __match_event(struct perf_event *a, struct perf_event *b)
 {
+	/* Per-cpu and task events don't mix */
 	if ((a->attach_state & PERF_ATTACH_TASK) !=
 	    (b->attach_state & PERF_ATTACH_TASK))
 		return false;
 
-	/* not task */
+#ifdef CONFIG_CGROUP_PERF
+	if (a->cgrp != b->cgrp)
+		return false;
+#endif
+
+	/* If not task event, we're machine wide */
+	if (!(b->attach_state & PERF_ATTACH_TASK))
+		return true;
+
+	/*
+	 * Events that target same task are placed into the same cache group.
+	 */
+	if (a->hw.cqm_target == b->hw.cqm_target)
+		return true;
+
+	/*
+	 * Are we an inherited event?
+	 */
+	if (b->parent == a)
+		return true;
+
+	return false;
+}
+
+#ifdef CONFIG_CGROUP_PERF
+static inline struct perf_cgroup *event_to_cgroup(struct perf_event *event)
+{
+	if (event->attach_state & PERF_ATTACH_TASK)
+		return perf_cgroup_from_task(event->hw.cqm_target);
 
-	return true; /* if not task, we're machine wide */
+	return event->cgrp;
 }
+#endif
 
 /*
  * Determine if @a's tasks intersect with @b's tasks
+ *
+ * There are combinations of events that we explicitly prohibit,
+ *
+ *                 PROHIBITS
+ * system-wide  ->  cgroup and task
+ * cgroup       ->  system-wide
+ *              ->  task in cgroup
+ * task         ->  system-wide
+ *              ->  task in cgroup
+ *
+ * Call this function before allocating an RMID.
  */
 static bool __conflict_event(struct perf_event *a, struct perf_event *b)
 {
+#ifdef CONFIG_CGROUP_PERF
+	/*
+	 * We can have any number of cgroups but only one system-wide
+	 * event at a time.
+	 */
+	if (a->cgrp && b->cgrp) {
+		struct perf_cgroup *ac = a->cgrp;
+		struct perf_cgroup *bc = b->cgrp;
+
+		/*
+		 * This condition should have been caught in
+		 * __match_event() and we should be sharing an RMID.
+		 */
+		WARN_ON_ONCE(ac == bc);
+
+		if (cgroup_is_descendant(ac->css.cgroup, bc->css.cgroup) ||
+		    cgroup_is_descendant(bc->css.cgroup, ac->css.cgroup))
+			return true;
+
+		return false;
+	}
+
+	if (a->cgrp || b->cgrp) {
+		struct perf_cgroup *ac, *bc;
+
+		/*
+		 * cgroup and system-wide events are mutually exclusive
+		 */
+		if ((a->cgrp && !(b->attach_state & PERF_ATTACH_TASK)) ||
+		    (b->cgrp && !(a->attach_state & PERF_ATTACH_TASK)))
+			return true;
+
+		/*
+		 * Ensure neither event is part of the other's cgroup
+		 */
+		ac = event_to_cgroup(a);
+		bc = event_to_cgroup(b);
+		if (ac == bc)
+			return true;
+
+		/*
+		 * Must have cgroup and non-intersecting task events.
+		 */
+		if (!ac || !bc)
+			return false;
+
+		/*
+		 * We have cgroup and task events, and the task belongs
+		 * to a cgroup. Check for for overlap.
+		 */
+		if (cgroup_is_descendant(ac->css.cgroup, bc->css.cgroup) ||
+		    cgroup_is_descendant(bc->css.cgroup, ac->css.cgroup))
+			return true;
+
+		return false;
+	}
+#endif
 	/*
 	 * If one of them is not a task, same story as above with cgroups.
 	 */
@@ -245,9 +346,16 @@ static int intel_cqm_setup_event(struct perf_event *event,
 
 static void intel_cqm_event_read(struct perf_event *event)
 {
-	unsigned long rmid = event->hw.cqm_rmid;
+	unsigned long rmid;
 	u64 val;
 
+	/*
+	 * Task events are handled by intel_cqm_event_count().
+	 */
+	if (event->cpu == -1)
+		return;
+
+	rmid = event->hw.cqm_rmid;
 	val = __rmid_read(rmid);
 
 	/*
@@ -259,6 +367,63 @@ static void intel_cqm_event_read(struct perf_event *event)
 	local64_set(&event->count, val);
 }
 
+struct rmid_read {
+	unsigned int rmid;
+	atomic64_t value;
+};
+
+static void __intel_cqm_event_count(void *info)
+{
+	struct rmid_read *rr = info;
+	u64 val;
+
+	val = __rmid_read(rr->rmid);
+
+	if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
+		return;
+
+	atomic64_add(val, &rr->value);
+}
+
+static inline bool cqm_group_leader(struct perf_event *event)
+{
+	return !list_empty(&event->hw.cqm_groups_entry);
+}
+
+static u64 intel_cqm_event_count(struct perf_event *event)
+{
+	struct rmid_read rr = {
+		.rmid = event->hw.cqm_rmid,
+		.value = ATOMIC64_INIT(0),
+	};
+
+	/*
+	 * We only need to worry about task events. System-wide events
+	 * are handled like usual, i.e. entirely with
+	 * intel_cqm_event_read().
+	 */
+	if (event->cpu != -1)
+		return __perf_event_count(event);
+
+	/*
+	 * Only the group leader gets to report values. This stops us
+	 * reporting duplicate values to userspace, and gives us a clear
+	 * rule for which task gets to report the values.
+	 *
+	 * Note that it is impossible to attribute these values to
+	 * specific packages - we forfeit that ability when we create
+	 * task events.
+	 */
+	if (!cqm_group_leader(event))
+		return 0;
+
+	on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, &rr, 1);
+
+	local64_set(&event->count, atomic64_read(&rr.value));
+
+	return __perf_event_count(event);
+}
+
 static void intel_cqm_event_start(struct perf_event *event, int mode)
 {
 	struct intel_cqm_state *state = this_cpu_ptr(&cqm_state);
@@ -344,7 +509,7 @@ static void intel_cqm_event_destroy(struct perf_event *event)
 	/*
 	 * And we're the group leader..
 	 */
-	if (!list_empty(&event->hw.cqm_groups_entry)) {
+	if (cqm_group_leader(event)) {
 		/*
 		 * If there was a group_other, make that leader, otherwise
 		 * destroy the group and return the RMID.
@@ -365,17 +530,6 @@ static void intel_cqm_event_destroy(struct perf_event *event)
 
 static struct pmu intel_cqm_pmu;
 
-/*
- * XXX there's a bit of a problem in that we cannot simply do the one
- * event per node as one would want, since that one event would one get
- * scheduled on the one cpu. But we want to 'schedule' the RMID on all
- * CPUs.
- *
- * This means we want events for each CPU, however, that generates a lot
- * of duplicate values out to userspace -- this is not to be helped
- * unless we want to change the core code in some way. Fore more info,
- * see intel_cqm_event_read().
- */
 static int intel_cqm_event_init(struct perf_event *event)
 {
 	struct perf_event *group = NULL;
@@ -387,9 +541,6 @@ static int intel_cqm_event_init(struct perf_event *event)
 	if (event->attr.config & ~QOS_EVENT_MASK)
 		return -EINVAL;
 
-	if (event->cpu == -1)
-		return -EINVAL;
-
 	/* unsupported modes and filters */
 	if (event->attr.exclude_user ||
 	    event->attr.exclude_kernel ||
@@ -407,7 +558,8 @@ static int intel_cqm_event_init(struct perf_event *event)
 
 	mutex_lock(&cache_mutex);
 
-	err = intel_cqm_setup_event(event, &group); /* will also set rmid */
+	/* Will also set rmid */
+	err = intel_cqm_setup_event(event, &group);
 	if (err)
 		goto out;
 
@@ -470,6 +622,7 @@ static struct pmu intel_cqm_pmu = {
 	.start = intel_cqm_event_start,
 	.stop = intel_cqm_event_stop,
 	.read = intel_cqm_event_read,
+	.count = intel_cqm_event_count,
 };
 
 static inline void cqm_pick_event_reader(int cpu)
@@ -599,8 +752,8 @@ static int __init intel_cqm_init(void)
 
 	__perf_cpu_notifier(intel_cqm_cpu_notifier);
 
-	ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm", -1);
-
+	ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm",
+				PERF_TYPE_INTEL_CQM);
 	if (ret)
 		pr_err("Intel CQM perf registration failed: %d\n", ret);
 	else
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index ca5504c48f4f..dac4c2831d82 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -129,6 +129,7 @@ struct hw_perf_event {
 			struct list_head cqm_events_entry;
 			struct list_head cqm_groups_entry;
 			struct list_head cqm_group_entry;
+			struct task_struct *cqm_target;
 		};
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 		struct { /* breakpoint */
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 1e3cd07cf76e..3c8b45de57ec 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -32,6 +32,7 @@ enum perf_type_id {
 	PERF_TYPE_HW_CACHE = 3,
 	PERF_TYPE_RAW = 4,
 	PERF_TYPE_BREAKPOINT = 5,
+	PERF_TYPE_INTEL_CQM = 6,
 
 	PERF_TYPE_MAX, /* non-ABI */
 };
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1fc3bae5904a..71109a045450 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7181,6 +7181,8 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 		else if (attr->type == PERF_TYPE_BREAKPOINT)
 			event->hw.bp_target = task;
 #endif
+		else if (attr->type == PERF_TYPE_INTEL_CQM)
+			event->hw.cqm_target = task;
 	}
 
 	if (!overflow_handler && parent_event) {
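
For context only (this is not part of the commit): a minimal userspace sketch of what the patch enables, namely opening a CQM llc_occupancy event against a task (pid) rather than a CPU, which intel_cqm_event_init() previously rejected. The type value 6 mirrors the PERF_TYPE_INTEL_CQM constant added above; the config value 1 for llc_occupancy is an assumption for illustration, and a robust tool would instead read the "type" and "events/llc_occupancy" files under /sys/bus/event_source/devices/intel_cqm/ and apply any scaling advertised there.

/*
 * Illustrative sketch only -- not part of the patch. Opens an Intel CQM
 * LLC occupancy event for a single task and prints the raw value.
 * The config value (1) for llc_occupancy is an assumption; read the
 * event id and type from sysfs rather than hard-coding them.
 */
#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(int argc, char **argv)
{
	struct perf_event_attr attr;
	uint64_t count;
	int fd;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <pid>\n", argv[0]);
		return 1;
	}

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = 6;		/* PERF_TYPE_INTEL_CQM, as added above */
	attr.config = 1;	/* assumed id of the llc_occupancy event */

	/* pid > 0, cpu == -1: a task event, which this patch now permits */
	fd = perf_event_open(&attr, (pid_t)atoi(argv[1]), -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("llc_occupancy (raw): %llu\n", (unsigned long long)count);

	close(fd);
	return 0;
}

Reading the fd goes through the new intel_cqm_event_count() path: the group leader IPIs one CPU per package (cqm_cpumask) to read the RMID and sums the results, which is why the value cannot be broken down per package.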