 arch/x86/kernel/cpu/perf_event_intel_cqm.c | 195
 include/linux/perf_event.h                 |   1
 include/uapi/linux/perf_event.h            |   1
 kernel/events/core.c                       |   2
 4 files changed, 178 insertions(+), 21 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
index b5d9d746dbc0..8003d87afd89 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
@@ -182,23 +182,124 @@ fail:
 
 /*
  * Determine if @a and @b measure the same set of tasks.
+ *
+ * If @a and @b measure the same set of tasks then we want to share a
+ * single RMID.
  */
 static bool __match_event(struct perf_event *a, struct perf_event *b)
 {
+        /* Per-cpu and task events don't mix */
         if ((a->attach_state & PERF_ATTACH_TASK) !=
             (b->attach_state & PERF_ATTACH_TASK))
                 return false;
 
-        /* not task */
+#ifdef CONFIG_CGROUP_PERF
+        if (a->cgrp != b->cgrp)
+                return false;
+#endif
+
+        /* If not task event, we're machine wide */
+        if (!(b->attach_state & PERF_ATTACH_TASK))
+                return true;
+
+        /*
+         * Events that target same task are placed into the same cache group.
+         */
+        if (a->hw.cqm_target == b->hw.cqm_target)
+                return true;
+
+        /*
+         * Are we an inherited event?
+         */
+        if (b->parent == a)
+                return true;
+
+        return false;
+}
+
+#ifdef CONFIG_CGROUP_PERF
+static inline struct perf_cgroup *event_to_cgroup(struct perf_event *event)
+{
+        if (event->attach_state & PERF_ATTACH_TASK)
+                return perf_cgroup_from_task(event->hw.cqm_target);
 
-        return true; /* if not task, we're machine wide */
+        return event->cgrp;
 }
+#endif
 
 /*
  * Determine if @a's tasks intersect with @b's tasks
+ *
+ * There are combinations of events that we explicitly prohibit,
+ *
+ *                   PROHIBITS
+ * system-wide  ->   cgroup and task
+ * cgroup       ->   system-wide
+ *              ->   task in cgroup
+ * task         ->   system-wide
+ *              ->   task in cgroup
+ *
+ * Call this function before allocating an RMID.
  */
 static bool __conflict_event(struct perf_event *a, struct perf_event *b)
 {
+#ifdef CONFIG_CGROUP_PERF
+        /*
+         * We can have any number of cgroups but only one system-wide
+         * event at a time.
+         */
+        if (a->cgrp && b->cgrp) {
+                struct perf_cgroup *ac = a->cgrp;
+                struct perf_cgroup *bc = b->cgrp;
+
+                /*
+                 * This condition should have been caught in
+                 * __match_event() and we should be sharing an RMID.
+                 */
+                WARN_ON_ONCE(ac == bc);
+
+                if (cgroup_is_descendant(ac->css.cgroup, bc->css.cgroup) ||
+                    cgroup_is_descendant(bc->css.cgroup, ac->css.cgroup))
+                        return true;
+
+                return false;
+        }
+
+        if (a->cgrp || b->cgrp) {
+                struct perf_cgroup *ac, *bc;
+
+                /*
+                 * cgroup and system-wide events are mutually exclusive
+                 */
+                if ((a->cgrp && !(b->attach_state & PERF_ATTACH_TASK)) ||
+                    (b->cgrp && !(a->attach_state & PERF_ATTACH_TASK)))
+                        return true;
+
+                /*
+                 * Ensure neither event is part of the other's cgroup
+                 */
+                ac = event_to_cgroup(a);
+                bc = event_to_cgroup(b);
+                if (ac == bc)
+                        return true;
+
+                /*
+                 * Must have cgroup and non-intersecting task events.
+                 */
+                if (!ac || !bc)
+                        return false;
+
+                /*
+                 * We have cgroup and task events, and the task belongs
+                 * to a cgroup. Check for overlap.
+                 */
+                if (cgroup_is_descendant(ac->css.cgroup, bc->css.cgroup) ||
+                    cgroup_is_descendant(bc->css.cgroup, ac->css.cgroup))
+                        return true;
+
+                return false;
+        }
+#endif
         /*
          * If one of them is not a task, same story as above with cgroups.
          */
@@ -245,9 +346,16 @@ static int intel_cqm_setup_event(struct perf_event *event,
 
 static void intel_cqm_event_read(struct perf_event *event)
 {
-        unsigned long rmid = event->hw.cqm_rmid;
+        unsigned long rmid;
         u64 val;
 
+        /*
+         * Task events are handled by intel_cqm_event_count().
+         */
+        if (event->cpu == -1)
+                return;
+
+        rmid = event->hw.cqm_rmid;
         val = __rmid_read(rmid);
 
         /*
@@ -259,6 +367,63 @@ static void intel_cqm_event_read(struct perf_event *event)
         local64_set(&event->count, val);
 }
 
+struct rmid_read {
+        unsigned int rmid;
+        atomic64_t value;
+};
+
+static void __intel_cqm_event_count(void *info)
+{
+        struct rmid_read *rr = info;
+        u64 val;
+
+        val = __rmid_read(rr->rmid);
+
+        if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
+                return;
+
+        atomic64_add(val, &rr->value);
+}
+
+static inline bool cqm_group_leader(struct perf_event *event)
+{
+        return !list_empty(&event->hw.cqm_groups_entry);
+}
+
+static u64 intel_cqm_event_count(struct perf_event *event)
+{
+        struct rmid_read rr = {
+                .rmid = event->hw.cqm_rmid,
+                .value = ATOMIC64_INIT(0),
+        };
+
+        /*
+         * We only need to worry about task events. System-wide events
+         * are handled like usual, i.e. entirely with
+         * intel_cqm_event_read().
+         */
+        if (event->cpu != -1)
+                return __perf_event_count(event);
+
+        /*
+         * Only the group leader gets to report values. This stops us
+         * reporting duplicate values to userspace, and gives us a clear
+         * rule for which task gets to report the values.
+         *
+         * Note that it is impossible to attribute these values to
+         * specific packages - we forfeit that ability when we create
+         * task events.
+         */
+        if (!cqm_group_leader(event))
+                return 0;
+
+        on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, &rr, 1);
+
+        local64_set(&event->count, atomic64_read(&rr.value));
+
+        return __perf_event_count(event);
+}
+
 static void intel_cqm_event_start(struct perf_event *event, int mode)
 {
         struct intel_cqm_state *state = this_cpu_ptr(&cqm_state);
@@ -344,7 +509,7 @@ static void intel_cqm_event_destroy(struct perf_event *event)
         /*
          * And we're the group leader..
          */
-        if (!list_empty(&event->hw.cqm_groups_entry)) {
+        if (cqm_group_leader(event)) {
                 /*
                  * If there was a group_other, make that leader, otherwise
                  * destroy the group and return the RMID.
@@ -365,17 +530,6 @@ static void intel_cqm_event_destroy(struct perf_event *event)
 
 static struct pmu intel_cqm_pmu;
 
-/*
- * XXX there's a bit of a problem in that we cannot simply do the one
- * event per node as one would want, since that one event would one get
- * scheduled on the one cpu. But we want to 'schedule' the RMID on all
- * CPUs.
- *
- * This means we want events for each CPU, however, that generates a lot
- * of duplicate values out to userspace -- this is not to be helped
- * unless we want to change the core code in some way. Fore more info,
- * see intel_cqm_event_read().
- */
 static int intel_cqm_event_init(struct perf_event *event)
 {
         struct perf_event *group = NULL;
@@ -387,9 +541,6 @@ static int intel_cqm_event_init(struct perf_event *event)
         if (event->attr.config & ~QOS_EVENT_MASK)
                 return -EINVAL;
 
-        if (event->cpu == -1)
-                return -EINVAL;
-
         /* unsupported modes and filters */
         if (event->attr.exclude_user ||
             event->attr.exclude_kernel ||
@@ -407,7 +558,8 @@ static int intel_cqm_event_init(struct perf_event *event)
 
         mutex_lock(&cache_mutex);
 
-        err = intel_cqm_setup_event(event, &group); /* will also set rmid */
+        /* Will also set rmid */
+        err = intel_cqm_setup_event(event, &group);
         if (err)
                 goto out;
 
@@ -470,6 +622,7 @@ static struct pmu intel_cqm_pmu = {
         .start          = intel_cqm_event_start,
         .stop           = intel_cqm_event_stop,
         .read           = intel_cqm_event_read,
+        .count          = intel_cqm_event_count,
 };
 
 static inline void cqm_pick_event_reader(int cpu)
@@ -599,8 +752,8 @@ static int __init intel_cqm_init(void)
 
         __perf_cpu_notifier(intel_cqm_cpu_notifier);
 
-        ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm", -1);
-
+        ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm",
+                                PERF_TYPE_INTEL_CQM);
         if (ret)
                 pr_err("Intel CQM perf registration failed: %d\n", ret);
         else
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index ca5504c48f4f..dac4c2831d82 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -129,6 +129,7 @@ struct hw_perf_event {
                         struct list_head        cqm_events_entry;
                         struct list_head        cqm_groups_entry;
                         struct list_head        cqm_group_entry;
+                        struct task_struct      *cqm_target;
                 };
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
                 struct { /* breakpoint */
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 1e3cd07cf76e..3c8b45de57ec 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -32,6 +32,7 @@ enum perf_type_id {
         PERF_TYPE_HW_CACHE              = 3,
         PERF_TYPE_RAW                   = 4,
         PERF_TYPE_BREAKPOINT            = 5,
+        PERF_TYPE_INTEL_CQM             = 6,
 
         PERF_TYPE_MAX,                  /* non-ABI */
 };
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1fc3bae5904a..71109a045450 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7181,6 +7181,8 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
                 else if (attr->type == PERF_TYPE_BREAKPOINT)
                         event->hw.bp_target = task;
 #endif
+                else if (attr->type == PERF_TYPE_INTEL_CQM)
+                        event->hw.cqm_target = task;
         }
 
         if (!overflow_handler && parent_event) {
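
A minimal user-space sketch (not part of the patch) of how a per-task event could be opened once this change is applied: pid = 0 with cpu = -1 now passes intel_cqm_event_init() because the cpu == -1 rejection above is removed, and reads of a task event are served by intel_cqm_event_count(). The config value 1 for llc_occupancy and the presence of PERF_TYPE_INTEL_CQM in the installed <linux/perf_event.h> are assumptions; check the driver's QOS_L3_OCCUP_EVENT_ID and your headers before relying on them.

/* Hypothetical example, not part of the patch. */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>
#include <stdint.h>

static int perf_event_open(struct perf_event_attr *attr, pid_t pid,
                           int cpu, int group_fd, unsigned long flags)
{
        return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
        struct perf_event_attr attr;
        uint64_t count;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_INTEL_CQM;  /* new fixed type added above */
        attr.config = 1;                  /* assumed: llc_occupancy event ID */

        /* pid = 0, cpu = -1: a task event for the calling process */
        fd = perf_event_open(&attr, 0, -1, -1, 0);
        if (fd < 0) {
                perror("perf_event_open");
                return 1;
        }

        /* Raw occupancy count; perf applies the sysfs .scale to get bytes */
        if (read(fd, &count, sizeof(count)) == sizeof(count))
                printf("llc_occupancy (raw): %llu\n", (unsigned long long)count);

        close(fd);
        return 0;
}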