aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYan, Zheng <zheng.z.yan@intel.com>2012-06-15 02:31:34 -0400
committerIngo Molnar <mingo@kernel.org>2012-06-18 06:13:22 -0400
commit087bfbb032691262f2f7d52b910652450c5554b8 (patch)
tree1d56c28c749c8eebadbcddfa69c04e1b367ca24c
parent0cda4c023132aa93f2dd94811061f812e88daf4c (diff)
perf/x86: Add generic Intel uncore PMU support
This patch adds the generic Intel uncore PMU support, including helper functions that add/delete uncore events, a hrtimer that periodically polls the counters to avoid overflow and code that places all events for a particular socket onto a single cpu. The code design is based on the structure of Sandy Bridge-EP's uncore subsystem, which consists of a variety of components, each component contains one or more "boxes". (Tooling support follows in the next patches.) Signed-off-by: Zheng Yan <zheng.z.yan@intel.com> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Thomas Gleixner <tglx@linutronix.de> Link: http://lkml.kernel.org/r/1339741902-8449-6-git-send-email-zheng.z.yan@intel.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--arch/x86/kernel/cpu/Makefile4
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.c878
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.h204
3 files changed, 1085 insertions, 1 deletions
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 6ab6aa2fdfdd..bac4c3804cc7 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -32,7 +32,9 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o
32 32
33ifdef CONFIG_PERF_EVENTS 33ifdef CONFIG_PERF_EVENTS
34obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o 34obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o
35obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_p4.o perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o 35obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_p4.o
36obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
37obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o
36endif 38endif
37 39
38obj-$(CONFIG_X86_MCE) += mcheck/ 40obj-$(CONFIG_X86_MCE) += mcheck/
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
new file mode 100644
index 000000000000..fe76a07dfdbc
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -0,0 +1,878 @@
1#include "perf_event_intel_uncore.h"
2
/* sentinel: an empty, NULL-terminated list of uncore types */
3static struct intel_uncore_type *empty_uncore[] = { NULL, };
/* NULL-terminated list of MSR-based uncore types; model-specific setup fills this in */
4static struct intel_uncore_type **msr_uncores = empty_uncore;
5
6/* mask of cpus that collect uncore events */
7static cpumask_t uncore_cpu_mask;
8
9/* constraint for the fixed counter */
/* the fixed event is marked by hwc->config == ~0ULL and may only use the fixed index */
10static struct event_constraint constraint_fixed =
11 EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
12
13static void uncore_assign_hw_event(struct intel_uncore_box *box,
14 struct perf_event *event, int idx)
15{
16 struct hw_perf_event *hwc = &event->hw;
17
18 hwc->idx = idx;
19 hwc->last_tag = ++box->tags[idx];
20
21 if (hwc->idx == UNCORE_PMC_IDX_FIXED) {
22 hwc->event_base = uncore_msr_fixed_ctr(box);
23 hwc->config_base = uncore_msr_fixed_ctl(box);
24 return;
25 }
26
27 hwc->config_base = uncore_msr_event_ctl(box, hwc->idx);
28 hwc->event_base = uncore_msr_perf_ctr(box, hwc->idx);
29}
30
/*
 * Fold the current hardware count into event->count.  Must cope with
 * counters narrower than 64 bits and with concurrent updates from the
 * polling hrtimer, hence the lock-free xchg retry loop.
 */
31static void uncore_perf_event_update(struct intel_uncore_box *box,
32 struct perf_event *event)
33{
34 u64 prev_count, new_count, delta;
35 int shift;
36
/* shift so the counter's valid bits occupy the top of a u64 */
37 if (event->hw.idx >= UNCORE_PMC_IDX_FIXED)
38 shift = 64 - uncore_fixed_ctr_bits(box);
39 else
40 shift = 64 - uncore_perf_ctr_bits(box);
41
42 /* the hrtimer might modify the previous event value */
/* retry until we atomically swap in the count we just read */
43again:
44 prev_count = local64_read(&event->hw.prev_count);
45 new_count = uncore_read_counter(box, event);
46 if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
47 goto again;
48
/* shift up/down to truncate to counter width; handles wraparound */
49 delta = (new_count << shift) - (prev_count << shift);
50 delta >>= shift;
51
52 local64_add(delta, &event->count);
53}
54
55/*
56 * The overflow interrupt is unavailable for SandyBridge-EP, is broken
57 * for SandyBridge. So we use hrtimer to periodically poll the counter
58 * to avoid overflow.
59 */
60static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
61{
62 struct intel_uncore_box *box;
63 unsigned long flags;
64 int bit;
65
66 box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
/* stop if the box is idle or was migrated to another cpu */
67 if (!box->n_active || box->cpu != smp_processor_id())
68 return HRTIMER_NORESTART;
69 /*
70 * disable local interrupts to prevent uncore_pmu_event_start/stop
71 * from interrupting the update process
72 */
73 local_irq_save(flags);
74
/* fold in every active counter before it can wrap */
75 for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
76 uncore_perf_event_update(box, box->events[bit]);
77
78 local_irq_restore(flags);
79
80 hrtimer_forward_now(hrtimer, ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL));
81 return HRTIMER_RESTART;
82}
83
/* arm the per-box polling timer, pinned to the collecting cpu */
84static void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
85{
86 __hrtimer_start_range_ns(&box->hrtimer,
87 ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL), 0,
88 HRTIMER_MODE_REL_PINNED, 0);
89}
90
/* stop the per-box polling timer (waits if the callback is running) */
91static void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
92{
93 hrtimer_cancel(&box->hrtimer);
94}
95
/* one-time hrtimer setup for a freshly allocated box */
96static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
97{
98 hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
99 box->hrtimer.function = uncore_pmu_hrtimer;
100}
101
102struct intel_uncore_box *uncore_alloc_box(int cpu)
103{
104 struct intel_uncore_box *box;
105
106 box = kmalloc_node(sizeof(*box), GFP_KERNEL | __GFP_ZERO,
107 cpu_to_node(cpu));
108 if (!box)
109 return NULL;
110
111 uncore_pmu_init_hrtimer(box);
112 atomic_set(&box->refcnt, 1);
113 box->cpu = -1;
114 box->phys_id = -1;
115
116 return box;
117}
118
119static struct intel_uncore_box *
120uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
121{
122 return *per_cpu_ptr(pmu->box, cpu);
123}
124
125static struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
126{
127 return container_of(event->pmu, struct intel_uncore_pmu, pmu);
128}
129
static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
{
	struct intel_uncore_pmu *pmu = uncore_event_to_pmu(event);

	/*
	 * The perf core schedules events per cpu, and all uncore events
	 * of a physical package are collected on a single cpu, so the
	 * box of the current cpu is the right one.
	 */
	return uncore_pmu_to_box(pmu, smp_processor_id());
}
139
140static int uncore_collect_events(struct intel_uncore_box *box,
141 struct perf_event *leader, bool dogrp)
142{
143 struct perf_event *event;
144 int n, max_count;
145
146 max_count = box->pmu->type->num_counters;
147 if (box->pmu->type->fixed_ctl)
148 max_count++;
149
150 if (box->n_events >= max_count)
151 return -EINVAL;
152
153 n = box->n_events;
154 box->event_list[n] = leader;
155 n++;
156 if (!dogrp)
157 return n;
158
159 list_for_each_entry(event, &leader->sibling_list, group_entry) {
160 if (event->state <= PERF_EVENT_STATE_OFF)
161 continue;
162
163 if (n >= max_count)
164 return -EINVAL;
165
166 box->event_list[n] = event;
167 n++;
168 }
169 return n;
170}
171
172static struct event_constraint *
173uncore_event_constraint(struct intel_uncore_type *type,
174 struct perf_event *event)
175{
176 struct event_constraint *c;
177
178 if (event->hw.config == ~0ULL)
179 return &constraint_fixed;
180
181 if (type->constraints) {
182 for_each_event_constraint(c, type->constraints) {
183 if ((event->hw.config & c->cmask) == c->code)
184 return c;
185 }
186 }
187
188 return &type->unconstrainted;
189}
190
/*
 * Assign a hardware counter index to each of the @n events on the box,
 * writing the result into @assign[].  Mirrors the core x86 scheduler:
 * first try to keep every event on its previous counter (fast path),
 * otherwise fall back to the generic constraint solver.
 * Returns 0 on success, -EINVAL when no valid assignment exists.
 */
191static int uncore_assign_events(struct intel_uncore_box *box,
192 int assign[], int n)
193{
194 unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
195 struct event_constraint *c, *constraints[UNCORE_PMC_IDX_MAX];
196 int i, ret, wmin, wmax;
197 struct hw_perf_event *hwc;
198
199 bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
200
/* gather each event's constraint and track min/max weights for the solver */
201 for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
202 c = uncore_event_constraint(box->pmu->type,
203 box->event_list[i]);
204 constraints[i] = c;
205 wmin = min(wmin, c->weight);
206 wmax = max(wmax, c->weight);
207 }
208
209 /* fastpath, try to reuse previous register */
210 for (i = 0; i < n; i++) {
211 hwc = &box->event_list[i]->hw;
212 c = constraints[i];
213
214 /* never assigned */
215 if (hwc->idx == -1)
216 break;
217
218 /* constraint still honored */
219 if (!test_bit(hwc->idx, c->idxmsk))
220 break;
221
222 /* not already used */
223 if (test_bit(hwc->idx, used_mask))
224 break;
225
226 __set_bit(hwc->idx, used_mask);
227 assign[i] = hwc->idx;
228 }
/* fast path only counts if every event kept its old counter */
229 if (i == n)
230 return 0;
231
232 /* slow path */
233 ret = perf_assign_events(constraints, n, wmin, wmax, assign);
234 return ret ? -EINVAL : 0;
235}
236
/*
 * pmu ->start() callback: activate an already-added event on its
 * assigned counter.  The first active event also enables the box and
 * arms the overflow-polling hrtimer.
 */
237static void uncore_pmu_event_start(struct perf_event *event, int flags)
238{
239 struct intel_uncore_box *box = uncore_event_to_box(event);
240 int idx = event->hw.idx;
241
/* must currently be stopped and hold a valid counter index */
242 if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
243 return;
244
245 if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
246 return;
247
248 event->hw.state = 0;
249 box->events[idx] = event;
250 box->n_active++;
251 __set_bit(idx, box->active_mask);
252
/* snapshot the counter before enabling so deltas start from here */
253 local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
254 uncore_enable_event(box, event);
255
256 if (box->n_active == 1) {
257 uncore_enable_box(box);
258 uncore_pmu_start_hrtimer(box);
259 }
260}
261
/*
 * pmu ->stop() callback: deactivate the event's counter; the last
 * active event also disables the box and cancels the polling hrtimer.
 * With PERF_EF_UPDATE the final count is folded into the event.
 */
262static void uncore_pmu_event_stop(struct perf_event *event, int flags)
263{
264 struct intel_uncore_box *box = uncore_event_to_box(event);
265 struct hw_perf_event *hwc = &event->hw;
266
/* test-and-clear makes a second stop a no-op */
267 if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
268 uncore_disable_event(box, event);
269 box->n_active--;
270 box->events[hwc->idx] = NULL;
271 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
272 hwc->state |= PERF_HES_STOPPED;
273
274 if (box->n_active == 0) {
275 uncore_disable_box(box);
276 uncore_pmu_cancel_hrtimer(box);
277 }
278 }
279
280 if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
281 /*
282 * Drain the remaining delta count out of an event
283 * that we are disabling:
284 */
285 uncore_perf_event_update(box, event);
286 hwc->state |= PERF_HES_UPTODATE;
287 }
288}
289
/*
 * pmu ->add() callback: place @event on the box, recompute the counter
 * assignment for all events on the box, and restart any events that had
 * to move to a different counter.  Starts the new event unless the
 * caller omitted PERF_EF_START.
 */
290static int uncore_pmu_event_add(struct perf_event *event, int flags)
291{
292 struct intel_uncore_box *box = uncore_event_to_box(event);
293 struct hw_perf_event *hwc = &event->hw;
294 int assign[UNCORE_PMC_IDX_MAX];
295 int i, n, ret;
296
297 if (!box)
298 return -ENODEV;
299
/* n = total events on the box including the new one */
300 ret = n = uncore_collect_events(box, event, false);
301 if (ret < 0)
302 return ret;
303
/* PERF_HES_ARCH keeps the event stopped across rescheduling */
304 hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
305 if (!(flags & PERF_EF_START))
306 hwc->state |= PERF_HES_ARCH;
307
308 ret = uncore_assign_events(box, assign, n);
309 if (ret)
310 return ret;
311
312 /* save events moving to new counters */
/* note: 'event'/'hwc' are reused as loop cursors from here on */
313 for (i = 0; i < box->n_events; i++) {
314 event = box->event_list[i];
315 hwc = &event->hw;
316
/* same counter and same tag: assignment unchanged, keep it running */
317 if (hwc->idx == assign[i] &&
318 hwc->last_tag == box->tags[assign[i]])
319 continue;
320 /*
321 * Ensure we don't accidentally enable a stopped
322 * counter simply because we rescheduled.
323 */
324 if (hwc->state & PERF_HES_STOPPED)
325 hwc->state |= PERF_HES_ARCH;
326
327 uncore_pmu_event_stop(event, PERF_EF_UPDATE);
328 }
329
330 /* reprogram moved events into new counters */
331 for (i = 0; i < n; i++) {
332 event = box->event_list[i];
333 hwc = &event->hw;
334
335 if (hwc->idx != assign[i] ||
336 hwc->last_tag != box->tags[assign[i]])
337 uncore_assign_hw_event(box, event, assign[i]);
338 else if (i < box->n_events)
/* pre-existing event that never moved: nothing to restart */
339 continue;
340
341 if (hwc->state & PERF_HES_ARCH)
342 continue;
343
344 uncore_pmu_event_start(event, 0);
345 }
/* commit the new event count only after everything is programmed */
346 box->n_events = n;
347
348 return 0;
349}
350
351static void uncore_pmu_event_del(struct perf_event *event, int flags)
352{
353 struct intel_uncore_box *box = uncore_event_to_box(event);
354 int i;
355
356 uncore_pmu_event_stop(event, PERF_EF_UPDATE);
357
358 for (i = 0; i < box->n_events; i++) {
359 if (event == box->event_list[i]) {
360 while (++i < box->n_events)
361 box->event_list[i - 1] = box->event_list[i];
362
363 --box->n_events;
364 break;
365 }
366 }
367
368 event->hw.idx = -1;
369 event->hw.last_tag = ~0ULL;
370}
371
/* pmu ->read() callback: fold the current hardware count into the event */
static void uncore_pmu_event_read(struct perf_event *event)
{
	uncore_perf_event_update(uncore_event_to_box(event), event);
}
377
378/*
379 * validation ensures the group can be loaded onto the
380 * PMU if it was the only group available.
381 */
382static int uncore_validate_group(struct intel_uncore_pmu *pmu,
383 struct perf_event *event)
384{
385 struct perf_event *leader = event->group_leader;
386 struct intel_uncore_box *fake_box;
387 int assign[UNCORE_PMC_IDX_MAX];
388 int ret = -EINVAL, n;
389
390 fake_box = uncore_alloc_box(smp_processor_id());
391 if (!fake_box)
392 return -ENOMEM;
393
394 fake_box->pmu = pmu;
395 /*
396 * the event is not yet connected with its
397 * siblings therefore we must first collect
398 * existing siblings, then add the new event
399 * before we can simulate the scheduling
400 */
401 n = uncore_collect_events(fake_box, leader, true);
402 if (n < 0)
403 goto out;
404
405 fake_box->n_events = n;
406 n = uncore_collect_events(fake_box, event, false);
407 if (n < 0)
408 goto out;
409
410 fake_box->n_events = n;
411
412 ret = uncore_assign_events(fake_box, assign, n);
413out:
414 kfree(fake_box);
415 return ret;
416}
417
/*
 * pmu ->event_init() callback: validate the attr, pin the event to the
 * package's collecting cpu, translate the config, and (for non-leaders)
 * verify the whole group is schedulable.
 */
418int uncore_pmu_event_init(struct perf_event *event)
419{
420 struct intel_uncore_pmu *pmu;
421 struct intel_uncore_box *box;
422 struct hw_perf_event *hwc = &event->hw;
423 int ret;
424
/* -ENOENT lets the perf core try the next registered pmu */
425 if (event->attr.type != event->pmu->type)
426 return -ENOENT;
427
428 pmu = uncore_event_to_pmu(event);
429 /* no device found for this pmu */
430 if (pmu->func_id < 0)
431 return -ENOENT;
432
433 /*
434 * The uncore PMU always measures at all privilege levels,
435 * so it doesn't make sense to specify any exclude bits.
436 */
437 if (event->attr.exclude_user || event->attr.exclude_kernel ||
438 event->attr.exclude_hv || event->attr.exclude_idle)
439 return -EINVAL;
440
441 /* Sampling not supported yet */
442 if (hwc->sample_period)
443 return -EINVAL;
444
445 /*
446 * Place all uncore events for a particular physical package
447 * onto a single cpu
448 */
449 if (event->cpu < 0)
450 return -EINVAL;
451 box = uncore_pmu_to_box(pmu, event->cpu);
452 if (!box || box->cpu < 0)
453 return -EINVAL;
/* redirect the event to the package's designated collector cpu */
454 event->cpu = box->cpu;
455
456 if (event->attr.config == UNCORE_FIXED_EVENT) {
457 /* no fixed counter */
458 if (!pmu->type->fixed_ctl)
459 return -EINVAL;
460 /*
461 * if there is only one fixed counter, only the first pmu
462 * can access the fixed counter
463 */
464 if (pmu->type->single_fixed && pmu->pmu_idx > 0)
465 return -EINVAL;
/* ~0ULL config marks the fixed event for the constraint code */
466 hwc->config = ~0ULL;
467 } else {
468 hwc->config = event->attr.config & pmu->type->event_mask;
469 }
470
/* no counter assigned yet; assignment happens in ->add() */
471 event->hw.idx = -1;
472 event->hw.last_tag = ~0ULL;
473
474 if (event->group_leader != event)
475 ret = uncore_validate_group(pmu, event);
476 else
477 ret = 0;
478
479 return ret;
480}
481
482static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu)
483{
484 int ret;
485
486 pmu->pmu = (struct pmu) {
487 .attr_groups = pmu->type->attr_groups,
488 .task_ctx_nr = perf_invalid_context,
489 .event_init = uncore_pmu_event_init,
490 .add = uncore_pmu_event_add,
491 .del = uncore_pmu_event_del,
492 .start = uncore_pmu_event_start,
493 .stop = uncore_pmu_event_stop,
494 .read = uncore_pmu_event_read,
495 };
496
497 if (pmu->type->num_boxes == 1) {
498 if (strlen(pmu->type->name) > 0)
499 sprintf(pmu->name, "uncore_%s", pmu->type->name);
500 else
501 sprintf(pmu->name, "uncore");
502 } else {
503 sprintf(pmu->name, "uncore_%s_%d", pmu->type->name,
504 pmu->pmu_idx);
505 }
506
507 ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
508 return ret;
509}
510
511static void __init uncore_type_exit(struct intel_uncore_type *type)
512{
513 int i;
514
515 for (i = 0; i < type->num_boxes; i++)
516 free_percpu(type->pmus[i].box);
517 kfree(type->pmus);
518 type->pmus = NULL;
519 kfree(type->attr_groups[1]);
520 type->attr_groups[1] = NULL;
521}
522
523static int __init uncore_type_init(struct intel_uncore_type *type)
524{
525 struct intel_uncore_pmu *pmus;
526 struct attribute_group *events_group;
527 struct attribute **attrs;
528 int i, j;
529
530 pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL);
531 if (!pmus)
532 return -ENOMEM;
533
534 type->unconstrainted = (struct event_constraint)
535 __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
536 0, type->num_counters, 0);
537
538 for (i = 0; i < type->num_boxes; i++) {
539 pmus[i].func_id = -1;
540 pmus[i].pmu_idx = i;
541 pmus[i].type = type;
542 pmus[i].box = alloc_percpu(struct intel_uncore_box *);
543 if (!pmus[i].box)
544 goto fail;
545 }
546
547 if (type->event_descs) {
548 i = 0;
549 while (type->event_descs[i].attr.attr.name)
550 i++;
551
552 events_group = kzalloc(sizeof(struct attribute *) * (i + 1) +
553 sizeof(*events_group), GFP_KERNEL);
554 if (!events_group)
555 goto fail;
556
557 attrs = (struct attribute **)(events_group + 1);
558 events_group->name = "events";
559 events_group->attrs = attrs;
560
561 for (j = 0; j < i; j++)
562 attrs[j] = &type->event_descs[j].attr.attr;
563
564 type->attr_groups[1] = events_group;
565 }
566
567 type->pmus = pmus;
568 return 0;
569fail:
570 uncore_type_exit(type);
571 return -ENOMEM;
572}
573
574static int __init uncore_types_init(struct intel_uncore_type **types)
575{
576 int i, ret;
577
578 for (i = 0; types[i]; i++) {
579 ret = uncore_type_init(types[i]);
580 if (ret)
581 goto fail;
582 }
583 return 0;
584fail:
585 while (--i >= 0)
586 uncore_type_exit(types[i]);
587 return ret;
588}
589
/*
 * Hotplug CPU_DYING/CPU_UP_CANCELED: drop this cpu's reference on every
 * box it points at; the box is freed once the last package cpu is gone.
 */
590static void __cpuinit uncore_cpu_dying(int cpu)
591{
592 struct intel_uncore_type *type;
593 struct intel_uncore_pmu *pmu;
594 struct intel_uncore_box *box;
595 int i, j;
596
597 for (i = 0; msr_uncores[i]; i++) {
598 type = msr_uncores[i];
599 for (j = 0; j < type->num_boxes; j++) {
600 pmu = &type->pmus[j];
/* detach the per-cpu pointer before dropping the reference */
601 box = *per_cpu_ptr(pmu->box, cpu);
602 *per_cpu_ptr(pmu->box, cpu) = NULL;
603 if (box && atomic_dec_and_test(&box->refcnt))
604 kfree(box);
605 }
606 }
607}
608
/*
 * Hotplug CPU_STARTING (also run at boot via uncore_cpu_setup): if
 * another cpu in the same physical package already owns a box, share it
 * (bump its refcount and free our pre-allocated one); otherwise adopt
 * the pre-allocated box for this package and initialise the hardware.
 */
609static int __cpuinit uncore_cpu_starting(int cpu)
610{
611 struct intel_uncore_type *type;
612 struct intel_uncore_pmu *pmu;
613 struct intel_uncore_box *box, *exist;
614 int i, j, k, phys_id;
615
616 phys_id = topology_physical_package_id(cpu);
617
618 for (i = 0; msr_uncores[i]; i++) {
619 type = msr_uncores[i];
620 for (j = 0; j < type->num_boxes; j++) {
621 pmu = &type->pmus[j];
622 box = *per_cpu_ptr(pmu->box, cpu);
623 /* called by uncore_cpu_init? */
/* phys_id >= 0 means the box is already bound to a package */
624 if (box && box->phys_id >= 0) {
625 uncore_box_init(box);
626 continue;
627 }
628
/* look for an existing box for this package on another cpu */
629 for_each_online_cpu(k) {
630 exist = *per_cpu_ptr(pmu->box, k);
631 if (exist && exist->phys_id == phys_id) {
632 atomic_inc(&exist->refcnt);
633 *per_cpu_ptr(pmu->box, cpu) = exist;
634 kfree(box);
635 box = NULL;
636 break;
637 }
638 }
639
/* first cpu of this package: claim the pre-allocated box */
640 if (box) {
641 box->phys_id = phys_id;
642 uncore_box_init(box);
643 }
644 }
645 }
646 return 0;
647}
648
/*
 * Hotplug CPU_UP_PREPARE (and boot-time setup): pre-allocate a box for
 * every pmu and point this cpu's per-cpu slot at it.  @phys_id is the
 * package id, or -1 when it is not yet known (the hotplug path);
 * uncore_cpu_starting() resolves the binding later.
 */
649static int __cpuinit uncore_cpu_prepare(int cpu, int phys_id)
650{
651 struct intel_uncore_type *type;
652 struct intel_uncore_pmu *pmu;
653 struct intel_uncore_box *box;
654 int i, j;
655
656 for (i = 0; msr_uncores[i]; i++) {
657 type = msr_uncores[i];
658 for (j = 0; j < type->num_boxes; j++) {
659 pmu = &type->pmus[j];
/* first sighting of this pmu: record its functional id */
660 if (pmu->func_id < 0)
661 pmu->func_id = j;
662
663 box = uncore_alloc_box(cpu);
664 if (!box)
665 return -ENOMEM;
666
667 box->pmu = pmu;
668 box->phys_id = phys_id;
669 *per_cpu_ptr(pmu->box, cpu) = box;
670 }
671 }
672 return 0;
673}
674
/*
 * Move the collecting-cpu role for every box from @old_cpu to @new_cpu.
 * old_cpu == -1: just bind unowned boxes to new_cpu.
 * new_cpu == -1: unbind (the package has no online cpu left).
 * Otherwise the perf contexts are migrated between the two cpus.
 */
675static void __cpuinit uncore_change_context(struct intel_uncore_type **uncores,
676 int old_cpu, int new_cpu)
677{
678 struct intel_uncore_type *type;
679 struct intel_uncore_pmu *pmu;
680 struct intel_uncore_box *box;
681 int i, j;
682
683 for (i = 0; uncores[i]; i++) {
684 type = uncores[i];
685 for (j = 0; j < type->num_boxes; j++) {
686 pmu = &type->pmus[j];
687 if (old_cpu < 0)
688 box = uncore_pmu_to_box(pmu, new_cpu);
689 else
690 box = uncore_pmu_to_box(pmu, old_cpu);
691 if (!box)
692 continue;
693
/* initial binding: no events to migrate yet */
694 if (old_cpu < 0) {
695 WARN_ON_ONCE(box->cpu != -1);
696 box->cpu = new_cpu;
697 continue;
698 }
699
700 WARN_ON_ONCE(box->cpu != old_cpu);
/* cancel the timer on the old cpu; it is re-armed on the new one */
701 if (new_cpu >= 0) {
702 uncore_pmu_cancel_hrtimer(box);
703 perf_pmu_migrate_context(&pmu->pmu,
704 old_cpu, new_cpu);
705 box->cpu = new_cpu;
706 } else {
707 box->cpu = -1;
708 }
709 }
710 }
711}
712
713static void __cpuinit uncore_event_exit_cpu(int cpu)
714{
715 int i, phys_id, target;
716
717 /* if exiting cpu is used for collecting uncore events */
718 if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
719 return;
720
721 /* find a new cpu to collect uncore events */
722 phys_id = topology_physical_package_id(cpu);
723 target = -1;
724 for_each_online_cpu(i) {
725 if (i == cpu)
726 continue;
727 if (phys_id == topology_physical_package_id(i)) {
728 target = i;
729 break;
730 }
731 }
732
733 /* migrate uncore events to the new cpu */
734 if (target >= 0)
735 cpumask_set_cpu(target, &uncore_cpu_mask);
736
737 uncore_change_context(msr_uncores, cpu, target);
738}
739
740static void __cpuinit uncore_event_init_cpu(int cpu)
741{
742 int i, phys_id;
743
744 phys_id = topology_physical_package_id(cpu);
745 for_each_cpu(i, &uncore_cpu_mask) {
746 if (phys_id == topology_physical_package_id(i))
747 return;
748 }
749
750 cpumask_set_cpu(cpu, &uncore_cpu_mask);
751
752 uncore_change_context(msr_uncores, -1, cpu);
753}
754
/*
 * CPU hotplug notifier: the first switch manages box allocation and
 * teardown, the second moves the per-package collector role.  Errors
 * from the prepare path are not propagated here.
 */
755static int __cpuinit uncore_cpu_notifier(struct notifier_block *self,
756 unsigned long action, void *hcpu)
757{
758 unsigned int cpu = (long)hcpu;
759
760 /* allocate/free data structure for uncore box */
761 switch (action & ~CPU_TASKS_FROZEN) {
762 case CPU_UP_PREPARE:
/* phys_id -1: package id resolved later in uncore_cpu_starting() */
763 uncore_cpu_prepare(cpu, -1);
764 break;
765 case CPU_STARTING:
766 uncore_cpu_starting(cpu);
767 break;
768 case CPU_UP_CANCELED:
769 case CPU_DYING:
770 uncore_cpu_dying(cpu);
771 break;
772 default:
773 break;
774 }
775
776 /* select the cpu that collects uncore events */
777 switch (action & ~CPU_TASKS_FROZEN) {
778 case CPU_DOWN_FAILED:
779 case CPU_STARTING:
780 uncore_event_init_cpu(cpu);
781 break;
782 case CPU_DOWN_PREPARE:
783 uncore_event_exit_cpu(cpu);
784 break;
785 default:
786 break;
787 }
788
789 return NOTIFY_OK;
790}
791
792static struct notifier_block uncore_cpu_nb __cpuinitdata = {
793 .notifier_call = uncore_cpu_notifier,
794 /*
795 * to migrate uncore events, our notifier should be executed
796 * before perf core's notifier.
797 */
798 .priority = CPU_PRI_PERF + 1,
799};
800
/* boot-time IPI helper: run the CPU_STARTING work on every online cpu */
801static void __init uncore_cpu_setup(void *dummy)
802{
803 uncore_cpu_starting(smp_processor_id());
804}
805
/*
 * Probe the cpu model, set up the uncore types and replicate the
 * hotplug bring-up for every already-online cpu.  The model switch is
 * an empty placeholder in this patch; later patches add the Nehalem/
 * Sandy Bridge cases, so today every model returns 0 (no uncore).
 */
806static int __init uncore_cpu_init(void)
807{
808 int ret, cpu;
809
810 switch (boot_cpu_data.x86_model) {
811 default:
/* no supported model wired up yet */
812 return 0;
813 }
814
815 ret = uncore_types_init(msr_uncores);
816 if (ret)
817 return ret;
818
/* hold the hotplug lock while replaying prepare/starting for online cpus */
819 get_online_cpus();
820
821 for_each_online_cpu(cpu) {
822 int i, phys_id = topology_physical_package_id(cpu);
823
/* skip packages that already have a collector cpu */
824 for_each_cpu(i, &uncore_cpu_mask) {
825 if (phys_id == topology_physical_package_id(i)) {
826 phys_id = -1;
827 break;
828 }
829 }
830 if (phys_id < 0)
831 continue;
832
/* NOTE(review): uncore_cpu_prepare()'s -ENOMEM is ignored here */
833 uncore_cpu_prepare(cpu, phys_id);
834 uncore_event_init_cpu(cpu);
835 }
836 on_each_cpu(uncore_cpu_setup, NULL, 1);
837
838 register_cpu_notifier(&uncore_cpu_nb);
839
840 put_online_cpus();
841
842 return 0;
843}
844
/*
 * Register every pmu of every discovered uncore type with the perf
 * core.  Always returns 0.
 */
845static int __init uncore_pmus_register(void)
846{
847 struct intel_uncore_pmu *pmu;
848 struct intel_uncore_type *type;
849 int i, j;
850
851 for (i = 0; msr_uncores[i]; i++) {
852 type = msr_uncores[i];
853 for (j = 0; j < type->num_boxes; j++) {
854 pmu = &type->pmus[j];
/* NOTE(review): uncore_pmu_register()'s return value is ignored */
855 uncore_pmu_register(pmu);
856 }
857 }
858
859 return 0;
860}
861
862static int __init intel_uncore_init(void)
863{
864 int ret;
865
866 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
867 return -ENODEV;
868
869 ret = uncore_cpu_init();
870 if (ret)
871 goto fail;
872
873 uncore_pmus_register();
874 return 0;
875fail:
876 return ret;
877}
878device_initcall(intel_uncore_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
new file mode 100644
index 000000000000..49a6bfbba0de
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -0,0 +1,204 @@
1#include <linux/module.h>
2#include <linux/slab.h>
3#include <linux/perf_event.h>
4#include "perf_event.h"
5
/* size of the fixed pmu->name buffer, including the NUL */
6#define UNCORE_PMU_NAME_LEN 32
7#define UNCORE_BOX_HASH_SIZE 8
8
/* counters are polled once a minute to avoid overflow (no overflow irq) */
9#define UNCORE_PMU_HRTIMER_INTERVAL (60 * NSEC_PER_SEC)
10
/* attr.config value that selects the fixed counter */
11#define UNCORE_FIXED_EVENT 0xffff
12#define UNCORE_PMC_IDX_MAX_GENERIC 8
/* the fixed counter uses the index right after the generic ones */
13#define UNCORE_PMC_IDX_FIXED UNCORE_PMC_IDX_MAX_GENERIC
14#define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FIXED + 1)
15
/* uncore constraints match on the low byte of the event config */
16#define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff)
17
18struct intel_uncore_ops;
19struct intel_uncore_pmu;
20struct intel_uncore_box;
21struct uncore_event_desc;
22
/*
 * Description of one uncore component type (e.g. one kind of "box").
 * The MSR layout is per-box: each register address is offset by
 * msr_offset * pmu_idx for successive boxes of the same type.
 */
23struct intel_uncore_type {
24 const char *name;
/* number of generic counters per box */
25 int num_counters;
26 int num_boxes;
/* bit widths of the generic and fixed counters */
27 int perf_ctr_bits;
28 int fixed_ctr_bits;
/* non-zero: only the first box may use the fixed counter */
29 int single_fixed;
/* base MSR addresses (0 when the register does not exist) */
30 unsigned perf_ctr;
31 unsigned event_ctl;
32 unsigned event_mask;
33 unsigned fixed_ctr;
34 unsigned fixed_ctl;
35 unsigned box_ctl;
36 unsigned msr_offset;
/* fallback constraint (sic: "unconstrainted") and optional table */
37 struct event_constraint unconstrainted;
38 struct event_constraint *constraints;
/* per-box pmu array, built by uncore_type_init() */
39 struct intel_uncore_pmu *pmus;
40 struct intel_uncore_ops *ops;
/* NULL-name-terminated table used to build the "events" sysfs group */
41 struct uncore_event_desc *event_descs;
/* [0] format group, [1] events group (dynamic), [2] NULL terminator */
42 const struct attribute_group *attr_groups[3];
43};
44
/* convenience alias: slot 0 of attr_groups holds the format group */
45#define format_group attr_groups[0]
46
/* per-type hardware accessors; enable/disable_box may be NULL */
47struct intel_uncore_ops {
48 void (*init_box)(struct intel_uncore_box *);
49 void (*disable_box)(struct intel_uncore_box *);
50 void (*enable_box)(struct intel_uncore_box *);
51 void (*disable_event)(struct intel_uncore_box *, struct perf_event *);
52 void (*enable_event)(struct intel_uncore_box *, struct perf_event *);
53 u64 (*read_counter)(struct intel_uncore_box *, struct perf_event *);
54};
55
/* one registered perf pmu per box of each uncore type */
56struct intel_uncore_pmu {
57 struct pmu pmu;
58 char name[UNCORE_PMU_NAME_LEN];
/* which box of the type this pmu represents */
59 int pmu_idx;
/* functional id; < 0 until a device is found (see uncore_cpu_prepare) */
60 int func_id;
61 struct intel_uncore_type *type;
/* per-cpu pointer to the (package-shared) box */
62 struct intel_uncore_box ** __percpu box;
63};
64
/*
 * Runtime state of one box instance; shared (refcounted) by all cpus of
 * a physical package.
 */
65struct intel_uncore_box {
/* physical package id, -1 until bound */
66 int phys_id;
67 int n_active; /* number of active events */
68 int n_events;
69 int cpu; /* cpu to collect events */
70 unsigned long flags;
/* number of package cpus referencing this box */
71 atomic_t refcnt;
/* events indexed by hardware counter */
72 struct perf_event *events[UNCORE_PMC_IDX_MAX];
/* events in add order, used by the assignment code */
73 struct perf_event *event_list[UNCORE_PMC_IDX_MAX];
74 unsigned long active_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
/* per-counter reuse tags; see uncore_assign_hw_event() */
75 u64 tags[UNCORE_PMC_IDX_MAX];
76 struct intel_uncore_pmu *pmu;
/* periodic polling timer that prevents counter overflow */
77 struct hrtimer hrtimer;
78 struct list_head list;
79};
80
/* bit in box->flags: hardware init done (see uncore_box_init) */
81#define UNCORE_BOX_FLAG_INITIATED 0
82
/* sysfs descriptor for one named uncore event and its config string */
83struct uncore_event_desc {
84 struct kobj_attribute attr;
85 const char *config;
86};
87
/* table-entry helper; shown via uncore_event_show() */
88#define INTEL_UNCORE_EVENT_DESC(_name, _config) \
89{ \
90 .attr = __ATTR(_name, 0444, uncore_event_show, NULL), \
91 .config = _config, \
92}
93
/*
 * Define a read-only sysfs format attribute whose show() prints the
 * fixed _format string (e.g. "config:0-7").
 */
94#define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format) \
95static ssize_t __uncore_##_var##_show(struct kobject *kobj, \
96 struct kobj_attribute *attr, \
97 char *page) \
98{ \
99 BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
100 return sprintf(page, _format "\n"); \
101} \
102static struct kobj_attribute format_attr_##_var = \
103 __ATTR(_name, 0444, __uncore_##_var##_show, NULL)
104
105
106static ssize_t uncore_event_show(struct kobject *kobj,
107 struct kobj_attribute *attr, char *buf)
108{
109 struct uncore_event_desc *event =
110 container_of(attr, struct uncore_event_desc, attr);
111 return sprintf(buf, "%s", event->config);
112}
113
114static inline
115unsigned uncore_msr_box_ctl(struct intel_uncore_box *box)
116{
117 if (!box->pmu->type->box_ctl)
118 return 0;
119 return box->pmu->type->box_ctl +
120 box->pmu->type->msr_offset * box->pmu->pmu_idx;
121}
122
123static inline
124unsigned uncore_msr_fixed_ctl(struct intel_uncore_box *box)
125{
126 if (!box->pmu->type->fixed_ctl)
127 return 0;
128 return box->pmu->type->fixed_ctl +
129 box->pmu->type->msr_offset * box->pmu->pmu_idx;
130}
131
132static inline
133unsigned uncore_msr_fixed_ctr(struct intel_uncore_box *box)
134{
135 return box->pmu->type->fixed_ctr +
136 box->pmu->type->msr_offset * box->pmu->pmu_idx;
137}
138
139static inline
140unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx)
141{
142 return idx + box->pmu->type->event_ctl +
143 box->pmu->type->msr_offset * box->pmu->pmu_idx;
144}
145
146static inline
147unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx)
148{
149 return idx + box->pmu->type->perf_ctr +
150 box->pmu->type->msr_offset * box->pmu->pmu_idx;
151}
152
/* width (in bits) of the generic counters of this box's type */
153static inline int uncore_perf_ctr_bits(struct intel_uncore_box *box)
154{
155 return box->pmu->type->perf_ctr_bits;
156}
157
/* width (in bits) of the fixed counter of this box's type */
158static inline int uncore_fixed_ctr_bits(struct intel_uncore_box *box)
159{
160 return box->pmu->type->fixed_ctr_bits;
161}
162
/* number of generic counters per box (fixed counter excluded) */
163static inline int uncore_num_counters(struct intel_uncore_box *box)
164{
165 return box->pmu->type->num_counters;
166}
167
/* box-level disable; optional op, a NULL hook is simply skipped */
168static inline void uncore_disable_box(struct intel_uncore_box *box)
169{
170 if (box->pmu->type->ops->disable_box)
171 box->pmu->type->ops->disable_box(box);
172}
173
/* box-level enable; optional op, a NULL hook is simply skipped */
174static inline void uncore_enable_box(struct intel_uncore_box *box)
175{
176 if (box->pmu->type->ops->enable_box)
177 box->pmu->type->ops->enable_box(box);
178}
179
/* per-event disable; mandatory op (unchecked, must be provided) */
180static inline void uncore_disable_event(struct intel_uncore_box *box,
181 struct perf_event *event)
182{
183 box->pmu->type->ops->disable_event(box, event);
184}
185
/* per-event enable; mandatory op (unchecked, must be provided) */
186static inline void uncore_enable_event(struct intel_uncore_box *box,
187 struct perf_event *event)
188{
189 box->pmu->type->ops->enable_event(box, event);
190}
191
/* read the raw hardware counter value for @event */
192static inline u64 uncore_read_counter(struct intel_uncore_box *box,
193 struct perf_event *event)
194{
195 return box->pmu->type->ops->read_counter(box, event);
196}
197
/*
 * One-time hardware init of a box; the INITIATED flag makes repeated
 * calls (boot replay and hotplug) idempotent.
 */
198static inline void uncore_box_init(struct intel_uncore_box *box)
199{
200 if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) {
201 if (box->pmu->type->ops->init_box)
202 box->pmu->type->ops->init_box(box);
203 }
204}