author | Borislav Petkov <bp@suse.de> | 2016-02-10 04:55:15 -0500
committer | Ingo Molnar <mingo@kernel.org> | 2016-02-17 04:09:47 -0500
commit | 6bcb2db547be8263a98ae9413127df9385b38763 (patch)
tree | 91edccbe40af525f8c911bc02b2006d6fc61db71 /arch/x86/events/intel/uncore.c
parent | 609d809f832ddda20f03029c865dd052596ea394 (diff)
perf/x86: Move perf_event_intel_uncore.[ch] .. => x86/events/intel/uncore.[ch]
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: http://lkml.kernel.org/r/1455098123-11740-10-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/events/intel/uncore.c')
-rw-r--r-- | arch/x86/events/intel/uncore.c | 1401 |
1 file changed, 1401 insertions, 0 deletions
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
new file mode 100644
index 000000000000..91a18d6c4405
--- /dev/null
+++ b/arch/x86/events/intel/uncore.c
@@ -0,0 +1,1401 @@
1 | #include "uncore.h" | ||
2 | |||
3 | static struct intel_uncore_type *empty_uncore[] = { NULL, }; | ||
4 | struct intel_uncore_type **uncore_msr_uncores = empty_uncore; | ||
5 | struct intel_uncore_type **uncore_pci_uncores = empty_uncore; | ||
6 | |||
7 | static bool pcidrv_registered; | ||
8 | struct pci_driver *uncore_pci_driver; | ||
9 | /* pci bus to socket mapping */ | ||
10 | DEFINE_RAW_SPINLOCK(pci2phy_map_lock); | ||
11 | struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head); | ||
12 | struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX]; | ||
13 | |||
14 | static DEFINE_RAW_SPINLOCK(uncore_box_lock); | ||
15 | /* mask of cpus that collect uncore events */ | ||
16 | static cpumask_t uncore_cpu_mask; | ||
17 | |||
18 | /* constraint for the fixed counter */ | ||
19 | static struct event_constraint uncore_constraint_fixed = | ||
20 | EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL); | ||
21 | struct event_constraint uncore_constraint_empty = | ||
22 | EVENT_CONSTRAINT(0, 0, 0); | ||
23 | |||
24 | int uncore_pcibus_to_physid(struct pci_bus *bus) | ||
25 | { | ||
26 | struct pci2phy_map *map; | ||
27 | int phys_id = -1; | ||
28 | |||
29 | raw_spin_lock(&pci2phy_map_lock); | ||
30 | list_for_each_entry(map, &pci2phy_map_head, list) { | ||
31 | if (map->segment == pci_domain_nr(bus)) { | ||
32 | phys_id = map->pbus_to_physid[bus->number]; | ||
33 | break; | ||
34 | } | ||
35 | } | ||
36 | raw_spin_unlock(&pci2phy_map_lock); | ||
37 | |||
38 | return phys_id; | ||
39 | } | ||
40 | |||
41 | struct pci2phy_map *__find_pci2phy_map(int segment) | ||
42 | { | ||
43 | struct pci2phy_map *map, *alloc = NULL; | ||
44 | int i; | ||
45 | |||
46 | lockdep_assert_held(&pci2phy_map_lock); | ||
47 | |||
48 | lookup: | ||
49 | list_for_each_entry(map, &pci2phy_map_head, list) { | ||
50 | if (map->segment == segment) | ||
51 | goto end; | ||
52 | } | ||
53 | |||
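/*
 * No mapping for this segment yet: drop the lock so the allocation can
 * sleep (GFP_KERNEL), then retry the lookup, since another CPU may have
 * added the entry while the lock was released.  An allocation that turns
 * out to be unneeded is freed at "end".
 */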
54 | if (!alloc) { | ||
55 | raw_spin_unlock(&pci2phy_map_lock); | ||
56 | alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL); | ||
57 | raw_spin_lock(&pci2phy_map_lock); | ||
58 | |||
59 | if (!alloc) | ||
60 | return NULL; | ||
61 | |||
62 | goto lookup; | ||
63 | } | ||
64 | |||
65 | map = alloc; | ||
66 | alloc = NULL; | ||
67 | map->segment = segment; | ||
68 | for (i = 0; i < 256; i++) | ||
69 | map->pbus_to_physid[i] = -1; | ||
70 | list_add_tail(&map->list, &pci2phy_map_head); | ||
71 | |||
72 | end: | ||
73 | kfree(alloc); | ||
74 | return map; | ||
75 | } | ||
76 | |||
77 | ssize_t uncore_event_show(struct kobject *kobj, | ||
78 | struct kobj_attribute *attr, char *buf) | ||
79 | { | ||
80 | struct uncore_event_desc *event = | ||
81 | container_of(attr, struct uncore_event_desc, attr); | ||
82 | return sprintf(buf, "%s", event->config); | ||
83 | } | ||
84 | |||
85 | struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event) | ||
86 | { | ||
87 | return container_of(event->pmu, struct intel_uncore_pmu, pmu); | ||
88 | } | ||
89 | |||
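/*
 * Return the box that serves this cpu's physical package.  The pointer is
 * cached per cpu; the box list is only scanned under uncore_box_lock the
 * first time a cpu asks.
 */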
90 | struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) | ||
91 | { | ||
92 | struct intel_uncore_box *box; | ||
93 | |||
94 | box = *per_cpu_ptr(pmu->box, cpu); | ||
95 | if (box) | ||
96 | return box; | ||
97 | |||
98 | raw_spin_lock(&uncore_box_lock); | ||
99 | /* Recheck in lock to handle races. */ | ||
100 | if (*per_cpu_ptr(pmu->box, cpu)) | ||
101 | goto out; | ||
102 | list_for_each_entry(box, &pmu->box_list, list) { | ||
103 | if (box->phys_id == topology_physical_package_id(cpu)) { | ||
104 | atomic_inc(&box->refcnt); | ||
105 | *per_cpu_ptr(pmu->box, cpu) = box; | ||
106 | break; | ||
107 | } | ||
108 | } | ||
109 | out: | ||
110 | raw_spin_unlock(&uncore_box_lock); | ||
111 | |||
112 | return *per_cpu_ptr(pmu->box, cpu); | ||
113 | } | ||
114 | |||
115 | struct intel_uncore_box *uncore_event_to_box(struct perf_event *event) | ||
116 | { | ||
117 | /* | ||
118 | * perf core schedules events on the basis of cpu; uncore events are | ||
119 | * collected by one of the cpus inside a physical package. | ||
120 | */ | ||
121 | return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id()); | ||
122 | } | ||
123 | |||
124 | u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event) | ||
125 | { | ||
126 | u64 count; | ||
127 | |||
128 | rdmsrl(event->hw.event_base, count); | ||
129 | |||
130 | return count; | ||
131 | } | ||
132 | |||
133 | /* | ||
134 | * generic get constraint function for shared match/mask registers. | ||
135 | */ | ||
136 | struct event_constraint * | ||
137 | uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event) | ||
138 | { | ||
139 | struct intel_uncore_extra_reg *er; | ||
140 | struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; | ||
141 | struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; | ||
142 | unsigned long flags; | ||
143 | bool ok = false; | ||
144 | |||
145 | /* | ||
146 | * reg->alloc can be set due to existing state, so for fake box we | ||
147 | * need to ignore this, otherwise we might fail to allocate proper | ||
148 | * fake state for this extra reg constraint. | ||
149 | */ | ||
150 | if (reg1->idx == EXTRA_REG_NONE || | ||
151 | (!uncore_box_is_fake(box) && reg1->alloc)) | ||
152 | return NULL; | ||
153 | |||
154 | er = &box->shared_regs[reg1->idx]; | ||
155 | raw_spin_lock_irqsave(&er->lock, flags); | ||
156 | if (!atomic_read(&er->ref) || | ||
157 | (er->config1 == reg1->config && er->config2 == reg2->config)) { | ||
158 | atomic_inc(&er->ref); | ||
159 | er->config1 = reg1->config; | ||
160 | er->config2 = reg2->config; | ||
161 | ok = true; | ||
162 | } | ||
163 | raw_spin_unlock_irqrestore(&er->lock, flags); | ||
164 | |||
165 | if (ok) { | ||
166 | if (!uncore_box_is_fake(box)) | ||
167 | reg1->alloc = 1; | ||
168 | return NULL; | ||
169 | } | ||
170 | |||
171 | return &uncore_constraint_empty; | ||
172 | } | ||
173 | |||
174 | void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event) | ||
175 | { | ||
176 | struct intel_uncore_extra_reg *er; | ||
177 | struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; | ||
178 | |||
179 | /* | ||
180 | * Only put the constraint if the extra reg was actually allocated. This | ||
181 | * also takes care of events which do not use an extra shared reg. | ||
182 | * | ||
183 | * Also, if this is a fake box we shouldn't touch any event state | ||
184 | * (reg->alloc) and we don't care about leaving inconsistent box | ||
185 | * state either since it will be thrown out. | ||
186 | */ | ||
187 | if (uncore_box_is_fake(box) || !reg1->alloc) | ||
188 | return; | ||
189 | |||
190 | er = &box->shared_regs[reg1->idx]; | ||
191 | atomic_dec(&er->ref); | ||
192 | reg1->alloc = 0; | ||
193 | } | ||
194 | |||
195 | u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx) | ||
196 | { | ||
197 | struct intel_uncore_extra_reg *er; | ||
198 | unsigned long flags; | ||
199 | u64 config; | ||
200 | |||
201 | er = &box->shared_regs[idx]; | ||
202 | |||
203 | raw_spin_lock_irqsave(&er->lock, flags); | ||
204 | config = er->config; | ||
205 | raw_spin_unlock_irqrestore(&er->lock, flags); | ||
206 | |||
207 | return config; | ||
208 | } | ||
209 | |||
210 | static void uncore_assign_hw_event(struct intel_uncore_box *box, struct perf_event *event, int idx) | ||
211 | { | ||
212 | struct hw_perf_event *hwc = &event->hw; | ||
213 | |||
214 | hwc->idx = idx; | ||
215 | hwc->last_tag = ++box->tags[idx]; | ||
216 | |||
217 | if (hwc->idx == UNCORE_PMC_IDX_FIXED) { | ||
218 | hwc->event_base = uncore_fixed_ctr(box); | ||
219 | hwc->config_base = uncore_fixed_ctl(box); | ||
220 | return; | ||
221 | } | ||
222 | |||
223 | hwc->config_base = uncore_event_ctl(box, hwc->idx); | ||
224 | hwc->event_base = uncore_perf_ctr(box, hwc->idx); | ||
225 | } | ||
226 | |||
227 | void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event) | ||
228 | { | ||
229 | u64 prev_count, new_count, delta; | ||
230 | int shift; | ||
231 | |||
232 | if (event->hw.idx >= UNCORE_PMC_IDX_FIXED) | ||
233 | shift = 64 - uncore_fixed_ctr_bits(box); | ||
234 | else | ||
235 | shift = 64 - uncore_perf_ctr_bits(box); | ||
236 | |||
237 | /* the hrtimer might modify the previous event value */ | ||
238 | again: | ||
239 | prev_count = local64_read(&event->hw.prev_count); | ||
240 | new_count = uncore_read_counter(box, event); | ||
241 | if (local64_xchg(&event->hw.prev_count, new_count) != prev_count) | ||
242 | goto again; | ||
243 | |||
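/*
 * The hardware counter is narrower than 64 bits (shift = 64 - width).
 * Shifting both snapshots up to the top of the word makes the subtraction
 * wrap at the counter width, so the value shifted back down is the correct
 * unsigned delta even across a counter rollover.
 */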
244 | delta = (new_count << shift) - (prev_count << shift); | ||
245 | delta >>= shift; | ||
246 | |||
247 | local64_add(delta, &event->count); | ||
248 | } | ||
249 | |||
250 | /* | ||
251 | * The overflow interrupt is unavailable for SandyBridge-EP and is broken | ||
252 | * for SandyBridge, so we use an hrtimer to periodically poll the counters | ||
253 | * to avoid overflow. | ||
254 | */ | ||
255 | static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer) | ||
256 | { | ||
257 | struct intel_uncore_box *box; | ||
258 | struct perf_event *event; | ||
259 | unsigned long flags; | ||
260 | int bit; | ||
261 | |||
262 | box = container_of(hrtimer, struct intel_uncore_box, hrtimer); | ||
263 | if (!box->n_active || box->cpu != smp_processor_id()) | ||
264 | return HRTIMER_NORESTART; | ||
265 | /* | ||
266 | * disable local interrupts to prevent uncore_pmu_event_start/stop | ||
267 | * from interrupting the update process | ||
268 | */ | ||
269 | local_irq_save(flags); | ||
270 | |||
271 | /* | ||
272 | * handle boxes with an active event list as opposed to active | ||
273 | * counters | ||
274 | */ | ||
275 | list_for_each_entry(event, &box->active_list, active_entry) { | ||
276 | uncore_perf_event_update(box, event); | ||
277 | } | ||
278 | |||
279 | for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX) | ||
280 | uncore_perf_event_update(box, box->events[bit]); | ||
281 | |||
282 | local_irq_restore(flags); | ||
283 | |||
284 | hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration)); | ||
285 | return HRTIMER_RESTART; | ||
286 | } | ||
287 | |||
288 | void uncore_pmu_start_hrtimer(struct intel_uncore_box *box) | ||
289 | { | ||
290 | hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration), | ||
291 | HRTIMER_MODE_REL_PINNED); | ||
292 | } | ||
293 | |||
294 | void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box) | ||
295 | { | ||
296 | hrtimer_cancel(&box->hrtimer); | ||
297 | } | ||
298 | |||
299 | static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box) | ||
300 | { | ||
301 | hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
302 | box->hrtimer.function = uncore_pmu_hrtimer; | ||
303 | } | ||
304 | |||
305 | static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int node) | ||
306 | { | ||
307 | struct intel_uncore_box *box; | ||
308 | int i, size; | ||
309 | |||
310 | size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg); | ||
311 | |||
312 | box = kzalloc_node(size, GFP_KERNEL, node); | ||
313 | if (!box) | ||
314 | return NULL; | ||
315 | |||
316 | for (i = 0; i < type->num_shared_regs; i++) | ||
317 | raw_spin_lock_init(&box->shared_regs[i].lock); | ||
318 | |||
319 | uncore_pmu_init_hrtimer(box); | ||
320 | atomic_set(&box->refcnt, 1); | ||
321 | box->cpu = -1; | ||
322 | box->phys_id = -1; | ||
323 | |||
324 | /* set default hrtimer timeout */ | ||
325 | box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL; | ||
326 | |||
327 | INIT_LIST_HEAD(&box->active_list); | ||
328 | |||
329 | return box; | ||
330 | } | ||
331 | |||
332 | /* | ||
333 | * Using uncore_pmu_event_init pmu event_init callback | ||
334 | * as a detection point for uncore events. | ||
335 | */ | ||
336 | static int uncore_pmu_event_init(struct perf_event *event); | ||
337 | |||
338 | static bool is_uncore_event(struct perf_event *event) | ||
339 | { | ||
340 | return event->pmu->event_init == uncore_pmu_event_init; | ||
341 | } | ||
342 | |||
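/*
 * Collect the group leader (and, when dogrp is set, its active siblings)
 * into box->event_list, bounded by the number of generic counters plus the
 * optional fixed counter.  Returns the new number of collected events, or
 * -EINVAL if the box is already full.
 */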
343 | static int | ||
344 | uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp) | ||
345 | { | ||
346 | struct perf_event *event; | ||
347 | int n, max_count; | ||
348 | |||
349 | max_count = box->pmu->type->num_counters; | ||
350 | if (box->pmu->type->fixed_ctl) | ||
351 | max_count++; | ||
352 | |||
353 | if (box->n_events >= max_count) | ||
354 | return -EINVAL; | ||
355 | |||
356 | n = box->n_events; | ||
357 | |||
358 | if (is_uncore_event(leader)) { | ||
359 | box->event_list[n] = leader; | ||
360 | n++; | ||
361 | } | ||
362 | |||
363 | if (!dogrp) | ||
364 | return n; | ||
365 | |||
366 | list_for_each_entry(event, &leader->sibling_list, group_entry) { | ||
367 | if (!is_uncore_event(event) || | ||
368 | event->state <= PERF_EVENT_STATE_OFF) | ||
369 | continue; | ||
370 | |||
371 | if (n >= max_count) | ||
372 | return -EINVAL; | ||
373 | |||
374 | box->event_list[n] = event; | ||
375 | n++; | ||
376 | } | ||
377 | return n; | ||
378 | } | ||
379 | |||
380 | static struct event_constraint * | ||
381 | uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event) | ||
382 | { | ||
383 | struct intel_uncore_type *type = box->pmu->type; | ||
384 | struct event_constraint *c; | ||
385 | |||
386 | if (type->ops->get_constraint) { | ||
387 | c = type->ops->get_constraint(box, event); | ||
388 | if (c) | ||
389 | return c; | ||
390 | } | ||
391 | |||
392 | if (event->attr.config == UNCORE_FIXED_EVENT) | ||
393 | return &uncore_constraint_fixed; | ||
394 | |||
395 | if (type->constraints) { | ||
396 | for_each_event_constraint(c, type->constraints) { | ||
397 | if ((event->hw.config & c->cmask) == c->code) | ||
398 | return c; | ||
399 | } | ||
400 | } | ||
401 | |||
402 | return &type->unconstrainted; | ||
403 | } | ||
404 | |||
405 | static void uncore_put_event_constraint(struct intel_uncore_box *box, struct perf_event *event) | ||
406 | { | ||
407 | if (box->pmu->type->ops->put_constraint) | ||
408 | box->pmu->type->ops->put_constraint(box, event); | ||
409 | } | ||
410 | |||
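/*
 * Assign hardware counters to the n collected events: gather each event's
 * constraint, try the fast path of keeping every event on the counter it
 * already occupies, and otherwise fall back to the generic weight-ordered
 * assignment in perf_assign_events().  When assign is NULL (group
 * validation) or the assignment fails, the constraints are released again.
 */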
411 | static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n) | ||
412 | { | ||
413 | unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)]; | ||
414 | struct event_constraint *c; | ||
415 | int i, wmin, wmax, ret = 0; | ||
416 | struct hw_perf_event *hwc; | ||
417 | |||
418 | bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX); | ||
419 | |||
420 | for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) { | ||
421 | c = uncore_get_event_constraint(box, box->event_list[i]); | ||
422 | box->event_constraint[i] = c; | ||
423 | wmin = min(wmin, c->weight); | ||
424 | wmax = max(wmax, c->weight); | ||
425 | } | ||
426 | |||
427 | /* fastpath, try to reuse previous register */ | ||
428 | for (i = 0; i < n; i++) { | ||
429 | hwc = &box->event_list[i]->hw; | ||
430 | c = box->event_constraint[i]; | ||
431 | |||
432 | /* never assigned */ | ||
433 | if (hwc->idx == -1) | ||
434 | break; | ||
435 | |||
436 | /* constraint still honored */ | ||
437 | if (!test_bit(hwc->idx, c->idxmsk)) | ||
438 | break; | ||
439 | |||
440 | /* not already used */ | ||
441 | if (test_bit(hwc->idx, used_mask)) | ||
442 | break; | ||
443 | |||
444 | __set_bit(hwc->idx, used_mask); | ||
445 | if (assign) | ||
446 | assign[i] = hwc->idx; | ||
447 | } | ||
448 | /* slow path */ | ||
449 | if (i != n) | ||
450 | ret = perf_assign_events(box->event_constraint, n, | ||
451 | wmin, wmax, n, assign); | ||
452 | |||
453 | if (!assign || ret) { | ||
454 | for (i = 0; i < n; i++) | ||
455 | uncore_put_event_constraint(box, box->event_list[i]); | ||
456 | } | ||
457 | return ret ? -EINVAL : 0; | ||
458 | } | ||
459 | |||
460 | static void uncore_pmu_event_start(struct perf_event *event, int flags) | ||
461 | { | ||
462 | struct intel_uncore_box *box = uncore_event_to_box(event); | ||
463 | int idx = event->hw.idx; | ||
464 | |||
465 | if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) | ||
466 | return; | ||
467 | |||
468 | if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX)) | ||
469 | return; | ||
470 | |||
471 | event->hw.state = 0; | ||
472 | box->events[idx] = event; | ||
473 | box->n_active++; | ||
474 | __set_bit(idx, box->active_mask); | ||
475 | |||
476 | local64_set(&event->hw.prev_count, uncore_read_counter(box, event)); | ||
477 | uncore_enable_event(box, event); | ||
478 | |||
479 | if (box->n_active == 1) { | ||
480 | uncore_enable_box(box); | ||
481 | uncore_pmu_start_hrtimer(box); | ||
482 | } | ||
483 | } | ||
484 | |||
485 | static void uncore_pmu_event_stop(struct perf_event *event, int flags) | ||
486 | { | ||
487 | struct intel_uncore_box *box = uncore_event_to_box(event); | ||
488 | struct hw_perf_event *hwc = &event->hw; | ||
489 | |||
490 | if (__test_and_clear_bit(hwc->idx, box->active_mask)) { | ||
491 | uncore_disable_event(box, event); | ||
492 | box->n_active--; | ||
493 | box->events[hwc->idx] = NULL; | ||
494 | WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); | ||
495 | hwc->state |= PERF_HES_STOPPED; | ||
496 | |||
497 | if (box->n_active == 0) { | ||
498 | uncore_disable_box(box); | ||
499 | uncore_pmu_cancel_hrtimer(box); | ||
500 | } | ||
501 | } | ||
502 | |||
503 | if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { | ||
504 | /* | ||
505 | * Drain the remaining delta count out of an event | ||
506 | * that we are disabling: | ||
507 | */ | ||
508 | uncore_perf_event_update(box, event); | ||
509 | hwc->state |= PERF_HES_UPTODATE; | ||
510 | } | ||
511 | } | ||
512 | |||
513 | static int uncore_pmu_event_add(struct perf_event *event, int flags) | ||
514 | { | ||
515 | struct intel_uncore_box *box = uncore_event_to_box(event); | ||
516 | struct hw_perf_event *hwc = &event->hw; | ||
517 | int assign[UNCORE_PMC_IDX_MAX]; | ||
518 | int i, n, ret; | ||
519 | |||
520 | if (!box) | ||
521 | return -ENODEV; | ||
522 | |||
523 | ret = n = uncore_collect_events(box, event, false); | ||
524 | if (ret < 0) | ||
525 | return ret; | ||
526 | |||
527 | hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; | ||
528 | if (!(flags & PERF_EF_START)) | ||
529 | hwc->state |= PERF_HES_ARCH; | ||
530 | |||
531 | ret = uncore_assign_events(box, assign, n); | ||
532 | if (ret) | ||
533 | return ret; | ||
534 | |||
535 | /* save events moving to new counters */ | ||
536 | for (i = 0; i < box->n_events; i++) { | ||
537 | event = box->event_list[i]; | ||
538 | hwc = &event->hw; | ||
539 | |||
540 | if (hwc->idx == assign[i] && | ||
541 | hwc->last_tag == box->tags[assign[i]]) | ||
542 | continue; | ||
543 | /* | ||
544 | * Ensure we don't accidentally enable a stopped | ||
545 | * counter simply because we rescheduled. | ||
546 | */ | ||
547 | if (hwc->state & PERF_HES_STOPPED) | ||
548 | hwc->state |= PERF_HES_ARCH; | ||
549 | |||
550 | uncore_pmu_event_stop(event, PERF_EF_UPDATE); | ||
551 | } | ||
552 | |||
553 | /* reprogram moved events into new counters */ | ||
554 | for (i = 0; i < n; i++) { | ||
555 | event = box->event_list[i]; | ||
556 | hwc = &event->hw; | ||
557 | |||
558 | if (hwc->idx != assign[i] || | ||
559 | hwc->last_tag != box->tags[assign[i]]) | ||
560 | uncore_assign_hw_event(box, event, assign[i]); | ||
561 | else if (i < box->n_events) | ||
562 | continue; | ||
563 | |||
564 | if (hwc->state & PERF_HES_ARCH) | ||
565 | continue; | ||
566 | |||
567 | uncore_pmu_event_start(event, 0); | ||
568 | } | ||
569 | box->n_events = n; | ||
570 | |||
571 | return 0; | ||
572 | } | ||
573 | |||
574 | static void uncore_pmu_event_del(struct perf_event *event, int flags) | ||
575 | { | ||
576 | struct intel_uncore_box *box = uncore_event_to_box(event); | ||
577 | int i; | ||
578 | |||
579 | uncore_pmu_event_stop(event, PERF_EF_UPDATE); | ||
580 | |||
581 | for (i = 0; i < box->n_events; i++) { | ||
582 | if (event == box->event_list[i]) { | ||
583 | uncore_put_event_constraint(box, event); | ||
584 | |||
585 | while (++i < box->n_events) | ||
586 | box->event_list[i - 1] = box->event_list[i]; | ||
587 | |||
588 | --box->n_events; | ||
589 | break; | ||
590 | } | ||
591 | } | ||
592 | |||
593 | event->hw.idx = -1; | ||
594 | event->hw.last_tag = ~0ULL; | ||
595 | } | ||
596 | |||
597 | void uncore_pmu_event_read(struct perf_event *event) | ||
598 | { | ||
599 | struct intel_uncore_box *box = uncore_event_to_box(event); | ||
600 | uncore_perf_event_update(box, event); | ||
601 | } | ||
602 | |||
603 | /* | ||
604 | * validation ensures the group can be loaded onto the | ||
605 | * PMU if it was the only group available. | ||
606 | */ | ||
607 | static int uncore_validate_group(struct intel_uncore_pmu *pmu, | ||
608 | struct perf_event *event) | ||
609 | { | ||
610 | struct perf_event *leader = event->group_leader; | ||
611 | struct intel_uncore_box *fake_box; | ||
612 | int ret = -EINVAL, n; | ||
613 | |||
614 | fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE); | ||
615 | if (!fake_box) | ||
616 | return -ENOMEM; | ||
617 | |||
618 | fake_box->pmu = pmu; | ||
619 | /* | ||
620 | * the event is not yet connected with its | ||
621 | * siblings therefore we must first collect | ||
622 | * existing siblings, then add the new event | ||
623 | * before we can simulate the scheduling | ||
624 | */ | ||
625 | n = uncore_collect_events(fake_box, leader, true); | ||
626 | if (n < 0) | ||
627 | goto out; | ||
628 | |||
629 | fake_box->n_events = n; | ||
630 | n = uncore_collect_events(fake_box, event, false); | ||
631 | if (n < 0) | ||
632 | goto out; | ||
633 | |||
634 | fake_box->n_events = n; | ||
635 | |||
636 | ret = uncore_assign_events(fake_box, NULL, n); | ||
637 | out: | ||
638 | kfree(fake_box); | ||
639 | return ret; | ||
640 | } | ||
641 | |||
642 | static int uncore_pmu_event_init(struct perf_event *event) | ||
643 | { | ||
644 | struct intel_uncore_pmu *pmu; | ||
645 | struct intel_uncore_box *box; | ||
646 | struct hw_perf_event *hwc = &event->hw; | ||
647 | int ret; | ||
648 | |||
649 | if (event->attr.type != event->pmu->type) | ||
650 | return -ENOENT; | ||
651 | |||
652 | pmu = uncore_event_to_pmu(event); | ||
653 | /* no device found for this pmu */ | ||
654 | if (pmu->func_id < 0) | ||
655 | return -ENOENT; | ||
656 | |||
657 | /* | ||
658 | * The uncore PMU measures at all privilege levels all the time, | ||
659 | * so it doesn't make sense to specify any exclude bits. | ||
660 | */ | ||
661 | if (event->attr.exclude_user || event->attr.exclude_kernel || | ||
662 | event->attr.exclude_hv || event->attr.exclude_idle) | ||
663 | return -EINVAL; | ||
664 | |||
665 | /* Sampling not supported yet */ | ||
666 | if (hwc->sample_period) | ||
667 | return -EINVAL; | ||
668 | |||
669 | /* | ||
670 | * Place all uncore events for a particular physical package | ||
671 | * onto a single cpu | ||
672 | */ | ||
673 | if (event->cpu < 0) | ||
674 | return -EINVAL; | ||
675 | box = uncore_pmu_to_box(pmu, event->cpu); | ||
676 | if (!box || box->cpu < 0) | ||
677 | return -EINVAL; | ||
678 | event->cpu = box->cpu; | ||
679 | |||
680 | event->hw.idx = -1; | ||
681 | event->hw.last_tag = ~0ULL; | ||
682 | event->hw.extra_reg.idx = EXTRA_REG_NONE; | ||
683 | event->hw.branch_reg.idx = EXTRA_REG_NONE; | ||
684 | |||
685 | if (event->attr.config == UNCORE_FIXED_EVENT) { | ||
686 | /* no fixed counter */ | ||
687 | if (!pmu->type->fixed_ctl) | ||
688 | return -EINVAL; | ||
689 | /* | ||
690 | * if there is only one fixed counter, only the first pmu | ||
691 | * can access the fixed counter | ||
692 | */ | ||
693 | if (pmu->type->single_fixed && pmu->pmu_idx > 0) | ||
694 | return -EINVAL; | ||
695 | |||
696 | /* fixed counters have event field hardcoded to zero */ | ||
697 | hwc->config = 0ULL; | ||
698 | } else { | ||
699 | hwc->config = event->attr.config & pmu->type->event_mask; | ||
700 | if (pmu->type->ops->hw_config) { | ||
701 | ret = pmu->type->ops->hw_config(box, event); | ||
702 | if (ret) | ||
703 | return ret; | ||
704 | } | ||
705 | } | ||
706 | |||
707 | if (event->group_leader != event) | ||
708 | ret = uncore_validate_group(pmu, event); | ||
709 | else | ||
710 | ret = 0; | ||
711 | |||
712 | return ret; | ||
713 | } | ||
714 | |||
715 | static ssize_t uncore_get_attr_cpumask(struct device *dev, | ||
716 | struct device_attribute *attr, char *buf) | ||
717 | { | ||
718 | return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask); | ||
719 | } | ||
720 | |||
721 | static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL); | ||
722 | |||
723 | static struct attribute *uncore_pmu_attrs[] = { | ||
724 | &dev_attr_cpumask.attr, | ||
725 | NULL, | ||
726 | }; | ||
727 | |||
728 | static struct attribute_group uncore_pmu_attr_group = { | ||
729 | .attrs = uncore_pmu_attrs, | ||
730 | }; | ||
731 | |||
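/*
 * Register one struct pmu per box instance.  Types with a single box are
 * named "uncore_<type>" (or plain "uncore"); types with several boxes get
 * an additional "_<idx>" suffix.
 */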
732 | static int uncore_pmu_register(struct intel_uncore_pmu *pmu) | ||
733 | { | ||
734 | int ret; | ||
735 | |||
736 | if (!pmu->type->pmu) { | ||
737 | pmu->pmu = (struct pmu) { | ||
738 | .attr_groups = pmu->type->attr_groups, | ||
739 | .task_ctx_nr = perf_invalid_context, | ||
740 | .event_init = uncore_pmu_event_init, | ||
741 | .add = uncore_pmu_event_add, | ||
742 | .del = uncore_pmu_event_del, | ||
743 | .start = uncore_pmu_event_start, | ||
744 | .stop = uncore_pmu_event_stop, | ||
745 | .read = uncore_pmu_event_read, | ||
746 | }; | ||
747 | } else { | ||
748 | pmu->pmu = *pmu->type->pmu; | ||
749 | pmu->pmu.attr_groups = pmu->type->attr_groups; | ||
750 | } | ||
751 | |||
752 | if (pmu->type->num_boxes == 1) { | ||
753 | if (strlen(pmu->type->name) > 0) | ||
754 | sprintf(pmu->name, "uncore_%s", pmu->type->name); | ||
755 | else | ||
756 | sprintf(pmu->name, "uncore"); | ||
757 | } else { | ||
758 | sprintf(pmu->name, "uncore_%s_%d", pmu->type->name, | ||
759 | pmu->pmu_idx); | ||
760 | } | ||
761 | |||
762 | ret = perf_pmu_register(&pmu->pmu, pmu->name, -1); | ||
763 | return ret; | ||
764 | } | ||
765 | |||
766 | static void __init uncore_type_exit(struct intel_uncore_type *type) | ||
767 | { | ||
768 | int i; | ||
769 | |||
770 | for (i = 0; i < type->num_boxes; i++) | ||
771 | free_percpu(type->pmus[i].box); | ||
772 | kfree(type->pmus); | ||
773 | type->pmus = NULL; | ||
774 | kfree(type->events_group); | ||
775 | type->events_group = NULL; | ||
776 | } | ||
777 | |||
778 | static void __init uncore_types_exit(struct intel_uncore_type **types) | ||
779 | { | ||
780 | int i; | ||
781 | for (i = 0; types[i]; i++) | ||
782 | uncore_type_exit(types[i]); | ||
783 | } | ||
784 | |||
785 | static int __init uncore_type_init(struct intel_uncore_type *type) | ||
786 | { | ||
787 | struct intel_uncore_pmu *pmus; | ||
788 | struct attribute_group *attr_group; | ||
789 | struct attribute **attrs; | ||
790 | int i, j; | ||
791 | |||
792 | pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL); | ||
793 | if (!pmus) | ||
794 | return -ENOMEM; | ||
795 | |||
796 | type->pmus = pmus; | ||
797 | |||
798 | type->unconstrainted = (struct event_constraint) | ||
799 | __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1, | ||
800 | 0, type->num_counters, 0, 0); | ||
801 | |||
802 | for (i = 0; i < type->num_boxes; i++) { | ||
803 | pmus[i].func_id = -1; | ||
804 | pmus[i].pmu_idx = i; | ||
805 | pmus[i].type = type; | ||
806 | INIT_LIST_HEAD(&pmus[i].box_list); | ||
807 | pmus[i].box = alloc_percpu(struct intel_uncore_box *); | ||
808 | if (!pmus[i].box) | ||
809 | goto fail; | ||
810 | } | ||
811 | |||
812 | if (type->event_descs) { | ||
813 | i = 0; | ||
814 | while (type->event_descs[i].attr.attr.name) | ||
815 | i++; | ||
816 | |||
817 | attr_group = kzalloc(sizeof(struct attribute *) * (i + 1) + | ||
818 | sizeof(*attr_group), GFP_KERNEL); | ||
819 | if (!attr_group) | ||
820 | goto fail; | ||
821 | |||
822 | attrs = (struct attribute **)(attr_group + 1); | ||
823 | attr_group->name = "events"; | ||
824 | attr_group->attrs = attrs; | ||
825 | |||
826 | for (j = 0; j < i; j++) | ||
827 | attrs[j] = &type->event_descs[j].attr.attr; | ||
828 | |||
829 | type->events_group = attr_group; | ||
830 | } | ||
831 | |||
832 | type->pmu_group = &uncore_pmu_attr_group; | ||
833 | return 0; | ||
834 | fail: | ||
835 | uncore_type_exit(type); | ||
836 | return -ENOMEM; | ||
837 | } | ||
838 | |||
839 | static int __init uncore_types_init(struct intel_uncore_type **types) | ||
840 | { | ||
841 | int i, ret; | ||
842 | |||
843 | for (i = 0; types[i]; i++) { | ||
844 | ret = uncore_type_init(types[i]); | ||
845 | if (ret) | ||
846 | goto fail; | ||
847 | } | ||
848 | return 0; | ||
849 | fail: | ||
850 | while (--i >= 0) | ||
851 | uncore_type_exit(types[i]); | ||
852 | return ret; | ||
853 | } | ||
854 | |||
855 | /* | ||
856 | * add a pci uncore device | ||
857 | */ | ||
858 | static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) | ||
859 | { | ||
860 | struct intel_uncore_pmu *pmu; | ||
861 | struct intel_uncore_box *box; | ||
862 | struct intel_uncore_type *type; | ||
863 | int phys_id; | ||
864 | bool first_box = false; | ||
865 | |||
866 | phys_id = uncore_pcibus_to_physid(pdev->bus); | ||
867 | if (phys_id < 0) | ||
868 | return -ENODEV; | ||
869 | |||
870 | if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) { | ||
871 | int idx = UNCORE_PCI_DEV_IDX(id->driver_data); | ||
872 | uncore_extra_pci_dev[phys_id][idx] = pdev; | ||
873 | pci_set_drvdata(pdev, NULL); | ||
874 | return 0; | ||
875 | } | ||
876 | |||
877 | type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)]; | ||
878 | box = uncore_alloc_box(type, NUMA_NO_NODE); | ||
879 | if (!box) | ||
880 | return -ENOMEM; | ||
881 | |||
882 | /* | ||
883 | * for a performance monitoring unit with multiple boxes, | ||
884 | * each box has a different function id. | ||
885 | */ | ||
886 | pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)]; | ||
887 | /* Knights Landing uses a common PCI device ID for multiple instances of | ||
888 | * an uncore PMU device type. There is only one entry per device type in | ||
889 | * the knl_uncore_pci_ids table in spite of multiple devices present for | ||
890 | * some device types. Hence PCI device idx would be 0 for all devices. | ||
891 | * So increment pmu pointer to point to an unused array element. | ||
892 | */ | ||
893 | if (boot_cpu_data.x86_model == 87) | ||
894 | while (pmu->func_id >= 0) | ||
895 | pmu++; | ||
896 | if (pmu->func_id < 0) | ||
897 | pmu->func_id = pdev->devfn; | ||
898 | else | ||
899 | WARN_ON_ONCE(pmu->func_id != pdev->devfn); | ||
900 | |||
901 | box->phys_id = phys_id; | ||
902 | box->pci_dev = pdev; | ||
903 | box->pmu = pmu; | ||
904 | uncore_box_init(box); | ||
905 | pci_set_drvdata(pdev, box); | ||
906 | |||
907 | raw_spin_lock(&uncore_box_lock); | ||
908 | if (list_empty(&pmu->box_list)) | ||
909 | first_box = true; | ||
910 | list_add_tail(&box->list, &pmu->box_list); | ||
911 | raw_spin_unlock(&uncore_box_lock); | ||
912 | |||
913 | if (first_box) | ||
914 | uncore_pmu_register(pmu); | ||
915 | return 0; | ||
916 | } | ||
917 | |||
918 | static void uncore_pci_remove(struct pci_dev *pdev) | ||
919 | { | ||
920 | struct intel_uncore_box *box = pci_get_drvdata(pdev); | ||
921 | struct intel_uncore_pmu *pmu; | ||
922 | int i, cpu, phys_id; | ||
923 | bool last_box = false; | ||
924 | |||
925 | phys_id = uncore_pcibus_to_physid(pdev->bus); | ||
926 | box = pci_get_drvdata(pdev); | ||
927 | if (!box) { | ||
928 | for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) { | ||
929 | if (uncore_extra_pci_dev[phys_id][i] == pdev) { | ||
930 | uncore_extra_pci_dev[phys_id][i] = NULL; | ||
931 | break; | ||
932 | } | ||
933 | } | ||
934 | WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX); | ||
935 | return; | ||
936 | } | ||
937 | |||
938 | pmu = box->pmu; | ||
939 | if (WARN_ON_ONCE(phys_id != box->phys_id)) | ||
940 | return; | ||
941 | |||
942 | pci_set_drvdata(pdev, NULL); | ||
943 | |||
944 | raw_spin_lock(&uncore_box_lock); | ||
945 | list_del(&box->list); | ||
946 | if (list_empty(&pmu->box_list)) | ||
947 | last_box = true; | ||
948 | raw_spin_unlock(&uncore_box_lock); | ||
949 | |||
950 | for_each_possible_cpu(cpu) { | ||
951 | if (*per_cpu_ptr(pmu->box, cpu) == box) { | ||
952 | *per_cpu_ptr(pmu->box, cpu) = NULL; | ||
953 | atomic_dec(&box->refcnt); | ||
954 | } | ||
955 | } | ||
956 | |||
957 | WARN_ON_ONCE(atomic_read(&box->refcnt) != 1); | ||
958 | kfree(box); | ||
959 | |||
960 | if (last_box) | ||
961 | perf_pmu_unregister(&pmu->pmu); | ||
962 | } | ||
963 | |||
964 | static int __init uncore_pci_init(void) | ||
965 | { | ||
966 | int ret; | ||
967 | |||
968 | switch (boot_cpu_data.x86_model) { | ||
969 | case 45: /* Sandy Bridge-EP */ | ||
970 | ret = snbep_uncore_pci_init(); | ||
971 | break; | ||
972 | case 62: /* Ivy Bridge-EP */ | ||
973 | ret = ivbep_uncore_pci_init(); | ||
974 | break; | ||
975 | case 63: /* Haswell-EP */ | ||
976 | ret = hswep_uncore_pci_init(); | ||
977 | break; | ||
978 | case 79: /* BDX-EP */ | ||
979 | case 86: /* BDX-DE */ | ||
980 | ret = bdx_uncore_pci_init(); | ||
981 | break; | ||
982 | case 42: /* Sandy Bridge */ | ||
983 | ret = snb_uncore_pci_init(); | ||
984 | break; | ||
985 | case 58: /* Ivy Bridge */ | ||
986 | ret = ivb_uncore_pci_init(); | ||
987 | break; | ||
988 | case 60: /* Haswell */ | ||
989 | case 69: /* Haswell Celeron */ | ||
990 | ret = hsw_uncore_pci_init(); | ||
991 | break; | ||
992 | case 61: /* Broadwell */ | ||
993 | ret = bdw_uncore_pci_init(); | ||
994 | break; | ||
995 | case 87: /* Knights Landing */ | ||
996 | ret = knl_uncore_pci_init(); | ||
997 | break; | ||
998 | case 94: /* SkyLake */ | ||
999 | ret = skl_uncore_pci_init(); | ||
1000 | break; | ||
1001 | default: | ||
1002 | return 0; | ||
1003 | } | ||
1004 | |||
1005 | if (ret) | ||
1006 | return ret; | ||
1007 | |||
1008 | ret = uncore_types_init(uncore_pci_uncores); | ||
1009 | if (ret) | ||
1010 | return ret; | ||
1011 | |||
1012 | uncore_pci_driver->probe = uncore_pci_probe; | ||
1013 | uncore_pci_driver->remove = uncore_pci_remove; | ||
1014 | |||
1015 | ret = pci_register_driver(uncore_pci_driver); | ||
1016 | if (ret == 0) | ||
1017 | pcidrv_registered = true; | ||
1018 | else | ||
1019 | uncore_types_exit(uncore_pci_uncores); | ||
1020 | |||
1021 | return ret; | ||
1022 | } | ||
1023 | |||
1024 | static void __init uncore_pci_exit(void) | ||
1025 | { | ||
1026 | if (pcidrv_registered) { | ||
1027 | pcidrv_registered = false; | ||
1028 | pci_unregister_driver(uncore_pci_driver); | ||
1029 | uncore_types_exit(uncore_pci_uncores); | ||
1030 | } | ||
1031 | } | ||
1032 | |||
1033 | /* CPU hot plug/unplug are serialized by cpu_add_remove_lock mutex */ | ||
1034 | static LIST_HEAD(boxes_to_free); | ||
1035 | |||
1036 | static void uncore_kfree_boxes(void) | ||
1037 | { | ||
1038 | struct intel_uncore_box *box; | ||
1039 | |||
1040 | while (!list_empty(&boxes_to_free)) { | ||
1041 | box = list_entry(boxes_to_free.next, | ||
1042 | struct intel_uncore_box, list); | ||
1043 | list_del(&box->list); | ||
1044 | kfree(box); | ||
1045 | } | ||
1046 | } | ||
1047 | |||
1048 | static void uncore_cpu_dying(int cpu) | ||
1049 | { | ||
1050 | struct intel_uncore_type *type; | ||
1051 | struct intel_uncore_pmu *pmu; | ||
1052 | struct intel_uncore_box *box; | ||
1053 | int i, j; | ||
1054 | |||
1055 | for (i = 0; uncore_msr_uncores[i]; i++) { | ||
1056 | type = uncore_msr_uncores[i]; | ||
1057 | for (j = 0; j < type->num_boxes; j++) { | ||
1058 | pmu = &type->pmus[j]; | ||
1059 | box = *per_cpu_ptr(pmu->box, cpu); | ||
1060 | *per_cpu_ptr(pmu->box, cpu) = NULL; | ||
1061 | if (box && atomic_dec_and_test(&box->refcnt)) | ||
1062 | list_add(&box->list, &boxes_to_free); | ||
1063 | } | ||
1064 | } | ||
1065 | } | ||
1066 | |||
1067 | static int uncore_cpu_starting(int cpu) | ||
1068 | { | ||
1069 | struct intel_uncore_type *type; | ||
1070 | struct intel_uncore_pmu *pmu; | ||
1071 | struct intel_uncore_box *box, *exist; | ||
1072 | int i, j, k, phys_id; | ||
1073 | |||
1074 | phys_id = topology_physical_package_id(cpu); | ||
1075 | |||
1076 | for (i = 0; uncore_msr_uncores[i]; i++) { | ||
1077 | type = uncore_msr_uncores[i]; | ||
1078 | for (j = 0; j < type->num_boxes; j++) { | ||
1079 | pmu = &type->pmus[j]; | ||
1080 | box = *per_cpu_ptr(pmu->box, cpu); | ||
1081 | /* called by uncore_cpu_init? */ | ||
1082 | if (box && box->phys_id >= 0) { | ||
1083 | uncore_box_init(box); | ||
1084 | continue; | ||
1085 | } | ||
1086 | |||
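/*
 * Another cpu in the same physical package may already own a box for this
 * pmu; if so, share it (bumping its refcount) and queue the box allocated
 * by uncore_cpu_prepare() for freeing.
 */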
1087 | for_each_online_cpu(k) { | ||
1088 | exist = *per_cpu_ptr(pmu->box, k); | ||
1089 | if (exist && exist->phys_id == phys_id) { | ||
1090 | atomic_inc(&exist->refcnt); | ||
1091 | *per_cpu_ptr(pmu->box, cpu) = exist; | ||
1092 | if (box) { | ||
1093 | list_add(&box->list, | ||
1094 | &boxes_to_free); | ||
1095 | box = NULL; | ||
1096 | } | ||
1097 | break; | ||
1098 | } | ||
1099 | } | ||
1100 | |||
1101 | if (box) { | ||
1102 | box->phys_id = phys_id; | ||
1103 | uncore_box_init(box); | ||
1104 | } | ||
1105 | } | ||
1106 | } | ||
1107 | return 0; | ||
1108 | } | ||
1109 | |||
1110 | static int uncore_cpu_prepare(int cpu, int phys_id) | ||
1111 | { | ||
1112 | struct intel_uncore_type *type; | ||
1113 | struct intel_uncore_pmu *pmu; | ||
1114 | struct intel_uncore_box *box; | ||
1115 | int i, j; | ||
1116 | |||
1117 | for (i = 0; uncore_msr_uncores[i]; i++) { | ||
1118 | type = uncore_msr_uncores[i]; | ||
1119 | for (j = 0; j < type->num_boxes; j++) { | ||
1120 | pmu = &type->pmus[j]; | ||
1121 | if (pmu->func_id < 0) | ||
1122 | pmu->func_id = j; | ||
1123 | |||
1124 | box = uncore_alloc_box(type, cpu_to_node(cpu)); | ||
1125 | if (!box) | ||
1126 | return -ENOMEM; | ||
1127 | |||
1128 | box->pmu = pmu; | ||
1129 | box->phys_id = phys_id; | ||
1130 | *per_cpu_ptr(pmu->box, cpu) = box; | ||
1131 | } | ||
1132 | } | ||
1133 | return 0; | ||
1134 | } | ||
1135 | |||
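/*
 * Move responsibility for the given uncore PMUs from old_cpu to new_cpu.
 * With old_cpu < 0 the boxes are simply bound to new_cpu; otherwise the
 * hrtimer is stopped and the perf context is migrated to new_cpu, or the
 * box is left without an owner when new_cpu is negative.
 */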
1136 | static void | ||
1137 | uncore_change_context(struct intel_uncore_type **uncores, int old_cpu, int new_cpu) | ||
1138 | { | ||
1139 | struct intel_uncore_type *type; | ||
1140 | struct intel_uncore_pmu *pmu; | ||
1141 | struct intel_uncore_box *box; | ||
1142 | int i, j; | ||
1143 | |||
1144 | for (i = 0; uncores[i]; i++) { | ||
1145 | type = uncores[i]; | ||
1146 | for (j = 0; j < type->num_boxes; j++) { | ||
1147 | pmu = &type->pmus[j]; | ||
1148 | if (old_cpu < 0) | ||
1149 | box = uncore_pmu_to_box(pmu, new_cpu); | ||
1150 | else | ||
1151 | box = uncore_pmu_to_box(pmu, old_cpu); | ||
1152 | if (!box) | ||
1153 | continue; | ||
1154 | |||
1155 | if (old_cpu < 0) { | ||
1156 | WARN_ON_ONCE(box->cpu != -1); | ||
1157 | box->cpu = new_cpu; | ||
1158 | continue; | ||
1159 | } | ||
1160 | |||
1161 | WARN_ON_ONCE(box->cpu != old_cpu); | ||
1162 | if (new_cpu >= 0) { | ||
1163 | uncore_pmu_cancel_hrtimer(box); | ||
1164 | perf_pmu_migrate_context(&pmu->pmu, | ||
1165 | old_cpu, new_cpu); | ||
1166 | box->cpu = new_cpu; | ||
1167 | } else { | ||
1168 | box->cpu = -1; | ||
1169 | } | ||
1170 | } | ||
1171 | } | ||
1172 | } | ||
1173 | |||
1174 | static void uncore_event_exit_cpu(int cpu) | ||
1175 | { | ||
1176 | int i, phys_id, target; | ||
1177 | |||
1178 | /* if exiting cpu is used for collecting uncore events */ | ||
1179 | if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask)) | ||
1180 | return; | ||
1181 | |||
1182 | /* find a new cpu to collect uncore events */ | ||
1183 | phys_id = topology_physical_package_id(cpu); | ||
1184 | target = -1; | ||
1185 | for_each_online_cpu(i) { | ||
1186 | if (i == cpu) | ||
1187 | continue; | ||
1188 | if (phys_id == topology_physical_package_id(i)) { | ||
1189 | target = i; | ||
1190 | break; | ||
1191 | } | ||
1192 | } | ||
1193 | |||
1194 | /* migrate uncore events to the new cpu */ | ||
1195 | if (target >= 0) | ||
1196 | cpumask_set_cpu(target, &uncore_cpu_mask); | ||
1197 | |||
1198 | uncore_change_context(uncore_msr_uncores, cpu, target); | ||
1199 | uncore_change_context(uncore_pci_uncores, cpu, target); | ||
1200 | } | ||
1201 | |||
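/*
 * Make the incoming cpu the uncore event collector for its physical
 * package unless another cpu in the same package already fills that role.
 */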
1202 | static void uncore_event_init_cpu(int cpu) | ||
1203 | { | ||
1204 | int i, phys_id; | ||
1205 | |||
1206 | phys_id = topology_physical_package_id(cpu); | ||
1207 | for_each_cpu(i, &uncore_cpu_mask) { | ||
1208 | if (phys_id == topology_physical_package_id(i)) | ||
1209 | return; | ||
1210 | } | ||
1211 | |||
1212 | cpumask_set_cpu(cpu, &uncore_cpu_mask); | ||
1213 | |||
1214 | uncore_change_context(uncore_msr_uncores, -1, cpu); | ||
1215 | uncore_change_context(uncore_pci_uncores, -1, cpu); | ||
1216 | } | ||
1217 | |||
1218 | static int uncore_cpu_notifier(struct notifier_block *self, | ||
1219 | unsigned long action, void *hcpu) | ||
1220 | { | ||
1221 | unsigned int cpu = (long)hcpu; | ||
1222 | |||
1223 | /* allocate/free data structure for uncore box */ | ||
1224 | switch (action & ~CPU_TASKS_FROZEN) { | ||
1225 | case CPU_UP_PREPARE: | ||
1226 | uncore_cpu_prepare(cpu, -1); | ||
1227 | break; | ||
1228 | case CPU_STARTING: | ||
1229 | uncore_cpu_starting(cpu); | ||
1230 | break; | ||
1231 | case CPU_UP_CANCELED: | ||
1232 | case CPU_DYING: | ||
1233 | uncore_cpu_dying(cpu); | ||
1234 | break; | ||
1235 | case CPU_ONLINE: | ||
1236 | case CPU_DEAD: | ||
1237 | uncore_kfree_boxes(); | ||
1238 | break; | ||
1239 | default: | ||
1240 | break; | ||
1241 | } | ||
1242 | |||
1243 | /* select the cpu that collects uncore events */ | ||
1244 | switch (action & ~CPU_TASKS_FROZEN) { | ||
1245 | case CPU_DOWN_FAILED: | ||
1246 | case CPU_STARTING: | ||
1247 | uncore_event_init_cpu(cpu); | ||
1248 | break; | ||
1249 | case CPU_DOWN_PREPARE: | ||
1250 | uncore_event_exit_cpu(cpu); | ||
1251 | break; | ||
1252 | default: | ||
1253 | break; | ||
1254 | } | ||
1255 | |||
1256 | return NOTIFY_OK; | ||
1257 | } | ||
1258 | |||
1259 | static struct notifier_block uncore_cpu_nb = { | ||
1260 | .notifier_call = uncore_cpu_notifier, | ||
1261 | /* | ||
1262 | * to migrate uncore events, our notifier should be executed | ||
1263 | * before perf core's notifier. | ||
1264 | */ | ||
1265 | .priority = CPU_PRI_PERF + 1, | ||
1266 | }; | ||
1267 | |||
1268 | static void __init uncore_cpu_setup(void *dummy) | ||
1269 | { | ||
1270 | uncore_cpu_starting(smp_processor_id()); | ||
1271 | } | ||
1272 | |||
1273 | static int __init uncore_cpu_init(void) | ||
1274 | { | ||
1275 | int ret; | ||
1276 | |||
1277 | switch (boot_cpu_data.x86_model) { | ||
1278 | case 26: /* Nehalem */ | ||
1279 | case 30: | ||
1280 | case 37: /* Westmere */ | ||
1281 | case 44: | ||
1282 | nhm_uncore_cpu_init(); | ||
1283 | break; | ||
1284 | case 42: /* Sandy Bridge */ | ||
1285 | case 58: /* Ivy Bridge */ | ||
1286 | case 60: /* Haswell */ | ||
1287 | case 69: /* Haswell */ | ||
1288 | case 70: /* Haswell */ | ||
1289 | case 61: /* Broadwell */ | ||
1290 | case 71: /* Broadwell */ | ||
1291 | snb_uncore_cpu_init(); | ||
1292 | break; | ||
1293 | case 45: /* Sandy Bridge-EP */ | ||
1294 | snbep_uncore_cpu_init(); | ||
1295 | break; | ||
1296 | case 46: /* Nehalem-EX */ | ||
1297 | case 47: /* Westmere-EX aka. Xeon E7 */ | ||
1298 | nhmex_uncore_cpu_init(); | ||
1299 | break; | ||
1300 | case 62: /* Ivy Bridge-EP */ | ||
1301 | ivbep_uncore_cpu_init(); | ||
1302 | break; | ||
1303 | case 63: /* Haswell-EP */ | ||
1304 | hswep_uncore_cpu_init(); | ||
1305 | break; | ||
1306 | case 79: /* BDX-EP */ | ||
1307 | case 86: /* BDX-DE */ | ||
1308 | bdx_uncore_cpu_init(); | ||
1309 | break; | ||
1310 | case 87: /* Knights Landing */ | ||
1311 | knl_uncore_cpu_init(); | ||
1312 | break; | ||
1313 | default: | ||
1314 | return 0; | ||
1315 | } | ||
1316 | |||
1317 | ret = uncore_types_init(uncore_msr_uncores); | ||
1318 | if (ret) | ||
1319 | return ret; | ||
1320 | |||
1321 | return 0; | ||
1322 | } | ||
1323 | |||
1324 | static int __init uncore_pmus_register(void) | ||
1325 | { | ||
1326 | struct intel_uncore_pmu *pmu; | ||
1327 | struct intel_uncore_type *type; | ||
1328 | int i, j; | ||
1329 | |||
1330 | for (i = 0; uncore_msr_uncores[i]; i++) { | ||
1331 | type = uncore_msr_uncores[i]; | ||
1332 | for (j = 0; j < type->num_boxes; j++) { | ||
1333 | pmu = &type->pmus[j]; | ||
1334 | uncore_pmu_register(pmu); | ||
1335 | } | ||
1336 | } | ||
1337 | |||
1338 | return 0; | ||
1339 | } | ||
1340 | |||
1341 | static void __init uncore_cpumask_init(void) | ||
1342 | { | ||
1343 | int cpu; | ||
1344 | |||
1345 | /* | ||
1346 | * only invoke once from the msr or pci init code | ||
1347 | */ | ||
1348 | if (!cpumask_empty(&uncore_cpu_mask)) | ||
1349 | return; | ||
1350 | |||
1351 | cpu_notifier_register_begin(); | ||
1352 | |||
1353 | for_each_online_cpu(cpu) { | ||
1354 | int i, phys_id = topology_physical_package_id(cpu); | ||
1355 | |||
1356 | for_each_cpu(i, &uncore_cpu_mask) { | ||
1357 | if (phys_id == topology_physical_package_id(i)) { | ||
1358 | phys_id = -1; | ||
1359 | break; | ||
1360 | } | ||
1361 | } | ||
1362 | if (phys_id < 0) | ||
1363 | continue; | ||
1364 | |||
1365 | uncore_cpu_prepare(cpu, phys_id); | ||
1366 | uncore_event_init_cpu(cpu); | ||
1367 | } | ||
1368 | on_each_cpu(uncore_cpu_setup, NULL, 1); | ||
1369 | |||
1370 | __register_cpu_notifier(&uncore_cpu_nb); | ||
1371 | |||
1372 | cpu_notifier_register_done(); | ||
1373 | } | ||
1374 | |||
1375 | |||
1376 | static int __init intel_uncore_init(void) | ||
1377 | { | ||
1378 | int ret; | ||
1379 | |||
1380 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) | ||
1381 | return -ENODEV; | ||
1382 | |||
1383 | if (cpu_has_hypervisor) | ||
1384 | return -ENODEV; | ||
1385 | |||
1386 | ret = uncore_pci_init(); | ||
1387 | if (ret) | ||
1388 | goto fail; | ||
1389 | ret = uncore_cpu_init(); | ||
1390 | if (ret) { | ||
1391 | uncore_pci_exit(); | ||
1392 | goto fail; | ||
1393 | } | ||
1394 | uncore_cpumask_init(); | ||
1395 | |||
1396 | uncore_pmus_register(); | ||
1397 | return 0; | ||
1398 | fail: | ||
1399 | return ret; | ||
1400 | } | ||
1401 | device_initcall(intel_uncore_init); | ||