diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-01-06 11:02:58 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-01-06 11:02:58 -0500 |
commit | 35b740e4662ef386f0c60e1b60aaf5b44db9914c (patch) | |
tree | 502a8f9499bc1b4cb3300d666dab2d01a1921224 /arch/x86/kernel | |
parent | 423d091dfe58d3109d84c408810a7cfa82f6f184 (diff) | |
parent | 9e183426bfb52bb44bf3c443d6587e4d02478603 (diff) |
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (106 commits)
perf kvm: Fix copy & paste error in description
perf script: Kill script_spec__delete
perf top: Fix a memory leak
perf stat: Introduce get_ratio_color() helper
perf session: Remove impossible condition check
perf tools: Fix feature-bits rework fallout, remove unused variable
perf script: Add generic perl handler to process events
perf tools: Use for_each_set_bit() to iterate over feature flags
perf tools: Unify handling of features when writing feature section
perf report: Accept fifos as input file
perf tools: Moving code in some files
perf tools: Fix out-of-bound access to struct perf_session
perf tools: Continue processing header on unknown features
perf tools: Improve macros for struct feature_ops
perf: builtin-record: Document and check that mmap_pages must be a power of two.
perf: builtin-record: Provide advice if mmap'ing fails with EPERM.
perf tools: Fix truncated annotation
perf script: look up thread using tid instead of pid
perf tools: Look up thread names for system wide profiling
perf tools: Fix comm for processes with named threads
...
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.c | 262 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.h | 51 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_amd.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel.c | 88 | ||||
-rw-r--r-- | arch/x86/kernel/jump_label.c | 2 |
5 files changed, 316 insertions, 89 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 2bda212a001..5adce1040b1 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -484,18 +484,195 @@ static inline int is_x86_event(struct perf_event *event) | |||
484 | return event->pmu == &pmu; | 484 | return event->pmu == &pmu; |
485 | } | 485 | } |
486 | 486 | ||
487 | /* | ||
488 | * Event scheduler state: | ||
489 | * | ||
490 | * Assign events iterating over all events and counters, beginning | ||
491 | * with events with least weights first. Keep the current iterator | ||
492 | * state in struct sched_state. | ||
493 | */ | ||
494 | struct sched_state { | ||
495 | int weight; | ||
496 | int event; /* event index */ | ||
497 | int counter; /* counter index */ | ||
498 | int unassigned; /* number of events to be assigned left */ | ||
499 | unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
500 | }; | ||
501 | |||
502 | /* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */ | ||
503 | #define SCHED_STATES_MAX 2 | ||
504 | |||
505 | struct perf_sched { | ||
506 | int max_weight; | ||
507 | int max_events; | ||
508 | struct event_constraint **constraints; | ||
509 | struct sched_state state; | ||
510 | int saved_states; | ||
511 | struct sched_state saved[SCHED_STATES_MAX]; | ||
512 | }; | ||
513 | |||
514 | /* | ||
515 | * Initialize interator that runs through all events and counters. | ||
516 | */ | ||
517 | static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c, | ||
518 | int num, int wmin, int wmax) | ||
519 | { | ||
520 | int idx; | ||
521 | |||
522 | memset(sched, 0, sizeof(*sched)); | ||
523 | sched->max_events = num; | ||
524 | sched->max_weight = wmax; | ||
525 | sched->constraints = c; | ||
526 | |||
527 | for (idx = 0; idx < num; idx++) { | ||
528 | if (c[idx]->weight == wmin) | ||
529 | break; | ||
530 | } | ||
531 | |||
532 | sched->state.event = idx; /* start with min weight */ | ||
533 | sched->state.weight = wmin; | ||
534 | sched->state.unassigned = num; | ||
535 | } | ||
536 | |||
537 | static void perf_sched_save_state(struct perf_sched *sched) | ||
538 | { | ||
539 | if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX)) | ||
540 | return; | ||
541 | |||
542 | sched->saved[sched->saved_states] = sched->state; | ||
543 | sched->saved_states++; | ||
544 | } | ||
545 | |||
546 | static bool perf_sched_restore_state(struct perf_sched *sched) | ||
547 | { | ||
548 | if (!sched->saved_states) | ||
549 | return false; | ||
550 | |||
551 | sched->saved_states--; | ||
552 | sched->state = sched->saved[sched->saved_states]; | ||
553 | |||
554 | /* continue with next counter: */ | ||
555 | clear_bit(sched->state.counter++, sched->state.used); | ||
556 | |||
557 | return true; | ||
558 | } | ||
559 | |||
560 | /* | ||
561 | * Select a counter for the current event to schedule. Return true on | ||
562 | * success. | ||
563 | */ | ||
564 | static bool __perf_sched_find_counter(struct perf_sched *sched) | ||
565 | { | ||
566 | struct event_constraint *c; | ||
567 | int idx; | ||
568 | |||
569 | if (!sched->state.unassigned) | ||
570 | return false; | ||
571 | |||
572 | if (sched->state.event >= sched->max_events) | ||
573 | return false; | ||
574 | |||
575 | c = sched->constraints[sched->state.event]; | ||
576 | |||
577 | /* Prefer fixed purpose counters */ | ||
578 | if (x86_pmu.num_counters_fixed) { | ||
579 | idx = X86_PMC_IDX_FIXED; | ||
580 | for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) { | ||
581 | if (!__test_and_set_bit(idx, sched->state.used)) | ||
582 | goto done; | ||
583 | } | ||
584 | } | ||
585 | /* Grab the first unused counter starting with idx */ | ||
586 | idx = sched->state.counter; | ||
587 | for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_FIXED) { | ||
588 | if (!__test_and_set_bit(idx, sched->state.used)) | ||
589 | goto done; | ||
590 | } | ||
591 | |||
592 | return false; | ||
593 | |||
594 | done: | ||
595 | sched->state.counter = idx; | ||
596 | |||
597 | if (c->overlap) | ||
598 | perf_sched_save_state(sched); | ||
599 | |||
600 | return true; | ||
601 | } | ||
602 | |||
603 | static bool perf_sched_find_counter(struct perf_sched *sched) | ||
604 | { | ||
605 | while (!__perf_sched_find_counter(sched)) { | ||
606 | if (!perf_sched_restore_state(sched)) | ||
607 | return false; | ||
608 | } | ||
609 | |||
610 | return true; | ||
611 | } | ||
612 | |||
613 | /* | ||
614 | * Go through all unassigned events and find the next one to schedule. | ||
615 | * Take events with the least weight first. Return true on success. | ||
616 | */ | ||
617 | static bool perf_sched_next_event(struct perf_sched *sched) | ||
618 | { | ||
619 | struct event_constraint *c; | ||
620 | |||
621 | if (!sched->state.unassigned || !--sched->state.unassigned) | ||
622 | return false; | ||
623 | |||
624 | do { | ||
625 | /* next event */ | ||
626 | sched->state.event++; | ||
627 | if (sched->state.event >= sched->max_events) { | ||
628 | /* next weight */ | ||
629 | sched->state.event = 0; | ||
630 | sched->state.weight++; | ||
631 | if (sched->state.weight > sched->max_weight) | ||
632 | return false; | ||
633 | } | ||
634 | c = sched->constraints[sched->state.event]; | ||
635 | } while (c->weight != sched->state.weight); | ||
636 | |||
637 | sched->state.counter = 0; /* start with first counter */ | ||
638 | |||
639 | return true; | ||
640 | } | ||
641 | |||
642 | /* | ||
643 | * Assign a counter for each event. | ||
644 | */ | ||
645 | static int perf_assign_events(struct event_constraint **constraints, int n, | ||
646 | int wmin, int wmax, int *assign) | ||
647 | { | ||
648 | struct perf_sched sched; | ||
649 | |||
650 | perf_sched_init(&sched, constraints, n, wmin, wmax); | ||
651 | |||
652 | do { | ||
653 | if (!perf_sched_find_counter(&sched)) | ||
654 | break; /* failed */ | ||
655 | if (assign) | ||
656 | assign[sched.state.event] = sched.state.counter; | ||
657 | } while (perf_sched_next_event(&sched)); | ||
658 | |||
659 | return sched.state.unassigned; | ||
660 | } | ||
661 | |||
487 | int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | 662 | int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) |
488 | { | 663 | { |
489 | struct event_constraint *c, *constraints[X86_PMC_IDX_MAX]; | 664 | struct event_constraint *c, *constraints[X86_PMC_IDX_MAX]; |
490 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 665 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
491 | int i, j, w, wmax, num = 0; | 666 | int i, wmin, wmax, num = 0; |
492 | struct hw_perf_event *hwc; | 667 | struct hw_perf_event *hwc; |
493 | 668 | ||
494 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); | 669 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); |
495 | 670 | ||
496 | for (i = 0; i < n; i++) { | 671 | for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) { |
497 | c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]); | 672 | c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]); |
498 | constraints[i] = c; | 673 | constraints[i] = c; |
674 | wmin = min(wmin, c->weight); | ||
675 | wmax = max(wmax, c->weight); | ||
499 | } | 676 | } |
500 | 677 | ||
501 | /* | 678 | /* |
@@ -521,60 +698,12 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | |||
521 | if (assign) | 698 | if (assign) |
522 | assign[i] = hwc->idx; | 699 | assign[i] = hwc->idx; |
523 | } | 700 | } |
524 | if (i == n) | ||
525 | goto done; | ||
526 | |||
527 | /* | ||
528 | * begin slow path | ||
529 | */ | ||
530 | |||
531 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); | ||
532 | 701 | ||
533 | /* | 702 | /* slow path */ |
534 | * weight = number of possible counters | 703 | if (i != n) |
535 | * | 704 | num = perf_assign_events(constraints, n, wmin, wmax, assign); |
536 | * 1 = most constrained, only works on one counter | ||
537 | * wmax = least constrained, works on any counter | ||
538 | * | ||
539 | * assign events to counters starting with most | ||
540 | * constrained events. | ||
541 | */ | ||
542 | wmax = x86_pmu.num_counters; | ||
543 | 705 | ||
544 | /* | 706 | /* |
545 | * when fixed event counters are present, | ||
546 | * wmax is incremented by 1 to account | ||
547 | * for one more choice | ||
548 | */ | ||
549 | if (x86_pmu.num_counters_fixed) | ||
550 | wmax++; | ||
551 | |||
552 | for (w = 1, num = n; num && w <= wmax; w++) { | ||
553 | /* for each event */ | ||
554 | for (i = 0; num && i < n; i++) { | ||
555 | c = constraints[i]; | ||
556 | hwc = &cpuc->event_list[i]->hw; | ||
557 | |||
558 | if (c->weight != w) | ||
559 | continue; | ||
560 | |||
561 | for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) { | ||
562 | if (!test_bit(j, used_mask)) | ||
563 | break; | ||
564 | } | ||
565 | |||
566 | if (j == X86_PMC_IDX_MAX) | ||
567 | break; | ||
568 | |||
569 | __set_bit(j, used_mask); | ||
570 | |||
571 | if (assign) | ||
572 | assign[i] = j; | ||
573 | num--; | ||
574 | } | ||
575 | } | ||
576 | done: | ||
577 | /* | ||
578 | * scheduling failed or is just a simulation, | 707 | * scheduling failed or is just a simulation, |
579 | * free resources if necessary | 708 | * free resources if necessary |
580 | */ | 709 | */ |
@@ -1119,6 +1248,7 @@ static void __init pmu_check_apic(void) | |||
1119 | 1248 | ||
1120 | static int __init init_hw_perf_events(void) | 1249 | static int __init init_hw_perf_events(void) |
1121 | { | 1250 | { |
1251 | struct x86_pmu_quirk *quirk; | ||
1122 | struct event_constraint *c; | 1252 | struct event_constraint *c; |
1123 | int err; | 1253 | int err; |
1124 | 1254 | ||
@@ -1147,8 +1277,8 @@ static int __init init_hw_perf_events(void) | |||
1147 | 1277 | ||
1148 | pr_cont("%s PMU driver.\n", x86_pmu.name); | 1278 | pr_cont("%s PMU driver.\n", x86_pmu.name); |
1149 | 1279 | ||
1150 | if (x86_pmu.quirks) | 1280 | for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next) |
1151 | x86_pmu.quirks(); | 1281 | quirk->func(); |
1152 | 1282 | ||
1153 | if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { | 1283 | if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { |
1154 | WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", | 1284 | WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", |
@@ -1171,12 +1301,18 @@ static int __init init_hw_perf_events(void) | |||
1171 | 1301 | ||
1172 | unconstrained = (struct event_constraint) | 1302 | unconstrained = (struct event_constraint) |
1173 | __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, | 1303 | __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, |
1174 | 0, x86_pmu.num_counters); | 1304 | 0, x86_pmu.num_counters, 0); |
1175 | 1305 | ||
1176 | if (x86_pmu.event_constraints) { | 1306 | if (x86_pmu.event_constraints) { |
1307 | /* | ||
1308 | * event on fixed counter2 (REF_CYCLES) only works on this | ||
1309 | * counter, so do not extend mask to generic counters | ||
1310 | */ | ||
1177 | for_each_event_constraint(c, x86_pmu.event_constraints) { | 1311 | for_each_event_constraint(c, x86_pmu.event_constraints) { |
1178 | if (c->cmask != X86_RAW_EVENT_MASK) | 1312 | if (c->cmask != X86_RAW_EVENT_MASK |
1313 | || c->idxmsk64 == X86_PMC_MSK_FIXED_REF_CYCLES) { | ||
1179 | continue; | 1314 | continue; |
1315 | } | ||
1180 | 1316 | ||
1181 | c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; | 1317 | c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; |
1182 | c->weight += x86_pmu.num_counters; | 1318 | c->weight += x86_pmu.num_counters; |
@@ -1566,3 +1702,15 @@ unsigned long perf_misc_flags(struct pt_regs *regs) | |||
1566 | 1702 | ||
1567 | return misc; | 1703 | return misc; |
1568 | } | 1704 | } |
1705 | |||
1706 | void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) | ||
1707 | { | ||
1708 | cap->version = x86_pmu.version; | ||
1709 | cap->num_counters_gp = x86_pmu.num_counters; | ||
1710 | cap->num_counters_fixed = x86_pmu.num_counters_fixed; | ||
1711 | cap->bit_width_gp = x86_pmu.cntval_bits; | ||
1712 | cap->bit_width_fixed = x86_pmu.cntval_bits; | ||
1713 | cap->events_mask = (unsigned int)x86_pmu.events_maskl; | ||
1714 | cap->events_mask_len = x86_pmu.events_mask_len; | ||
1715 | } | ||
1716 | EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability); | ||
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index b9698d40ac4..8944062f46e 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h | |||
@@ -45,6 +45,7 @@ struct event_constraint { | |||
45 | u64 code; | 45 | u64 code; |
46 | u64 cmask; | 46 | u64 cmask; |
47 | int weight; | 47 | int weight; |
48 | int overlap; | ||
48 | }; | 49 | }; |
49 | 50 | ||
50 | struct amd_nb { | 51 | struct amd_nb { |
@@ -151,15 +152,40 @@ struct cpu_hw_events { | |||
151 | void *kfree_on_online; | 152 | void *kfree_on_online; |
152 | }; | 153 | }; |
153 | 154 | ||
154 | #define __EVENT_CONSTRAINT(c, n, m, w) {\ | 155 | #define __EVENT_CONSTRAINT(c, n, m, w, o) {\ |
155 | { .idxmsk64 = (n) }, \ | 156 | { .idxmsk64 = (n) }, \ |
156 | .code = (c), \ | 157 | .code = (c), \ |
157 | .cmask = (m), \ | 158 | .cmask = (m), \ |
158 | .weight = (w), \ | 159 | .weight = (w), \ |
160 | .overlap = (o), \ | ||
159 | } | 161 | } |
160 | 162 | ||
161 | #define EVENT_CONSTRAINT(c, n, m) \ | 163 | #define EVENT_CONSTRAINT(c, n, m) \ |
162 | __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) | 164 | __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0) |
165 | |||
166 | /* | ||
167 | * The overlap flag marks event constraints with overlapping counter | ||
168 | * masks. This is the case if the counter mask of such an event is not | ||
169 | * a subset of any other counter mask of a constraint with an equal or | ||
170 | * higher weight, e.g.: | ||
171 | * | ||
172 | * c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0); | ||
173 | * c_another1 = EVENT_CONSTRAINT(0, 0x07, 0); | ||
174 | * c_another2 = EVENT_CONSTRAINT(0, 0x38, 0); | ||
175 | * | ||
176 | * The event scheduler may not select the correct counter in the first | ||
177 | * cycle because it needs to know which subsequent events will be | ||
178 | * scheduled. It may fail to schedule the events then. So we set the | ||
179 | * overlap flag for such constraints to give the scheduler a hint which | ||
180 | * events to select for counter rescheduling. | ||
181 | * | ||
182 | * Care must be taken as the rescheduling algorithm is O(n!) which | ||
183 | * will increase scheduling cycles for an over-commited system | ||
184 | * dramatically. The number of such EVENT_CONSTRAINT_OVERLAP() macros | ||
185 | * and its counter masks must be kept at a minimum. | ||
186 | */ | ||
187 | #define EVENT_CONSTRAINT_OVERLAP(c, n, m) \ | ||
188 | __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1) | ||
163 | 189 | ||
164 | /* | 190 | /* |
165 | * Constraint on the Event code. | 191 | * Constraint on the Event code. |
@@ -235,6 +261,11 @@ union perf_capabilities { | |||
235 | u64 capabilities; | 261 | u64 capabilities; |
236 | }; | 262 | }; |
237 | 263 | ||
264 | struct x86_pmu_quirk { | ||
265 | struct x86_pmu_quirk *next; | ||
266 | void (*func)(void); | ||
267 | }; | ||
268 | |||
238 | /* | 269 | /* |
239 | * struct x86_pmu - generic x86 pmu | 270 | * struct x86_pmu - generic x86 pmu |
240 | */ | 271 | */ |
@@ -259,6 +290,11 @@ struct x86_pmu { | |||
259 | int num_counters_fixed; | 290 | int num_counters_fixed; |
260 | int cntval_bits; | 291 | int cntval_bits; |
261 | u64 cntval_mask; | 292 | u64 cntval_mask; |
293 | union { | ||
294 | unsigned long events_maskl; | ||
295 | unsigned long events_mask[BITS_TO_LONGS(ARCH_PERFMON_EVENTS_COUNT)]; | ||
296 | }; | ||
297 | int events_mask_len; | ||
262 | int apic; | 298 | int apic; |
263 | u64 max_period; | 299 | u64 max_period; |
264 | struct event_constraint * | 300 | struct event_constraint * |
@@ -268,7 +304,7 @@ struct x86_pmu { | |||
268 | void (*put_event_constraints)(struct cpu_hw_events *cpuc, | 304 | void (*put_event_constraints)(struct cpu_hw_events *cpuc, |
269 | struct perf_event *event); | 305 | struct perf_event *event); |
270 | struct event_constraint *event_constraints; | 306 | struct event_constraint *event_constraints; |
271 | void (*quirks)(void); | 307 | struct x86_pmu_quirk *quirks; |
272 | int perfctr_second_write; | 308 | int perfctr_second_write; |
273 | 309 | ||
274 | int (*cpu_prepare)(int cpu); | 310 | int (*cpu_prepare)(int cpu); |
@@ -309,6 +345,15 @@ struct x86_pmu { | |||
309 | struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr); | 345 | struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr); |
310 | }; | 346 | }; |
311 | 347 | ||
348 | #define x86_add_quirk(func_) \ | ||
349 | do { \ | ||
350 | static struct x86_pmu_quirk __quirk __initdata = { \ | ||
351 | .func = func_, \ | ||
352 | }; \ | ||
353 | __quirk.next = x86_pmu.quirks; \ | ||
354 | x86_pmu.quirks = &__quirk; \ | ||
355 | } while (0) | ||
356 | |||
312 | #define ERF_NO_HT_SHARING 1 | 357 | #define ERF_NO_HT_SHARING 1 |
313 | #define ERF_HAS_RSP_1 2 | 358 | #define ERF_HAS_RSP_1 2 |
314 | 359 | ||
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index aeefd45697a..0397b23be8e 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c | |||
@@ -492,7 +492,7 @@ static __initconst const struct x86_pmu amd_pmu = { | |||
492 | static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0); | 492 | static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0); |
493 | static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0); | 493 | static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0); |
494 | static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0); | 494 | static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0); |
495 | static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0); | 495 | static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0); |
496 | static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0); | 496 | static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0); |
497 | static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); | 497 | static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); |
498 | 498 | ||
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 121f1be4da1..3bd37bdf1b8 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -28,6 +28,7 @@ static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly = | |||
28 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | 28 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, |
29 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | 29 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, |
30 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, | 30 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, |
31 | [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */ | ||
31 | }; | 32 | }; |
32 | 33 | ||
33 | static struct event_constraint intel_core_event_constraints[] __read_mostly = | 34 | static struct event_constraint intel_core_event_constraints[] __read_mostly = |
@@ -45,12 +46,7 @@ static struct event_constraint intel_core2_event_constraints[] __read_mostly = | |||
45 | { | 46 | { |
46 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | 47 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
47 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | 48 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
48 | /* | 49 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ |
49 | * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event | ||
50 | * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed | ||
51 | * ratio between these counters. | ||
52 | */ | ||
53 | /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ | ||
54 | INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ | 50 | INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ |
55 | INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ | 51 | INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ |
56 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | 52 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ |
@@ -68,7 +64,7 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly = | |||
68 | { | 64 | { |
69 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | 65 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
70 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | 66 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
71 | /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ | 67 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ |
72 | INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ | 68 | INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ |
73 | INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ | 69 | INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ |
74 | INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ | 70 | INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ |
@@ -90,7 +86,7 @@ static struct event_constraint intel_westmere_event_constraints[] __read_mostly | |||
90 | { | 86 | { |
91 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | 87 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
92 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | 88 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
93 | /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ | 89 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ |
94 | INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ | 90 | INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ |
95 | INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ | 91 | INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ |
96 | INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ | 92 | INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ |
@@ -102,7 +98,7 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly = | |||
102 | { | 98 | { |
103 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | 99 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
104 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | 100 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
105 | /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ | 101 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ |
106 | INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ | 102 | INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ |
107 | INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ | 103 | INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ |
108 | INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ | 104 | INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ |
@@ -125,7 +121,7 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly = | |||
125 | { | 121 | { |
126 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | 122 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
127 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | 123 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
128 | /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ | 124 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ |
129 | EVENT_CONSTRAINT_END | 125 | EVENT_CONSTRAINT_END |
130 | }; | 126 | }; |
131 | 127 | ||
@@ -1519,7 +1515,7 @@ static __initconst const struct x86_pmu intel_pmu = { | |||
1519 | .guest_get_msrs = intel_guest_get_msrs, | 1515 | .guest_get_msrs = intel_guest_get_msrs, |
1520 | }; | 1516 | }; |
1521 | 1517 | ||
1522 | static void intel_clovertown_quirks(void) | 1518 | static __init void intel_clovertown_quirk(void) |
1523 | { | 1519 | { |
1524 | /* | 1520 | /* |
1525 | * PEBS is unreliable due to: | 1521 | * PEBS is unreliable due to: |
@@ -1545,19 +1541,60 @@ static void intel_clovertown_quirks(void) | |||
1545 | x86_pmu.pebs_constraints = NULL; | 1541 | x86_pmu.pebs_constraints = NULL; |
1546 | } | 1542 | } |
1547 | 1543 | ||
1548 | static void intel_sandybridge_quirks(void) | 1544 | static __init void intel_sandybridge_quirk(void) |
1549 | { | 1545 | { |
1550 | printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); | 1546 | printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); |
1551 | x86_pmu.pebs = 0; | 1547 | x86_pmu.pebs = 0; |
1552 | x86_pmu.pebs_constraints = NULL; | 1548 | x86_pmu.pebs_constraints = NULL; |
1553 | } | 1549 | } |
1554 | 1550 | ||
1551 | static const struct { int id; char *name; } intel_arch_events_map[] __initconst = { | ||
1552 | { PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" }, | ||
1553 | { PERF_COUNT_HW_INSTRUCTIONS, "instructions" }, | ||
1554 | { PERF_COUNT_HW_BUS_CYCLES, "bus cycles" }, | ||
1555 | { PERF_COUNT_HW_CACHE_REFERENCES, "cache references" }, | ||
1556 | { PERF_COUNT_HW_CACHE_MISSES, "cache misses" }, | ||
1557 | { PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" }, | ||
1558 | { PERF_COUNT_HW_BRANCH_MISSES, "branch misses" }, | ||
1559 | }; | ||
1560 | |||
1561 | static __init void intel_arch_events_quirk(void) | ||
1562 | { | ||
1563 | int bit; | ||
1564 | |||
1565 | /* disable event that reported as not presend by cpuid */ | ||
1566 | for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) { | ||
1567 | intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0; | ||
1568 | printk(KERN_WARNING "CPUID marked event: \'%s\' unavailable\n", | ||
1569 | intel_arch_events_map[bit].name); | ||
1570 | } | ||
1571 | } | ||
1572 | |||
1573 | static __init void intel_nehalem_quirk(void) | ||
1574 | { | ||
1575 | union cpuid10_ebx ebx; | ||
1576 | |||
1577 | ebx.full = x86_pmu.events_maskl; | ||
1578 | if (ebx.split.no_branch_misses_retired) { | ||
1579 | /* | ||
1580 | * Erratum AAJ80 detected, we work it around by using | ||
1581 | * the BR_MISP_EXEC.ANY event. This will over-count | ||
1582 | * branch-misses, but it's still much better than the | ||
1583 | * architectural event which is often completely bogus: | ||
1584 | */ | ||
1585 | intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89; | ||
1586 | ebx.split.no_branch_misses_retired = 0; | ||
1587 | x86_pmu.events_maskl = ebx.full; | ||
1588 | printk(KERN_INFO "CPU erratum AAJ80 worked around\n"); | ||
1589 | } | ||
1590 | } | ||
1591 | |||
1555 | __init int intel_pmu_init(void) | 1592 | __init int intel_pmu_init(void) |
1556 | { | 1593 | { |
1557 | union cpuid10_edx edx; | 1594 | union cpuid10_edx edx; |
1558 | union cpuid10_eax eax; | 1595 | union cpuid10_eax eax; |
1596 | union cpuid10_ebx ebx; | ||
1559 | unsigned int unused; | 1597 | unsigned int unused; |
1560 | unsigned int ebx; | ||
1561 | int version; | 1598 | int version; |
1562 | 1599 | ||
1563 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | 1600 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { |
@@ -1574,8 +1611,8 @@ __init int intel_pmu_init(void) | |||
1574 | * Check whether the Architectural PerfMon supports | 1611 | * Check whether the Architectural PerfMon supports |
1575 | * Branch Misses Retired hw_event or not. | 1612 | * Branch Misses Retired hw_event or not. |
1576 | */ | 1613 | */ |
1577 | cpuid(10, &eax.full, &ebx, &unused, &edx.full); | 1614 | cpuid(10, &eax.full, &ebx.full, &unused, &edx.full); |
1578 | if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) | 1615 | if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT) |
1579 | return -ENODEV; | 1616 | return -ENODEV; |
1580 | 1617 | ||
1581 | version = eax.split.version_id; | 1618 | version = eax.split.version_id; |
@@ -1589,6 +1626,9 @@ __init int intel_pmu_init(void) | |||
1589 | x86_pmu.cntval_bits = eax.split.bit_width; | 1626 | x86_pmu.cntval_bits = eax.split.bit_width; |
1590 | x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; | 1627 | x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; |
1591 | 1628 | ||
1629 | x86_pmu.events_maskl = ebx.full; | ||
1630 | x86_pmu.events_mask_len = eax.split.mask_length; | ||
1631 | |||
1592 | /* | 1632 | /* |
1593 | * Quirk: v2 perfmon does not report fixed-purpose events, so | 1633 | * Quirk: v2 perfmon does not report fixed-purpose events, so |
1594 | * assume at least 3 events: | 1634 | * assume at least 3 events: |
@@ -1608,6 +1648,8 @@ __init int intel_pmu_init(void) | |||
1608 | 1648 | ||
1609 | intel_ds_init(); | 1649 | intel_ds_init(); |
1610 | 1650 | ||
1651 | x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */ | ||
1652 | |||
1611 | /* | 1653 | /* |
1612 | * Install the hw-cache-events table: | 1654 | * Install the hw-cache-events table: |
1613 | */ | 1655 | */ |
@@ -1617,7 +1659,7 @@ __init int intel_pmu_init(void) | |||
1617 | break; | 1659 | break; |
1618 | 1660 | ||
1619 | case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ | 1661 | case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ |
1620 | x86_pmu.quirks = intel_clovertown_quirks; | 1662 | x86_add_quirk(intel_clovertown_quirk); |
1621 | case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ | 1663 | case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ |
1622 | case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ | 1664 | case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ |
1623 | case 29: /* six-core 45 nm xeon "Dunnington" */ | 1665 | case 29: /* six-core 45 nm xeon "Dunnington" */ |
@@ -1651,17 +1693,8 @@ __init int intel_pmu_init(void) | |||
1651 | /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ | 1693 | /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ |
1652 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; | 1694 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; |
1653 | 1695 | ||
1654 | if (ebx & 0x40) { | 1696 | x86_add_quirk(intel_nehalem_quirk); |
1655 | /* | ||
1656 | * Erratum AAJ80 detected, we work it around by using | ||
1657 | * the BR_MISP_EXEC.ANY event. This will over-count | ||
1658 | * branch-misses, but it's still much better than the | ||
1659 | * architectural event which is often completely bogus: | ||
1660 | */ | ||
1661 | intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89; | ||
1662 | 1697 | ||
1663 | pr_cont("erratum AAJ80 worked around, "); | ||
1664 | } | ||
1665 | pr_cont("Nehalem events, "); | 1698 | pr_cont("Nehalem events, "); |
1666 | break; | 1699 | break; |
1667 | 1700 | ||
@@ -1701,7 +1734,7 @@ __init int intel_pmu_init(void) | |||
1701 | break; | 1734 | break; |
1702 | 1735 | ||
1703 | case 42: /* SandyBridge */ | 1736 | case 42: /* SandyBridge */ |
1704 | x86_pmu.quirks = intel_sandybridge_quirks; | 1737 | x86_add_quirk(intel_sandybridge_quirk); |
1705 | case 45: /* SandyBridge, "Romely-EP" */ | 1738 | case 45: /* SandyBridge, "Romely-EP" */ |
1706 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, | 1739 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, |
1707 | sizeof(hw_cache_event_ids)); | 1740 | sizeof(hw_cache_event_ids)); |
@@ -1738,5 +1771,6 @@ __init int intel_pmu_init(void) | |||
1738 | break; | 1771 | break; |
1739 | } | 1772 | } |
1740 | } | 1773 | } |
1774 | |||
1741 | return 0; | 1775 | return 0; |
1742 | } | 1776 | } |
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index ea9d5f2f13e..2889b3d4388 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c | |||
@@ -50,7 +50,7 @@ void arch_jump_label_transform(struct jump_entry *entry, | |||
50 | put_online_cpus(); | 50 | put_online_cpus(); |
51 | } | 51 | } |
52 | 52 | ||
53 | void arch_jump_label_transform_static(struct jump_entry *entry, | 53 | __init_or_module void arch_jump_label_transform_static(struct jump_entry *entry, |
54 | enum jump_label_type type) | 54 | enum jump_label_type type) |
55 | { | 55 | { |
56 | __jump_label_transform(entry, type, text_poke_early); | 56 | __jump_label_transform(entry, type, text_poke_early); |