Diffstat (limited to 'arch/x86/kernel')

-rw-r--r--  arch/x86/kernel/cpu/perf_event.c      45
-rw-r--r--  arch/x86/kernel/cpu/perf_event.h      30
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd.c   2

3 files changed, 72 insertions, 5 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 5a469d3d0c6..fa6fdec5afb 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -499,11 +499,16 @@ struct sched_state {
         unsigned long   used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 };
 
+/* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */
+#define SCHED_STATES_MAX        2
+
 struct perf_sched {
         int                     max_weight;
         int                     max_events;
         struct event_constraint **constraints;
         struct sched_state      state;
+        int                     saved_states;
+        struct sched_state      saved[SCHED_STATES_MAX];
 };
 
 /*
@@ -529,11 +534,34 @@ static void perf_sched_init(struct perf_sched *sched, struct event_constraint **
         sched->state.unassigned = num;
 }
 
+static void perf_sched_save_state(struct perf_sched *sched)
+{
+        if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX))
+                return;
+
+        sched->saved[sched->saved_states] = sched->state;
+        sched->saved_states++;
+}
+
+static bool perf_sched_restore_state(struct perf_sched *sched)
+{
+        if (!sched->saved_states)
+                return false;
+
+        sched->saved_states--;
+        sched->state = sched->saved[sched->saved_states];
+
+        /* continue with next counter: */
+        clear_bit(sched->state.counter++, sched->state.used);
+
+        return true;
+}
+
 /*
  * Select a counter for the current event to schedule. Return true on
  * success.
  */
-static bool perf_sched_find_counter(struct perf_sched *sched)
+static bool __perf_sched_find_counter(struct perf_sched *sched)
 {
         struct event_constraint *c;
         int idx;
@@ -557,6 +585,19 @@ static bool perf_sched_find_counter(struct perf_sched *sched)
         if (idx >= X86_PMC_IDX_MAX)
                 return false;
 
+        if (c->overlap)
+                perf_sched_save_state(sched);
+
+        return true;
+}
+
+static bool perf_sched_find_counter(struct perf_sched *sched)
+{
+        while (!__perf_sched_find_counter(sched)) {
+                if (!perf_sched_restore_state(sched))
+                        return false;
+        }
+
         return true;
 }
 
@@ -1250,7 +1291,7 @@ static int __init init_hw_perf_events(void)
 
         unconstrained = (struct event_constraint)
                 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
-                                   0, x86_pmu.num_counters);
+                                   0, x86_pmu.num_counters, 0);
 
         if (x86_pmu.event_constraints) {
                 for_each_event_constraint(c, x86_pmu.event_constraints) {
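
The backtracking mechanics above are easier to follow outside the kernel. Below is a minimal, self-contained user-space sketch of the same save/restore idea; it is illustrative only, not the patch's code: all names, the fixed event set, and the assumption that events arrive already sorted by ascending constraint weight (which the perf_assign_events() loop normally provides) are hypothetical.

/*
 * Hypothetical user-space sketch of the patch's backtracking scheduler.
 * Events are assumed pre-sorted by constraint weight; the masks below
 * mirror the AMD family 15h case (0x09 overlapping the 0x07 group).
 */
#include <stdbool.h>
#include <stdio.h>

#define NCOUNTERS               8
#define SCHED_STATES_MAX        2       /* O(n!) limited, as in the patch */

struct constraint {
        unsigned long idxmsk;   /* allowed counters, one bit per counter */
        int overlap;            /* mask overlaps equal/higher weight masks */
};

struct sched_state {
        int event;              /* index of the event being placed */
        int counter;            /* first counter to try */
        unsigned long used;     /* bitmask of occupied counters */
};

static const struct constraint constraints[] = {
        { 0x09, 1 },            /* counters 0,3 -- needs the overlap flag */
        { 0x07, 0 },            /* counters 0-2 */
        { 0x07, 0 },
        { 0x07, 0 },
};
#define NEVENTS 4

static struct sched_state state, saved[SCHED_STATES_MAX];
static int saved_states, assign[NEVENTS];

/* Pop the last overlap decision: free that counter, retry with the next. */
static bool restore_state(void)
{
        if (!saved_states)
                return false;
        state = saved[--saved_states];
        state.used &= ~(1UL << state.counter);
        state.counter++;
        return true;
}

static bool __find_counter(void)
{
        const struct constraint *c = &constraints[state.event];
        int idx;

        for (idx = state.counter; idx < NCOUNTERS; idx++) {
                if (!(c->idxmsk >> idx & 1) || (state.used >> idx & 1))
                        continue;
                state.counter = idx;
                state.used |= 1UL << idx;
                /* Remember this choice so a later conflict can redo it. */
                if (c->overlap && saved_states < SCHED_STATES_MAX)
                        saved[saved_states++] = state;
                return true;
        }
        return false;
}

static bool find_counter(void)
{
        while (!__find_counter()) {
                if (!restore_state())
                        return false;   /* no overlap decision left to undo */
        }
        return true;
}

int main(void)
{
        /*
         * Mirrors the scheduling loop in the kernel: a restore may rewind
         * state.event, and the loop then re-walks forward from there.
         */
        while (state.event < NEVENTS) {
                if (!find_counter()) {
                        puts("schedule failed");
                        return 1;
                }
                assign[state.event] = state.counter;
                state.event++;
                state.counter = 0;
        }
        for (int i = 0; i < NEVENTS; i++)
                printf("event %d -> counter %d\n", i, assign[i]);
        return 0;
}

Running this prints event 0 on counter 3 and events 1-3 on counters 0-2; clearing the overlap flag on the first constraint makes the same event set unschedulable, which is exactly the failure the patch fixes.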
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index b9698d40ac4..51a985cbc12 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -45,6 +45,7 @@ struct event_constraint {
         u64     code;
         u64     cmask;
         int     weight;
+        int     overlap;
 };
 
 struct amd_nb {
@@ -151,15 +152,40 @@ struct cpu_hw_events {
         void                            *kfree_on_online;
 };
 
-#define __EVENT_CONSTRAINT(c, n, m, w) {\
+#define __EVENT_CONSTRAINT(c, n, m, w, o) {\
         { .idxmsk64 = (n) },            \
         .code = (c),                    \
         .cmask = (m),                   \
         .weight = (w),                  \
+        .overlap = (o),                 \
 }
 
 #define EVENT_CONSTRAINT(c, n, m)       \
-        __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
+        __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0)
+
+/*
+ * The overlap flag marks event constraints with overlapping counter
+ * masks. This is the case if the counter mask of such an event is not
+ * a subset of any other counter mask of a constraint with an equal or
+ * higher weight, e.g.:
+ *
+ *  c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
+ *  c_another1 = EVENT_CONSTRAINT(0, 0x07, 0);
+ *  c_another2 = EVENT_CONSTRAINT(0, 0x38, 0);
+ *
+ * The event scheduler may not select the correct counter in the first
+ * cycle because it would need to know which subsequent events will be
+ * scheduled. It may then fail to schedule the events. So we set the
+ * overlap flag for such constraints to give the scheduler a hint which
+ * events to select for counter rescheduling.
+ *
+ * Care must be taken as the rescheduling algorithm is O(n!), which
+ * will dramatically increase scheduling cycles on an over-committed
+ * system. The number of such EVENT_CONSTRAINT_OVERLAP() macros and
+ * their counter masks must be kept to a minimum.
+ */
+#define EVENT_CONSTRAINT_OVERLAP(c, n, m)       \
+        __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1)
 
 /*
  * Constraint on the Event code.
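
To make the example in the comment above concrete: take one event on c_overlaps (0x09, counters 0 and 3, weight 2) and three events on c_another1 (0x07, counters 0-2, weight 3). The scheduler places lowest weights first, so the 0x09 event goes first, and a hypothetical run looks like this:

 first cycle:   E0 (0x09) -> 0,  E1 (0x07) -> 1,  E2 (0x07) -> 2,  E3 (0x07) -> no free counter
 after restore: E0 (0x09) -> 3,  E1 (0x07) -> 0,  E2 (0x07) -> 1,  E3 (0x07) -> 2

Without the saved state, the first cycle's pick of counter 0 would be final and the set would be rejected even though a valid assignment exists.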
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index aeefd45697a..0397b23be8e 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -492,7 +492,7 @@ static __initconst const struct x86_pmu amd_pmu = {
 static struct event_constraint amd_f15_PMC0  = EVENT_CONSTRAINT(0, 0x01, 0);
 static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
 static struct event_constraint amd_f15_PMC3  = EVENT_CONSTRAINT(0, 0x08, 0);
-static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0);
+static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
 static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
 static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
 
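For the family 15h table above, amd_f15_PMC30 (mask 0x09, counters 0 and 3) is precisely the case the perf_event.h comment describes: it straddles the amd_f15_PMC20 group (0x07, counters 0-2) and the amd_f15_PMC53 group (0x38, counters 3-5), so a first-cycle pick of either counter can block whichever group a later event needs. It is therefore the only constraint converted to EVENT_CONSTRAINT_OVERLAP().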