author	Robert Richter <robert.richter@amd.com>	2011-11-18 06:35:22 -0500
committer	Ingo Molnar <mingo@elte.hu>	2011-12-06 02:33:56 -0500
commit	bc1738f6ee83015f090867813dcca4d690e7917c (patch)
tree	57c867a016a6eb0f426b9362b576a331379445b0 /arch
parent	1e2ad28f80b4e155678259238f51edebc19e4014 (diff)
perf, x86: Fix event scheduler for constraints with overlapping counters

The current x86 event scheduler fails to resolve scheduling problems
of certain combinations of events and constraints. This happens if the
counter mask of such an event is not a subset of any other counter
mask of a constraint with an equal or higher weight, e.g. constraints
of the AMD family 15h pmu:

                        counter mask    weight

        amd_f15_PMC30   0x09            2  <--- overlapping counters
        amd_f15_PMC20   0x07            3
        amd_f15_PMC53   0x38            3

The scheduler then fails to find an existing solution. Here is an
example:

        event code      counter         failure         possible solution

        0x02E           PMC[3,0]        0               3
        0x043           PMC[2:0]        1               0
        0x045           PMC[2:0]        2               1
        0x046           PMC[2:0]        FAIL            2

The event scheduler may not select the correct counter in the first
cycle because it needs to know which subsequent events will be
scheduled. It may fail to schedule the events then.

To solve this, we now save the scheduler state of events with
overlapping counter constraints. If we fail to schedule the events we
roll back to those states and try to use another free counter.

Constraints with overlapping counters are marked with a newly
introduced overlap flag. We set the overlap flag for such constraints
to give the scheduler a hint which events to select for counter
rescheduling. The EVENT_CONSTRAINT_OVERLAP() macro can be used for
this.

Care must be taken as the rescheduling algorithm is O(n!) which will
increase scheduling cycles for an over-committed system dramatically.
The number of such EVENT_CONSTRAINT_OVERLAP() macros and their counter
masks must be kept at a minimum. Thus, the current stack is limited to
2 states to limit the number of loops the algorithm takes in the worst
case. On systems with no overlapping-counter constraints, this
implementation does not increase the loop count compared to the
previous algorithm.

V2:
 * Renamed redo -> overlap.
 * Reimplementation using perf scheduling helper functions.

V3:
 * Added WARN_ON_ONCE() if out of save states.
 * Changed function interface of perf_sched_restore_state() to use
   bool as return value.

Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1321616122-1533-3-git-send-email-robert.richter@amd.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
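
To make the rollback concrete, here is a minimal stand-alone C sketch of the
approach, under simplifying assumptions: the struct layout, find_counter()
and schedule_events() are illustrative rather than the kernel's, and events
are taken in the given order instead of being sorted by weight. Fed the
failing example from the table above, it checkpoints at the overlapping
event (mask 0x09), hits the dead end, rolls back, and lands on the 3/0/1/2
solution:

#include <stdbool.h>
#include <stdio.h>

#define NUM_COUNTERS	6
#define STATES_MAX	2		/* rollback depth, as in the patch */

struct state {
	int		event;		/* next event to place */
	int		counter;	/* next counter index to try */
	unsigned long	used;		/* bitmask of taken counters */
	int		assign[8];	/* counter chosen per event */
};

struct sched {
	int			nr_events;
	const unsigned long	*masks;		/* allowed counters per event */
	const bool		*overlap;	/* overlap flag per event */
	int			saved_states;
	struct state		state;
	struct state		saved[STATES_MAX];
};

/* Checkpoint the current assignment so we can come back and retry. */
static void save_state(struct sched *s)
{
	if (s->saved_states >= STATES_MAX)
		return;
	s->saved[s->saved_states++] = s->state;
}

/* Roll back to the last checkpoint; free that counter, try the next one. */
static bool restore_state(struct sched *s)
{
	if (!s->saved_states)
		return false;
	s->state = s->saved[--s->saved_states];
	s->state.used &= ~(1UL << s->state.counter);
	s->state.counter++;
	return true;
}

/* Grab the first free counter allowed by the current event's mask. */
static bool find_counter(struct sched *s)
{
	unsigned long mask = s->masks[s->state.event];
	int idx;

	for (idx = s->state.counter; idx < NUM_COUNTERS; idx++) {
		if (!(mask & (1UL << idx)) || (s->state.used & (1UL << idx)))
			continue;
		s->state.counter = idx;
		s->state.used |= 1UL << idx;
		s->state.assign[s->state.event] = idx;
		if (s->overlap[s->state.event])
			save_state(s);	/* this pick may need revisiting */
		return true;
	}
	return false;
}

static bool schedule_events(struct sched *s)
{
	while (s->state.event < s->nr_events) {
		while (!find_counter(s)) {
			if (!restore_state(s))
				return false;	/* out of saved states */
		}
		s->state.event++;
		s->state.counter = 0;
	}
	return true;
}

int main(void)
{
	/* The changelog example: 0x02E on PMC[3,0] (overlapping),
	 * three more events on PMC[2:0]. */
	const unsigned long masks[]   = { 0x09, 0x07, 0x07, 0x07 };
	const bool          overlap[] = { true, false, false, false };
	struct sched s = { .nr_events = 4, .masks = masks, .overlap = overlap };
	int i;

	if (!schedule_events(&s)) {
		printf("FAIL\n");
		return 1;
	}
	for (i = 0; i < s.nr_events; i++)
		printf("event %d -> PMC%d\n", i, s.state.assign[i]);	/* 3 0 1 2 */
	return 0;
}

As in the patch, a saved state records the counter just taken; restoring
clears that counter's used bit and resumes the search at the next index,
and the fixed-depth stack bounds the extra work in the worst case.
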
Diffstat (limited to 'arch')
-rw-r--r--	arch/x86/kernel/cpu/perf_event.c	45
-rw-r--r--	arch/x86/kernel/cpu/perf_event.h	30
-rw-r--r--	arch/x86/kernel/cpu/perf_event_amd.c	2
3 files changed, 72 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 5a469d3d0c6..fa6fdec5afb 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -499,11 +499,16 @@ struct sched_state {
 	unsigned long	used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 };
 
+/* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */
+#define	SCHED_STATES_MAX	2
+
 struct perf_sched {
 	int			max_weight;
 	int			max_events;
 	struct event_constraint	**constraints;
 	struct sched_state	state;
+	int			saved_states;
+	struct sched_state	saved[SCHED_STATES_MAX];
 };
 
 /*
@@ -529,11 +534,34 @@ static void perf_sched_init(struct perf_sched *sched, struct event_constraint **
 	sched->state.unassigned	= num;
 }
 
+static void perf_sched_save_state(struct perf_sched *sched)
+{
+	if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX))
+		return;
+
+	sched->saved[sched->saved_states] = sched->state;
+	sched->saved_states++;
+}
+
+static bool perf_sched_restore_state(struct perf_sched *sched)
+{
+	if (!sched->saved_states)
+		return false;
+
+	sched->saved_states--;
+	sched->state = sched->saved[sched->saved_states];
+
+	/* continue with next counter: */
+	clear_bit(sched->state.counter++, sched->state.used);
+
+	return true;
+}
+
 /*
  * Select a counter for the current event to schedule. Return true on
  * success.
  */
-static bool perf_sched_find_counter(struct perf_sched *sched)
+static bool __perf_sched_find_counter(struct perf_sched *sched)
 {
 	struct event_constraint *c;
 	int idx;
@@ -557,6 +585,19 @@ static bool perf_sched_find_counter(struct perf_sched *sched)
 	if (idx >= X86_PMC_IDX_MAX)
 		return false;
 
+	if (c->overlap)
+		perf_sched_save_state(sched);
+
+	return true;
+}
+
+static bool perf_sched_find_counter(struct perf_sched *sched)
+{
+	while (!__perf_sched_find_counter(sched)) {
+		if (!perf_sched_restore_state(sched))
+			return false;
+	}
+
 	return true;
 }
 
@@ -1250,7 +1291,7 @@ static int __init init_hw_perf_events(void)
 
 	unconstrained = (struct event_constraint)
 		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
-				   0, x86_pmu.num_counters);
+				   0, x86_pmu.num_counters, 0);
 
 	if (x86_pmu.event_constraints) {
 		for_each_event_constraint(c, x86_pmu.event_constraints) {
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index b9698d40ac4..51a985cbc12 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -45,6 +45,7 @@ struct event_constraint {
 	u64	code;
 	u64	cmask;
 	int	weight;
+	int	overlap;
 };
 
 struct amd_nb {
@@ -151,15 +152,40 @@ struct cpu_hw_events {
 	void				*kfree_on_online;
 };
 
-#define __EVENT_CONSTRAINT(c, n, m, w) {\
+#define __EVENT_CONSTRAINT(c, n, m, w, o) {\
 	{ .idxmsk64 = (n) },		\
 	.code = (c),			\
 	.cmask = (m),			\
 	.weight = (w),			\
+	.overlap = (o),			\
 }
 
 #define EVENT_CONSTRAINT(c, n, m)	\
-	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
+	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0)
+
+/*
+ * The overlap flag marks event constraints with overlapping counter
+ * masks. This is the case if the counter mask of such an event is not
+ * a subset of any other counter mask of a constraint with an equal or
+ * higher weight, e.g.:
+ *
+ *  c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
+ *  c_another1 = EVENT_CONSTRAINT(0, 0x07, 0);
+ *  c_another2 = EVENT_CONSTRAINT(0, 0x38, 0);
+ *
+ * The event scheduler may not select the correct counter in the first
+ * cycle because it needs to know which subsequent events will be
+ * scheduled. It may fail to schedule the events then. So we set the
+ * overlap flag for such constraints to give the scheduler a hint which
+ * events to select for counter rescheduling.
+ *
+ * Care must be taken as the rescheduling algorithm is O(n!) which
+ * will increase scheduling cycles for an over-committed system
+ * dramatically. The number of such EVENT_CONSTRAINT_OVERLAP() macros
+ * and their counter masks must be kept at a minimum.
+ */
+#define EVENT_CONSTRAINT_OVERLAP(c, n, m)	\
+	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1)
 
 /*
  * Constraint on the Event code.
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index aeefd45697a..0397b23be8e 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -492,7 +492,7 @@ static __initconst const struct x86_pmu amd_pmu = {
 static struct event_constraint amd_f15_PMC0  = EVENT_CONSTRAINT(0, 0x01, 0);
 static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
 static struct event_constraint amd_f15_PMC3  = EVENT_CONSTRAINT(0, 0x08, 0);
-static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0);
+static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
 static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
 static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
 
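
As a footnote on when the new macro is needed: one way to read the rule from
the comment block above is that a mask is problematic when it shares counters
with an equal- or higher-weight mask without either containing the other. A
tiny stand-alone check (overlaps() is an illustrative helper, not kernel API)
applied to the family 15h masks:

#include <stdio.h>

/* Partial overlap: the masks share a counter, yet neither contains
 * the other, so a greedy weight-ordered pass can pick the wrong bit. */
static int overlaps(unsigned long a, unsigned long b)
{
	return (a & b) && (a & ~b) && (b & ~a);
}

int main(void)
{
	printf("%d\n", overlaps(0x09, 0x07));	/* 1: PMC30 vs PMC20 */
	printf("%d\n", overlaps(0x09, 0x38));	/* 1: PMC30 vs PMC53 */
	printf("%d\n", overlaps(0x07, 0x38));	/* 0: disjoint, fine */
	printf("%d\n", overlaps(0x07, 0x3F));	/* 0: plain subset, fine */
	return 0;
}

Every other pair of these masks is disjoint or nested, which is presumably
why amd_f15_PMC30 is the only constraint converted to
EVENT_CONSTRAINT_OVERLAP() in this patch: its weight-2 events are placed
before the weight-3 events, so its pick is the one worth checkpointing.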