diff options
author | Paul Mackerras <paulus@samba.org> | 2009-09-09 16:28:49 -0400 |
---|---|---|
committer | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2009-09-10 21:27:59 -0400 |
commit | e51ee31e8af22948dcc3b115978469b09c96c3fd (patch) | |
tree | 92ca44af0d69da2f434d199c425650d933f25474 | |
parent | a6dbf93a2ad853585409e715eb96dca9177e3c39 (diff) |
powerpc/perf_counters: Reduce stack usage of power_check_constraints
Michael Ellerman reported stack-frame size warnings being produced
for power_check_constraints(), which uses an 8*8 array of u64 and
two 8*8 arrays of unsigned long, which are currently allocated on the
stack, along with some other smaller variables. These arrays come
to 1.5kB on 64-bit or 1kB on 32-bit, which is a bit too much for the
stack.
This fixes the problem by putting these arrays in the existing
per-cpu cpu_hw_counters struct. This is OK because two of the call
sites have interrupts disabled already; for the third call site we
use get_cpu_var, which disables preemption, so we know we won't
get a context switch while we're in power_check_constraints().
Note that power_check_constraints() can be called during context
switch but is not called from interrupts.
Reported-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: <stable@kernel.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-rw-r--r-- | arch/powerpc/kernel/perf_counter.c | 55 |
1 files changed, 32 insertions, 23 deletions
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index ccd6b2135642..7ceefaf3a7f5 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c | |||
@@ -32,6 +32,9 @@ struct cpu_hw_counters { | |||
32 | unsigned long mmcr[3]; | 32 | unsigned long mmcr[3]; |
33 | struct perf_counter *limited_counter[MAX_LIMITED_HWCOUNTERS]; | 33 | struct perf_counter *limited_counter[MAX_LIMITED_HWCOUNTERS]; |
34 | u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS]; | 34 | u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS]; |
35 | u64 alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; | ||
36 | unsigned long amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; | ||
37 | unsigned long avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; | ||
35 | }; | 38 | }; |
36 | DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters); | 39 | DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters); |
37 | 40 | ||
@@ -239,13 +242,11 @@ static void write_pmc(int idx, unsigned long val) | |||
239 | * and see if any combination of alternative codes is feasible. | 242 | * and see if any combination of alternative codes is feasible. |
240 | * The feasible set is returned in event[]. | 243 | * The feasible set is returned in event[]. |
241 | */ | 244 | */ |
242 | static int power_check_constraints(u64 event[], unsigned int cflags[], | 245 | static int power_check_constraints(struct cpu_hw_counters *cpuhw, |
246 | u64 event[], unsigned int cflags[], | ||
243 | int n_ev) | 247 | int n_ev) |
244 | { | 248 | { |
245 | unsigned long mask, value, nv; | 249 | unsigned long mask, value, nv; |
246 | u64 alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; | ||
247 | unsigned long amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; | ||
248 | unsigned long avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; | ||
249 | unsigned long smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS]; | 250 | unsigned long smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS]; |
250 | int n_alt[MAX_HWCOUNTERS], choice[MAX_HWCOUNTERS]; | 251 | int n_alt[MAX_HWCOUNTERS], choice[MAX_HWCOUNTERS]; |
251 | int i, j; | 252 | int i, j; |
@@ -260,21 +261,23 @@ static int power_check_constraints(u64 event[], unsigned int cflags[], | |||
260 | if ((cflags[i] & PPMU_LIMITED_PMC_REQD) | 261 | if ((cflags[i] & PPMU_LIMITED_PMC_REQD) |
261 | && !ppmu->limited_pmc_event(event[i])) { | 262 | && !ppmu->limited_pmc_event(event[i])) { |
262 | ppmu->get_alternatives(event[i], cflags[i], | 263 | ppmu->get_alternatives(event[i], cflags[i], |
263 | alternatives[i]); | 264 | cpuhw->alternatives[i]); |
264 | event[i] = alternatives[i][0]; | 265 | event[i] = cpuhw->alternatives[i][0]; |
265 | } | 266 | } |
266 | if (ppmu->get_constraint(event[i], &amasks[i][0], | 267 | if (ppmu->get_constraint(event[i], &cpuhw->amasks[i][0], |
267 | &avalues[i][0])) | 268 | &cpuhw->avalues[i][0])) |
268 | return -1; | 269 | return -1; |
269 | } | 270 | } |
270 | value = mask = 0; | 271 | value = mask = 0; |
271 | for (i = 0; i < n_ev; ++i) { | 272 | for (i = 0; i < n_ev; ++i) { |
272 | nv = (value | avalues[i][0]) + (value & avalues[i][0] & addf); | 273 | nv = (value | cpuhw->avalues[i][0]) + |
274 | (value & cpuhw->avalues[i][0] & addf); | ||
273 | if ((((nv + tadd) ^ value) & mask) != 0 || | 275 | if ((((nv + tadd) ^ value) & mask) != 0 || |
274 | (((nv + tadd) ^ avalues[i][0]) & amasks[i][0]) != 0) | 276 | (((nv + tadd) ^ cpuhw->avalues[i][0]) & |
277 | cpuhw->amasks[i][0]) != 0) | ||
275 | break; | 278 | break; |
276 | value = nv; | 279 | value = nv; |
277 | mask |= amasks[i][0]; | 280 | mask |= cpuhw->amasks[i][0]; |
278 | } | 281 | } |
279 | if (i == n_ev) | 282 | if (i == n_ev) |
280 | return 0; /* all OK */ | 283 | return 0; /* all OK */ |
@@ -285,10 +288,11 @@ static int power_check_constraints(u64 event[], unsigned int cflags[], | |||
285 | for (i = 0; i < n_ev; ++i) { | 288 | for (i = 0; i < n_ev; ++i) { |
286 | choice[i] = 0; | 289 | choice[i] = 0; |
287 | n_alt[i] = ppmu->get_alternatives(event[i], cflags[i], | 290 | n_alt[i] = ppmu->get_alternatives(event[i], cflags[i], |
288 | alternatives[i]); | 291 | cpuhw->alternatives[i]); |
289 | for (j = 1; j < n_alt[i]; ++j) | 292 | for (j = 1; j < n_alt[i]; ++j) |
290 | ppmu->get_constraint(alternatives[i][j], | 293 | ppmu->get_constraint(cpuhw->alternatives[i][j], |
291 | &amasks[i][j], &avalues[i][j]); | 294 | &cpuhw->amasks[i][j], |
295 | &cpuhw->avalues[i][j]); | ||
292 | } | 296 | } |
293 | 297 | ||
294 | /* enumerate all possibilities and see if any will work */ | 298 | /* enumerate all possibilities and see if any will work */ |
@@ -307,11 +311,11 @@ static int power_check_constraints(u64 event[], unsigned int cflags[], | |||
307 | * where k > j, will satisfy the constraints. | 311 | * where k > j, will satisfy the constraints. |
308 | */ | 312 | */ |
309 | while (++j < n_alt[i]) { | 313 | while (++j < n_alt[i]) { |
310 | nv = (value | avalues[i][j]) + | 314 | nv = (value | cpuhw->avalues[i][j]) + |
311 | (value & avalues[i][j] & addf); | 315 | (value & cpuhw->avalues[i][j] & addf); |
312 | if ((((nv + tadd) ^ value) & mask) == 0 && | 316 | if ((((nv + tadd) ^ value) & mask) == 0 && |
313 | (((nv + tadd) ^ avalues[i][j]) | 317 | (((nv + tadd) ^ cpuhw->avalues[i][j]) |
314 | & amasks[i][j]) == 0) | 318 | & cpuhw->amasks[i][j]) == 0) |
315 | break; | 319 | break; |
316 | } | 320 | } |
317 | if (j >= n_alt[i]) { | 321 | if (j >= n_alt[i]) { |
@@ -333,7 +337,7 @@ static int power_check_constraints(u64 event[], unsigned int cflags[], | |||
333 | svalues[i] = value; | 337 | svalues[i] = value; |
334 | smasks[i] = mask; | 338 | smasks[i] = mask; |
335 | value = nv; | 339 | value = nv; |
336 | mask |= amasks[i][j]; | 340 | mask |= cpuhw->amasks[i][j]; |
337 | ++i; | 341 | ++i; |
338 | j = -1; | 342 | j = -1; |
339 | } | 343 | } |
@@ -341,7 +345,7 @@ static int power_check_constraints(u64 event[], unsigned int cflags[], | |||
341 | 345 | ||
342 | /* OK, we have a feasible combination, tell the caller the solution */ | 346 | /* OK, we have a feasible combination, tell the caller the solution */ |
343 | for (i = 0; i < n_ev; ++i) | 347 | for (i = 0; i < n_ev; ++i) |
344 | event[i] = alternatives[i][choice[i]]; | 348 | event[i] = cpuhw->alternatives[i][choice[i]]; |
345 | return 0; | 349 | return 0; |
346 | } | 350 | } |
347 | 351 | ||
@@ -745,7 +749,7 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader, | |||
745 | return -EAGAIN; | 749 | return -EAGAIN; |
746 | if (check_excludes(cpuhw->counter, cpuhw->flags, n0, n)) | 750 | if (check_excludes(cpuhw->counter, cpuhw->flags, n0, n)) |
747 | return -EAGAIN; | 751 | return -EAGAIN; |
748 | i = power_check_constraints(cpuhw->events, cpuhw->flags, n + n0); | 752 | i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n + n0); |
749 | if (i < 0) | 753 | if (i < 0) |
750 | return -EAGAIN; | 754 | return -EAGAIN; |
751 | cpuhw->n_counters = n0 + n; | 755 | cpuhw->n_counters = n0 + n; |
@@ -800,7 +804,7 @@ static int power_pmu_enable(struct perf_counter *counter) | |||
800 | cpuhw->flags[n0] = counter->hw.counter_base; | 804 | cpuhw->flags[n0] = counter->hw.counter_base; |
801 | if (check_excludes(cpuhw->counter, cpuhw->flags, n0, 1)) | 805 | if (check_excludes(cpuhw->counter, cpuhw->flags, n0, 1)) |
802 | goto out; | 806 | goto out; |
803 | if (power_check_constraints(cpuhw->events, cpuhw->flags, n0 + 1)) | 807 | if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1)) |
804 | goto out; | 808 | goto out; |
805 | 809 | ||
806 | counter->hw.config = cpuhw->events[n0]; | 810 | counter->hw.config = cpuhw->events[n0]; |
@@ -1005,6 +1009,7 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) | |||
1005 | unsigned int cflags[MAX_HWCOUNTERS]; | 1009 | unsigned int cflags[MAX_HWCOUNTERS]; |
1006 | int n; | 1010 | int n; |
1007 | int err; | 1011 | int err; |
1012 | struct cpu_hw_counters *cpuhw; | ||
1008 | 1013 | ||
1009 | if (!ppmu) | 1014 | if (!ppmu) |
1010 | return ERR_PTR(-ENXIO); | 1015 | return ERR_PTR(-ENXIO); |
@@ -1083,7 +1088,11 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) | |||
1083 | cflags[n] = flags; | 1088 | cflags[n] = flags; |
1084 | if (check_excludes(ctrs, cflags, n, 1)) | 1089 | if (check_excludes(ctrs, cflags, n, 1)) |
1085 | return ERR_PTR(-EINVAL); | 1090 | return ERR_PTR(-EINVAL); |
1086 | if (power_check_constraints(events, cflags, n + 1)) | 1091 | |
1092 | cpuhw = &get_cpu_var(cpu_hw_counters); | ||
1093 | err = power_check_constraints(cpuhw, events, cflags, n + 1); | ||
1094 | put_cpu_var(cpu_hw_counters); | ||
1095 | if (err) | ||
1087 | return ERR_PTR(-EINVAL); | 1096 | return ERR_PTR(-EINVAL); |
1088 | 1097 | ||
1089 | counter->hw.config = events[n]; | 1098 | counter->hw.config = events[n]; |