diff options
| author | Paul Mackerras <paulus@samba.org> | 2009-09-09 16:28:49 -0400 |
|---|---|---|
| committer | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2009-09-10 21:27:59 -0400 |
| commit | e51ee31e8af22948dcc3b115978469b09c96c3fd (patch) | |
| tree | 92ca44af0d69da2f434d199c425650d933f25474 | |
| parent | a6dbf93a2ad853585409e715eb96dca9177e3c39 (diff) | |
powerpc/perf_counters: Reduce stack usage of power_check_constraints
Michael Ellerman reported stack-frame size warnings being produced
for power_check_constraints(), which uses an 8*8 array of u64 and
two 8*8 arrays of unsigned long, which are currently allocated on the
stack, along with some other smaller variables. These arrays come
to 1.5kB on 64-bit or 1kB on 32-bit, which is a bit too much for the
stack.
This fixes the problem by putting these arrays in the existing
per-cpu cpu_hw_counters struct. This is OK because two of the call
sites have interrupts disabled already; for the third call site we
use get_cpu_var, which disables preemption, so we know we won't
get a context switch while we're in power_check_constraints().
Note that power_check_constraints() can be called during context
switch but is not called from interrupts.
Reported-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: <stable@kernel.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
| -rw-r--r-- | arch/powerpc/kernel/perf_counter.c | 55 |
1 files changed, 32 insertions, 23 deletions
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index ccd6b213564..7ceefaf3a7f 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c | |||
| @@ -32,6 +32,9 @@ struct cpu_hw_counters { | |||
| 32 | unsigned long mmcr[3]; | 32 | unsigned long mmcr[3]; |
| 33 | struct perf_counter *limited_counter[MAX_LIMITED_HWCOUNTERS]; | 33 | struct perf_counter *limited_counter[MAX_LIMITED_HWCOUNTERS]; |
| 34 | u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS]; | 34 | u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS]; |
| 35 | u64 alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; | ||
| 36 | unsigned long amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; | ||
| 37 | unsigned long avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; | ||
| 35 | }; | 38 | }; |
| 36 | DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters); | 39 | DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters); |
| 37 | 40 | ||
| @@ -239,13 +242,11 @@ static void write_pmc(int idx, unsigned long val) | |||
| 239 | * and see if any combination of alternative codes is feasible. | 242 | * and see if any combination of alternative codes is feasible. |
| 240 | * The feasible set is returned in event[]. | 243 | * The feasible set is returned in event[]. |
| 241 | */ | 244 | */ |
| 242 | static int power_check_constraints(u64 event[], unsigned int cflags[], | 245 | static int power_check_constraints(struct cpu_hw_counters *cpuhw, |
| 246 | u64 event[], unsigned int cflags[], | ||
| 243 | int n_ev) | 247 | int n_ev) |
| 244 | { | 248 | { |
| 245 | unsigned long mask, value, nv; | 249 | unsigned long mask, value, nv; |
| 246 | u64 alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; | ||
| 247 | unsigned long amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; | ||
| 248 | unsigned long avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; | ||
| 249 | unsigned long smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS]; | 250 | unsigned long smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS]; |
| 250 | int n_alt[MAX_HWCOUNTERS], choice[MAX_HWCOUNTERS]; | 251 | int n_alt[MAX_HWCOUNTERS], choice[MAX_HWCOUNTERS]; |
| 251 | int i, j; | 252 | int i, j; |
| @@ -260,21 +261,23 @@ static int power_check_constraints(u64 event[], unsigned int cflags[], | |||
| 260 | if ((cflags[i] & PPMU_LIMITED_PMC_REQD) | 261 | if ((cflags[i] & PPMU_LIMITED_PMC_REQD) |
| 261 | && !ppmu->limited_pmc_event(event[i])) { | 262 | && !ppmu->limited_pmc_event(event[i])) { |
| 262 | ppmu->get_alternatives(event[i], cflags[i], | 263 | ppmu->get_alternatives(event[i], cflags[i], |
| 263 | alternatives[i]); | 264 | cpuhw->alternatives[i]); |
| 264 | event[i] = alternatives[i][0]; | 265 | event[i] = cpuhw->alternatives[i][0]; |
| 265 | } | 266 | } |
| 266 | if (ppmu->get_constraint(event[i], &amasks[i][0], | 267 | if (ppmu->get_constraint(event[i], &cpuhw->amasks[i][0], |
| 267 | &avalues[i][0])) | 268 | &cpuhw->avalues[i][0])) |
| 268 | return -1; | 269 | return -1; |
| 269 | } | 270 | } |
| 270 | value = mask = 0; | 271 | value = mask = 0; |
| 271 | for (i = 0; i < n_ev; ++i) { | 272 | for (i = 0; i < n_ev; ++i) { |
| 272 | nv = (value | avalues[i][0]) + (value & avalues[i][0] & addf); | 273 | nv = (value | cpuhw->avalues[i][0]) + |
| 274 | (value & cpuhw->avalues[i][0] & addf); | ||
| 273 | if ((((nv + tadd) ^ value) & mask) != 0 || | 275 | if ((((nv + tadd) ^ value) & mask) != 0 || |
| 274 | (((nv + tadd) ^ avalues[i][0]) & amasks[i][0]) != 0) | 276 | (((nv + tadd) ^ cpuhw->avalues[i][0]) & |
| 277 | cpuhw->amasks[i][0]) != 0) | ||
| 275 | break; | 278 | break; |
| 276 | value = nv; | 279 | value = nv; |
| 277 | mask |= amasks[i][0]; | 280 | mask |= cpuhw->amasks[i][0]; |
| 278 | } | 281 | } |
| 279 | if (i == n_ev) | 282 | if (i == n_ev) |
| 280 | return 0; /* all OK */ | 283 | return 0; /* all OK */ |
| @@ -285,10 +288,11 @@ static int power_check_constraints(u64 event[], unsigned int cflags[], | |||
| 285 | for (i = 0; i < n_ev; ++i) { | 288 | for (i = 0; i < n_ev; ++i) { |
| 286 | choice[i] = 0; | 289 | choice[i] = 0; |
| 287 | n_alt[i] = ppmu->get_alternatives(event[i], cflags[i], | 290 | n_alt[i] = ppmu->get_alternatives(event[i], cflags[i], |
| 288 | alternatives[i]); | 291 | cpuhw->alternatives[i]); |
| 289 | for (j = 1; j < n_alt[i]; ++j) | 292 | for (j = 1; j < n_alt[i]; ++j) |
| 290 | ppmu->get_constraint(alternatives[i][j], | 293 | ppmu->get_constraint(cpuhw->alternatives[i][j], |
| 291 | &amasks[i][j], &avalues[i][j]); | 294 | &cpuhw->amasks[i][j], |
| 295 | &cpuhw->avalues[i][j]); | ||
| 292 | } | 296 | } |
| 293 | 297 | ||
| 294 | /* enumerate all possibilities and see if any will work */ | 298 | /* enumerate all possibilities and see if any will work */ |
| @@ -307,11 +311,11 @@ static int power_check_constraints(u64 event[], unsigned int cflags[], | |||
| 307 | * where k > j, will satisfy the constraints. | 311 | * where k > j, will satisfy the constraints. |
| 308 | */ | 312 | */ |
| 309 | while (++j < n_alt[i]) { | 313 | while (++j < n_alt[i]) { |
| 310 | nv = (value | avalues[i][j]) + | 314 | nv = (value | cpuhw->avalues[i][j]) + |
| 311 | (value & avalues[i][j] & addf); | 315 | (value & cpuhw->avalues[i][j] & addf); |
| 312 | if ((((nv + tadd) ^ value) & mask) == 0 && | 316 | if ((((nv + tadd) ^ value) & mask) == 0 && |
| 313 | (((nv + tadd) ^ avalues[i][j]) | 317 | (((nv + tadd) ^ cpuhw->avalues[i][j]) |
| 314 | & amasks[i][j]) == 0) | 318 | & cpuhw->amasks[i][j]) == 0) |
| 315 | break; | 319 | break; |
| 316 | } | 320 | } |
| 317 | if (j >= n_alt[i]) { | 321 | if (j >= n_alt[i]) { |
| @@ -333,7 +337,7 @@ static int power_check_constraints(u64 event[], unsigned int cflags[], | |||
| 333 | svalues[i] = value; | 337 | svalues[i] = value; |
| 334 | smasks[i] = mask; | 338 | smasks[i] = mask; |
| 335 | value = nv; | 339 | value = nv; |
| 336 | mask |= amasks[i][j]; | 340 | mask |= cpuhw->amasks[i][j]; |
| 337 | ++i; | 341 | ++i; |
| 338 | j = -1; | 342 | j = -1; |
| 339 | } | 343 | } |
| @@ -341,7 +345,7 @@ static int power_check_constraints(u64 event[], unsigned int cflags[], | |||
| 341 | 345 | ||
| 342 | /* OK, we have a feasible combination, tell the caller the solution */ | 346 | /* OK, we have a feasible combination, tell the caller the solution */ |
| 343 | for (i = 0; i < n_ev; ++i) | 347 | for (i = 0; i < n_ev; ++i) |
| 344 | event[i] = alternatives[i][choice[i]]; | 348 | event[i] = cpuhw->alternatives[i][choice[i]]; |
| 345 | return 0; | 349 | return 0; |
| 346 | } | 350 | } |
| 347 | 351 | ||
| @@ -745,7 +749,7 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader, | |||
| 745 | return -EAGAIN; | 749 | return -EAGAIN; |
| 746 | if (check_excludes(cpuhw->counter, cpuhw->flags, n0, n)) | 750 | if (check_excludes(cpuhw->counter, cpuhw->flags, n0, n)) |
| 747 | return -EAGAIN; | 751 | return -EAGAIN; |
| 748 | i = power_check_constraints(cpuhw->events, cpuhw->flags, n + n0); | 752 | i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n + n0); |
| 749 | if (i < 0) | 753 | if (i < 0) |
| 750 | return -EAGAIN; | 754 | return -EAGAIN; |
| 751 | cpuhw->n_counters = n0 + n; | 755 | cpuhw->n_counters = n0 + n; |
| @@ -800,7 +804,7 @@ static int power_pmu_enable(struct perf_counter *counter) | |||
| 800 | cpuhw->flags[n0] = counter->hw.counter_base; | 804 | cpuhw->flags[n0] = counter->hw.counter_base; |
| 801 | if (check_excludes(cpuhw->counter, cpuhw->flags, n0, 1)) | 805 | if (check_excludes(cpuhw->counter, cpuhw->flags, n0, 1)) |
| 802 | goto out; | 806 | goto out; |
| 803 | if (power_check_constraints(cpuhw->events, cpuhw->flags, n0 + 1)) | 807 | if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1)) |
| 804 | goto out; | 808 | goto out; |
| 805 | 809 | ||
| 806 | counter->hw.config = cpuhw->events[n0]; | 810 | counter->hw.config = cpuhw->events[n0]; |
| @@ -1005,6 +1009,7 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) | |||
| 1005 | unsigned int cflags[MAX_HWCOUNTERS]; | 1009 | unsigned int cflags[MAX_HWCOUNTERS]; |
| 1006 | int n; | 1010 | int n; |
| 1007 | int err; | 1011 | int err; |
| 1012 | struct cpu_hw_counters *cpuhw; | ||
| 1008 | 1013 | ||
| 1009 | if (!ppmu) | 1014 | if (!ppmu) |
| 1010 | return ERR_PTR(-ENXIO); | 1015 | return ERR_PTR(-ENXIO); |
| @@ -1083,7 +1088,11 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) | |||
| 1083 | cflags[n] = flags; | 1088 | cflags[n] = flags; |
| 1084 | if (check_excludes(ctrs, cflags, n, 1)) | 1089 | if (check_excludes(ctrs, cflags, n, 1)) |
| 1085 | return ERR_PTR(-EINVAL); | 1090 | return ERR_PTR(-EINVAL); |
| 1086 | if (power_check_constraints(events, cflags, n + 1)) | 1091 | |
| 1092 | cpuhw = &get_cpu_var(cpu_hw_counters); | ||
| 1093 | err = power_check_constraints(cpuhw, events, cflags, n + 1); | ||
| 1094 | put_cpu_var(cpu_hw_counters); | ||
| 1095 | if (err) | ||
| 1087 | return ERR_PTR(-EINVAL); | 1096 | return ERR_PTR(-EINVAL); |
| 1088 | 1097 | ||
| 1089 | counter->hw.config = events[n]; | 1098 | counter->hw.config = events[n]; |
