author     Paul Mackerras <paulus@samba.org>                2009-09-09 16:28:49 -0400
committer  Benjamin Herrenschmidt <benh@kernel.crashing.org>  2009-09-10 21:27:59 -0400
commit     e51ee31e8af22948dcc3b115978469b09c96c3fd (patch)
tree       92ca44af0d69da2f434d199c425650d933f25474 /arch
parent     a6dbf93a2ad853585409e715eb96dca9177e3c39 (diff)
powerpc/perf_counters: Reduce stack usage of power_check_constraints
Michael Ellerman reported stack-frame size warnings being produced for
power_check_constraints(), which uses an 8*8 array of u64 and two 8*8
arrays of unsigned long, which are currently allocated on the stack,
along with some other smaller variables.  These arrays come to 1.5kB on
64-bit or 1kB on 32-bit, which is a bit too much for the stack.

This fixes the problem by putting these arrays in the existing per-cpu
cpu_hw_counters struct.  This is OK because two of the call sites have
interrupts disabled already; for the third call site we use get_cpu_var,
which disables preemption, so we know we won't get a context switch
while we're in power_check_constraints().  Note that
power_check_constraints() can be called during context switch but is
not called from interrupts.

Reported-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: <stable@kernel.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
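[Editor's note: the fix relies on the standard per-cpu scratch-area pattern:
large work buffers live in a per-cpu structure instead of on the stack, and
get_cpu_var()/put_cpu_var() pin the task to one CPU by disabling preemption
while the buffers are in use.  A minimal sketch of that pattern follows; it
is not taken from the patch itself, and struct scratch, scratch_area, NBUF
and do_work() are hypothetical names, while DEFINE_PER_CPU(), get_cpu_var()
and put_cpu_var() are the real kernel primitives involved.]

/*
 * Illustrative sketch only (not part of this patch).
 */
#include <linux/percpu.h>
#include <linux/types.h>

#define NBUF 8                          /* stand-in for MAX_HWCOUNTERS */

struct scratch {
        u64 tmp[NBUF][NBUF];            /* 512 bytes: too big for a stack frame */
};

static DEFINE_PER_CPU(struct scratch, scratch_area);

static int do_work(struct scratch *s)   /* hypothetical worker */
{
        return 0;
}

static int use_scratch(void)
{
        struct scratch *s;
        int ret;

        /*
         * get_cpu_var() disables preemption, so this task cannot be
         * switched away from this CPU's scratch buffer until
         * put_cpu_var() re-enables preemption.
         */
        s = &get_cpu_var(scratch_area);
        ret = do_work(s);
        put_cpu_var(scratch_area);

        return ret;
}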
Diffstat (limited to 'arch')
-rw-r--r--  arch/powerpc/kernel/perf_counter.c  |  55
1 file changed, 32 insertions(+), 23 deletions(-)
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index ccd6b2135642..7ceefaf3a7f5 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -32,6 +32,9 @@ struct cpu_hw_counters {
         unsigned long mmcr[3];
         struct perf_counter *limited_counter[MAX_LIMITED_HWCOUNTERS];
         u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS];
+        u64 alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
+        unsigned long amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
+        unsigned long avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
 };
 DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters);
 
@@ -239,13 +242,11 @@ static void write_pmc(int idx, unsigned long val)
  * and see if any combination of alternative codes is feasible.
  * The feasible set is returned in event[].
  */
-static int power_check_constraints(u64 event[], unsigned int cflags[],
+static int power_check_constraints(struct cpu_hw_counters *cpuhw,
+                                   u64 event[], unsigned int cflags[],
                                    int n_ev)
 {
         unsigned long mask, value, nv;
-        u64 alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
-        unsigned long amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
-        unsigned long avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
         unsigned long smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS];
         int n_alt[MAX_HWCOUNTERS], choice[MAX_HWCOUNTERS];
         int i, j;
@@ -260,21 +261,23 @@ static int power_check_constraints(u64 event[], unsigned int cflags[],
                 if ((cflags[i] & PPMU_LIMITED_PMC_REQD)
                     && !ppmu->limited_pmc_event(event[i])) {
                         ppmu->get_alternatives(event[i], cflags[i],
-                                               alternatives[i]);
-                        event[i] = alternatives[i][0];
+                                               cpuhw->alternatives[i]);
+                        event[i] = cpuhw->alternatives[i][0];
                 }
-                if (ppmu->get_constraint(event[i], &amasks[i][0],
-                                         &avalues[i][0]))
+                if (ppmu->get_constraint(event[i], &cpuhw->amasks[i][0],
+                                         &cpuhw->avalues[i][0]))
                         return -1;
         }
         value = mask = 0;
         for (i = 0; i < n_ev; ++i) {
-                nv = (value | avalues[i][0]) + (value & avalues[i][0] & addf);
+                nv = (value | cpuhw->avalues[i][0]) +
+                        (value & cpuhw->avalues[i][0] & addf);
                 if ((((nv + tadd) ^ value) & mask) != 0 ||
-                    (((nv + tadd) ^ avalues[i][0]) & amasks[i][0]) != 0)
+                    (((nv + tadd) ^ cpuhw->avalues[i][0]) &
+                     cpuhw->amasks[i][0]) != 0)
                         break;
                 value = nv;
-                mask |= amasks[i][0];
+                mask |= cpuhw->amasks[i][0];
         }
         if (i == n_ev)
                 return 0;       /* all OK */
@@ -285,10 +288,11 @@ static int power_check_constraints(u64 event[], unsigned int cflags[],
         for (i = 0; i < n_ev; ++i) {
                 choice[i] = 0;
                 n_alt[i] = ppmu->get_alternatives(event[i], cflags[i],
-                                                  alternatives[i]);
+                                                  cpuhw->alternatives[i]);
                 for (j = 1; j < n_alt[i]; ++j)
-                        ppmu->get_constraint(alternatives[i][j],
-                                             &amasks[i][j], &avalues[i][j]);
+                        ppmu->get_constraint(cpuhw->alternatives[i][j],
+                                             &cpuhw->amasks[i][j],
+                                             &cpuhw->avalues[i][j]);
         }
 
         /* enumerate all possibilities and see if any will work */
@@ -307,11 +311,11 @@ static int power_check_constraints(u64 event[], unsigned int cflags[],
                  * where k > j, will satisfy the constraints.
                  */
                 while (++j < n_alt[i]) {
-                        nv = (value | avalues[i][j]) +
-                                (value & avalues[i][j] & addf);
+                        nv = (value | cpuhw->avalues[i][j]) +
+                                (value & cpuhw->avalues[i][j] & addf);
                         if ((((nv + tadd) ^ value) & mask) == 0 &&
-                            (((nv + tadd) ^ avalues[i][j])
-                             & amasks[i][j]) == 0)
+                            (((nv + tadd) ^ cpuhw->avalues[i][j])
+                             & cpuhw->amasks[i][j]) == 0)
                                 break;
                 }
                 if (j >= n_alt[i]) {
@@ -333,7 +337,7 @@ static int power_check_constraints(u64 event[], unsigned int cflags[],
                         svalues[i] = value;
                         smasks[i] = mask;
                         value = nv;
-                        mask |= amasks[i][j];
+                        mask |= cpuhw->amasks[i][j];
                         ++i;
                         j = -1;
                 }
@@ -341,7 +345,7 @@ static int power_check_constraints(u64 event[], unsigned int cflags[],
 
         /* OK, we have a feasible combination, tell the caller the solution */
         for (i = 0; i < n_ev; ++i)
-                event[i] = alternatives[i][choice[i]];
+                event[i] = cpuhw->alternatives[i][choice[i]];
         return 0;
 }
 
@@ -745,7 +749,7 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader,
                 return -EAGAIN;
         if (check_excludes(cpuhw->counter, cpuhw->flags, n0, n))
                 return -EAGAIN;
-        i = power_check_constraints(cpuhw->events, cpuhw->flags, n + n0);
+        i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n + n0);
         if (i < 0)
                 return -EAGAIN;
         cpuhw->n_counters = n0 + n;
@@ -800,7 +804,7 @@ static int power_pmu_enable(struct perf_counter *counter)
         cpuhw->flags[n0] = counter->hw.counter_base;
         if (check_excludes(cpuhw->counter, cpuhw->flags, n0, 1))
                 goto out;
-        if (power_check_constraints(cpuhw->events, cpuhw->flags, n0 + 1))
+        if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1))
                 goto out;
 
         counter->hw.config = cpuhw->events[n0];
@@ -1005,6 +1009,7 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
         unsigned int cflags[MAX_HWCOUNTERS];
         int n;
         int err;
+        struct cpu_hw_counters *cpuhw;
 
         if (!ppmu)
                 return ERR_PTR(-ENXIO);
@@ -1083,7 +1088,11 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
         cflags[n] = flags;
         if (check_excludes(ctrs, cflags, n, 1))
                 return ERR_PTR(-EINVAL);
-        if (power_check_constraints(events, cflags, n + 1))
+
+        cpuhw = &get_cpu_var(cpu_hw_counters);
+        err = power_check_constraints(cpuhw, events, cflags, n + 1);
+        put_cpu_var(cpu_hw_counters);
+        if (err)
                 return ERR_PTR(-EINVAL);
 
         counter->hw.config = events[n];