about | summary | refs | log | tree | commit | diff | stats
path: root/arch/powerpc/kernel/power6-pmu.c
diff options
context:
space:
mode:
author: Paul Mackerras <paulus@samba.org> 2009-04-29 08:38:51 -0400
committer: Ingo Molnar <mingo@elte.hu> 2009-04-29 08:58:35 -0400
commit: ab7ef2e50a557af92f4f90689f51fadadafc16b2 (patch)
tree: 71ef1cbc279e5d2ad96b6c701617ac60ff36c363 /arch/powerpc/kernel/power6-pmu.c
parent: 98144511427c192e4249ff66a3f9debc55c59411 (diff)
perf_counter: powerpc: allow use of limited-function counters
POWER5+ and POWER6 have two hardware counters with limited functionality: PMC5 counts instructions completed in run state and PMC6 counts cycles in run state. (Run state is the state when a hardware RUN bit is 1; the idle task clears RUN while waiting for work to do and sets it when there is work to do.)

These counters can't be written to by the kernel, can't generate interrupts, and don't obey the freeze conditions. That means we can only use them for per-task counters (where we know we'll always be in run state; we can't put a per-task counter on an idle task), and only if we don't want interrupts and we do want to count in all processor modes.

Obviously some counters can't go on a limited hardware counter, but there are also situations where we can only put a counter on a limited hardware counter - if there are already counters on the PMU that exclude some processor modes and we want to put on a per-task cycle or instruction counter that doesn't exclude any processor mode, it could go on if it can use a limited hardware counter.

To keep track of these constraints, this adds a flags argument to the processor-specific get_alternatives() functions, with three bits defined: one to say that we can accept alternative event codes that go on limited counters, one to say we only want alternatives on limited counters, and one to say that this is a per-task counter and therefore events that are gated by run state are equivalent to those that aren't (e.g. a "cycles" event is equivalent to a "cycles in run state" event). These flags are computed for each counter and stored in the counter->hw.counter_base field (slightly wonky name for what it does, but it was an existing unused field).

Since the limited counters don't freeze when we freeze the other counters, we need some special handling to avoid getting skew between things counted on the limited counters and those counted on normal counters.
To minimize this skew, if we are using any limited counters, we read PMC5 and PMC6 immediately after setting and clearing the freeze bit. This is done in a single asm in the new write_mmcr0() function.

The code here is specific to PMC5 and PMC6 being the limited hardware counters. Being more general (e.g. having a bitmap of limited hardware counter numbers) would have meant more complex code to read the limited counters when freezing and unfreezing the normal counters, with conditional branches, which would have increased the skew. Since it isn't necessary for the code to be more general at this stage, it isn't.

This also extends the back-ends for POWER5+ and POWER6 to be able to handle up to 6 counters rather than the 4 they previously handled.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <18936.19035.163066.892208@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/powerpc/kernel/power6-pmu.c')
-rw-r--r-- arch/powerpc/kernel/power6-pmu.c 119
1 files changed, 101 insertions, 18 deletions
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c
index fce1fc290a1d..d44049f0ae27 100644
--- a/arch/powerpc/kernel/power6-pmu.c
+++ b/arch/powerpc/kernel/power6-pmu.c
@@ -182,7 +182,7 @@ static int p6_compute_mmcr(unsigned int event[], int n_ev,
182 unsigned int ttmset = 0; 182 unsigned int ttmset = 0;
183 unsigned int pmc_inuse = 0; 183 unsigned int pmc_inuse = 0;
184 184
185 if (n_ev > 4) 185 if (n_ev > 6)
186 return -1; 186 return -1;
187 for (i = 0; i < n_ev; ++i) { 187 for (i = 0; i < n_ev; ++i) {
188 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; 188 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
@@ -202,6 +202,8 @@ static int p6_compute_mmcr(unsigned int event[], int n_ev,
202 for (pmc = 0; pmc < 4; ++pmc) 202 for (pmc = 0; pmc < 4; ++pmc)
203 if (!(pmc_inuse & (1 << pmc))) 203 if (!(pmc_inuse & (1 << pmc)))
204 break; 204 break;
205 if (pmc >= 4)
206 return -1;
205 pmc_inuse |= 1 << pmc; 207 pmc_inuse |= 1 << pmc;
206 } 208 }
207 hwc[i] = pmc; 209 hwc[i] = pmc;
@@ -240,7 +242,8 @@ static int p6_compute_mmcr(unsigned int event[], int n_ev,
240 } 242 }
241 if (power6_marked_instr_event(event[i])) 243 if (power6_marked_instr_event(event[i]))
242 mmcra |= MMCRA_SAMPLE_ENABLE; 244 mmcra |= MMCRA_SAMPLE_ENABLE;
243 mmcr1 |= (u64)psel << MMCR1_PMCSEL_SH(pmc); 245 if (pmc < 4)
246 mmcr1 |= (u64)psel << MMCR1_PMCSEL_SH(pmc);
244 } 247 }
245 mmcr[0] = 0; 248 mmcr[0] = 0;
246 if (pmc_inuse & 1) 249 if (pmc_inuse & 1)
@@ -256,19 +259,20 @@ static int p6_compute_mmcr(unsigned int event[], int n_ev,
256 * Layout of constraint bits: 259 * Layout of constraint bits:
257 * 260 *
258 * 0-1 add field: number of uses of PMC1 (max 1) 261 * 0-1 add field: number of uses of PMC1 (max 1)
259 * 2-3, 4-5, 6-7: ditto for PMC2, 3, 4 262 * 2-3, 4-5, 6-7, 8-9, 10-11: ditto for PMC2, 3, 4, 5, 6
260 * 8-10 select field: nest (subunit) event selector 263 * 12-15 add field: number of uses of PMC1-4 (max 4)
261 * 16-19 select field: unit on byte 0 of event bus 264 * 16-19 select field: unit on byte 0 of event bus
262 * 20-23, 24-27, 28-31 ditto for bytes 1, 2, 3 265 * 20-23, 24-27, 28-31 ditto for bytes 1, 2, 3
266 * 32-34 select field: nest (subunit) event selector
263 */ 267 */
264static int p6_get_constraint(unsigned int event, u64 *maskp, u64 *valp) 268static int p6_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
265{ 269{
266 int pmc, byte, sh; 270 int pmc, byte, sh, subunit;
267 unsigned int mask = 0, value = 0; 271 u64 mask = 0, value = 0;
268 272
269 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; 273 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
270 if (pmc) { 274 if (pmc) {
271 if (pmc > 4) 275 if (pmc > 4 && !(event == 0x500009 || event == 0x600005))
272 return -1; 276 return -1;
273 sh = (pmc - 1) * 2; 277 sh = (pmc - 1) * 2;
274 mask |= 2 << sh; 278 mask |= 2 << sh;
@@ -276,26 +280,38 @@ static int p6_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
276 } 280 }
277 if (event & PM_BUSEVENT_MSK) { 281 if (event & PM_BUSEVENT_MSK) {
278 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; 282 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
279 sh = byte * 4; 283 sh = byte * 4 + (16 - PM_UNIT_SH);
280 mask |= PM_UNIT_MSKS << sh; 284 mask |= PM_UNIT_MSKS << sh;
281 value |= (event & PM_UNIT_MSKS) << sh; 285 value |= (u64)(event & PM_UNIT_MSKS) << sh;
282 if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) { 286 if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) {
283 mask |= PM_SUBUNIT_MSKS; 287 subunit = (event >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK;
284 value |= event & PM_SUBUNIT_MSKS; 288 mask |= (u64)PM_SUBUNIT_MSK << 32;
289 value |= (u64)subunit << 32;
285 } 290 }
286 } 291 }
292 if (pmc <= 4) {
293 mask |= 0x8000; /* add field for count of PMC1-4 uses */
294 value |= 0x1000;
295 }
287 *maskp = mask; 296 *maskp = mask;
288 *valp = value; 297 *valp = value;
289 return 0; 298 return 0;
290} 299}
291 300
301static int p6_limited_pmc_event(unsigned int event)
302{
303 int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
304
305 return pmc == 5 || pmc == 6;
306}
307
292#define MAX_ALT 4 /* at most 4 alternatives for any event */ 308#define MAX_ALT 4 /* at most 4 alternatives for any event */
293 309
294static const unsigned int event_alternatives[][MAX_ALT] = { 310static const unsigned int event_alternatives[][MAX_ALT] = {
295 { 0x0130e8, 0x2000f6, 0x3000fc }, /* PM_PTEG_RELOAD_VALID */ 311 { 0x0130e8, 0x2000f6, 0x3000fc }, /* PM_PTEG_RELOAD_VALID */
296 { 0x080080, 0x10000d, 0x30000c, 0x4000f0 }, /* PM_LD_MISS_L1 */ 312 { 0x080080, 0x10000d, 0x30000c, 0x4000f0 }, /* PM_LD_MISS_L1 */
297 { 0x080088, 0x200054, 0x3000f0 }, /* PM_ST_MISS_L1 */ 313 { 0x080088, 0x200054, 0x3000f0 }, /* PM_ST_MISS_L1 */
298 { 0x10000a, 0x2000f4 }, /* PM_RUN_CYC */ 314 { 0x10000a, 0x2000f4, 0x600005 }, /* PM_RUN_CYC */
299 { 0x10000b, 0x2000f5 }, /* PM_RUN_COUNT */ 315 { 0x10000b, 0x2000f5 }, /* PM_RUN_COUNT */
300 { 0x10000e, 0x400010 }, /* PM_PURR */ 316 { 0x10000e, 0x400010 }, /* PM_PURR */
301 { 0x100010, 0x4000f8 }, /* PM_FLUSH */ 317 { 0x100010, 0x4000f8 }, /* PM_FLUSH */
@@ -340,13 +356,15 @@ static int find_alternatives_list(unsigned int event)
340 return -1; 356 return -1;
341} 357}
342 358
343static int p6_get_alternatives(unsigned int event, unsigned int alt[]) 359static int p6_get_alternatives(unsigned int event, unsigned int flags,
360 unsigned int alt[])
344{ 361{
345 int i, j; 362 int i, j, nlim;
346 unsigned int aevent, psel, pmc; 363 unsigned int aevent, psel, pmc;
347 unsigned int nalt = 1; 364 unsigned int nalt = 1;
348 365
349 alt[0] = event; 366 alt[0] = event;
367 nlim = p6_limited_pmc_event(event);
350 368
351 /* check the alternatives table */ 369 /* check the alternatives table */
352 i = find_alternatives_list(event); 370 i = find_alternatives_list(event);
@@ -358,6 +376,7 @@ static int p6_get_alternatives(unsigned int event, unsigned int alt[])
358 break; 376 break;
359 if (aevent != event) 377 if (aevent != event)
360 alt[nalt++] = aevent; 378 alt[nalt++] = aevent;
379 nlim += p6_limited_pmc_event(aevent);
361 } 380 }
362 381
363 } else { 382 } else {
@@ -375,13 +394,75 @@ static int p6_get_alternatives(unsigned int event, unsigned int alt[])
375 ((pmc > 2? pmc - 2: pmc + 2) << PM_PMC_SH); 394 ((pmc > 2? pmc - 2: pmc + 2) << PM_PMC_SH);
376 } 395 }
377 396
397 if (flags & PPMU_ONLY_COUNT_RUN) {
398 /*
399 * We're only counting in RUN state,
400 * so PM_CYC is equivalent to PM_RUN_CYC,
401 * PM_INST_CMPL === PM_RUN_INST_CMPL, PM_PURR === PM_RUN_PURR.
402 * This doesn't include alternatives that don't provide
403 * any extra flexibility in assigning PMCs (e.g.
404 * 0x10000a for PM_RUN_CYC vs. 0x1e for PM_CYC).
405 * Note that even with these additional alternatives
406 * we never end up with more than 4 alternatives for any event.
407 */
408 j = nalt;
409 for (i = 0; i < nalt; ++i) {
410 switch (alt[i]) {
411 case 0x1e: /* PM_CYC */
412 alt[j++] = 0x600005; /* PM_RUN_CYC */
413 ++nlim;
414 break;
415 case 0x10000a: /* PM_RUN_CYC */
416 alt[j++] = 0x1e; /* PM_CYC */
417 break;
418 case 2: /* PM_INST_CMPL */
419 alt[j++] = 0x500009; /* PM_RUN_INST_CMPL */
420 ++nlim;
421 break;
422 case 0x500009: /* PM_RUN_INST_CMPL */
423 alt[j++] = 2; /* PM_INST_CMPL */
424 break;
425 case 0x10000e: /* PM_PURR */
426 alt[j++] = 0x4000f4; /* PM_RUN_PURR */
427 break;
428 case 0x4000f4: /* PM_RUN_PURR */
429 alt[j++] = 0x10000e; /* PM_PURR */
430 break;
431 }
432 }
433 nalt = j;
434 }
435
436 if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) {
437 /* remove the limited PMC events */
438 j = 0;
439 for (i = 0; i < nalt; ++i) {
440 if (!p6_limited_pmc_event(alt[i])) {
441 alt[j] = alt[i];
442 ++j;
443 }
444 }
445 nalt = j;
446 } else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) {
447 /* remove all but the limited PMC events */
448 j = 0;
449 for (i = 0; i < nalt; ++i) {
450 if (p6_limited_pmc_event(alt[i])) {
451 alt[j] = alt[i];
452 ++j;
453 }
454 }
455 nalt = j;
456 }
457
378 return nalt; 458 return nalt;
379} 459}
380 460
381static void p6_disable_pmc(unsigned int pmc, u64 mmcr[]) 461static void p6_disable_pmc(unsigned int pmc, u64 mmcr[])
382{ 462{
383 /* Set PMCxSEL to 0 to disable PMCx */ 463 /* Set PMCxSEL to 0 to disable PMCx */
384 mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); 464 if (pmc <= 3)
465 mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
385} 466}
386 467
387static int power6_generic_events[] = { 468static int power6_generic_events[] = {
@@ -394,14 +475,16 @@ static int power6_generic_events[] = {
394}; 475};
395 476
396struct power_pmu power6_pmu = { 477struct power_pmu power6_pmu = {
397 .n_counter = 4, 478 .n_counter = 6,
398 .max_alternatives = MAX_ALT, 479 .max_alternatives = MAX_ALT,
399 .add_fields = 0x55, 480 .add_fields = 0x1555,
400 .test_adder = 0, 481 .test_adder = 0x3000,
401 .compute_mmcr = p6_compute_mmcr, 482 .compute_mmcr = p6_compute_mmcr,
402 .get_constraint = p6_get_constraint, 483 .get_constraint = p6_get_constraint,
403 .get_alternatives = p6_get_alternatives, 484 .get_alternatives = p6_get_alternatives,
404 .disable_pmc = p6_disable_pmc, 485 .disable_pmc = p6_disable_pmc,
405 .n_generic = ARRAY_SIZE(power6_generic_events), 486 .n_generic = ARRAY_SIZE(power6_generic_events),
406 .generic_events = power6_generic_events, 487 .generic_events = power6_generic_events,
488 .limited_pmc5_6 = 1,
489 .limited_pmc_event = p6_limited_pmc_event,
407}; 490};