aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kernel/power5+-pmu.c
diff options
context:
space:
mode:
author Paul Mackerras <paulus@samba.org> 2009-04-29 08:38:51 -0400
committer Ingo Molnar <mingo@elte.hu> 2009-04-29 08:58:35 -0400
commit ab7ef2e50a557af92f4f90689f51fadadafc16b2 (patch)
tree 71ef1cbc279e5d2ad96b6c701617ac60ff36c363 /arch/powerpc/kernel/power5+-pmu.c
parent 98144511427c192e4249ff66a3f9debc55c59411 (diff)
perf_counter: powerpc: allow use of limited-function counters
POWER5+ and POWER6 have two hardware counters with limited functionality: PMC5 counts instructions completed in run state and PMC6 counts cycles in run state. (Run state is the state when a hardware RUN bit is 1; the idle task clears RUN while waiting for work to do and sets it when there is work to do.) These counters can't be written to by the kernel, can't generate interrupts, and don't obey the freeze conditions. That means we can only use them for per-task counters (where we know we'll always be in run state; we can't put a per-task counter on an idle task), and only if we don't want interrupts and we do want to count in all processor modes. Obviously some counters can't go on a limited hardware counter, but there are also situations where we can only put a counter on a limited hardware counter - if counters that exclude some processor modes are already scheduled and we want to add a per-task cycle or instruction counter that doesn't exclude any processor mode, it can go on only if it can use a limited hardware counter. To keep track of these constraints, this adds a flags argument to the processor-specific get_alternatives() functions, with three bits defined: one to say that we can accept alternative event codes that go on limited counters, one to say we only want alternatives on limited counters, and one to say that this is a per-task counter and therefore events that are gated by run state are equivalent to those that aren't (e.g. a "cycles" event is equivalent to a "cycles in run state" event). These flags are computed for each counter and stored in the counter->hw.counter_base field (slightly wonky name for what it does, but it was an existing unused field). Since the limited counters don't freeze when we freeze the other counters, we need some special handling to avoid getting skew between things counted on the limited counters and those counted on normal counters. 
To minimize this skew, if we are using any limited counters, we read PMC5 and PMC6 immediately after setting and clearing the freeze bit. This is done in a single asm in the new write_mmcr0() function. The code here is specific to PMC5 and PMC6 being the limited hardware counters. Being more general (e.g. having a bitmap of limited hardware counter numbers) would have meant more complex code to read the limited counters when freezing and unfreezing the normal counters, with conditional branches, which would have increased the skew. Since it isn't necessary for the code to be more general at this stage, it isn't. This also extends the back-ends for POWER5+ and POWER6 to be able to handle up to 6 counters rather than the 4 they previously handled. Signed-off-by: Paul Mackerras <paulus@samba.org> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Robert Richter <robert.richter@amd.com> LKML-Reference: <18936.19035.163066.892208@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/powerpc/kernel/power5+-pmu.c')
-rw-r--r-- arch/powerpc/kernel/power5+-pmu.c 117
1 files changed, 98 insertions, 19 deletions
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c
index 1222c8ea3c26..8154eaa2404f 100644
--- a/arch/powerpc/kernel/power5+-pmu.c
+++ b/arch/powerpc/kernel/power5+-pmu.c
@@ -78,8 +78,8 @@
78 * Layout of constraint bits: 78 * Layout of constraint bits:
79 * 6666555555555544444444443333333333222222222211111111110000000000 79 * 6666555555555544444444443333333333222222222211111111110000000000
80 * 3210987654321098765432109876543210987654321098765432109876543210 80 * 3210987654321098765432109876543210987654321098765432109876543210
81 * [ ><><>< ><> <><>[ > < >< >< >< ><><><><> 81 * [ ><><>< ><> <><>[ > < >< >< >< ><><><><><><>
82 * NC G0G1G2 G3 T0T1 UC B0 B1 B2 B3 P4P3P2P1 82 * NC G0G1G2 G3 T0T1 UC B0 B1 B2 B3 P6P5P4P3P2P1
83 * 83 *
84 * NC - number of counters 84 * NC - number of counters
85 * 51: NC error 0x0008_0000_0000_0000 85 * 51: NC error 0x0008_0000_0000_0000
@@ -105,18 +105,18 @@
105 * 30: IDU|GRS events needed 0x00_4000_0000 105 * 30: IDU|GRS events needed 0x00_4000_0000
106 * 106 *
107 * B0 107 * B0
108 * 20-23: Byte 0 event source 0x00f0_0000 108 * 24-27: Byte 0 event source 0x0f00_0000
109 * Encoding as for the event code 109 * Encoding as for the event code
110 * 110 *
111 * B1, B2, B3 111 * B1, B2, B3
112 * 16-19, 12-15, 8-11: Byte 1, 2, 3 event sources 112 * 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources
113 * 113 *
114 * P4 114 * P6
115 * 7: P1 error 0x80 115 * 11: P6 error 0x800
116 * 6-7: Count of events needing PMC4 116 * 10-11: Count of events needing PMC6
117 * 117 *
118 * P1..P3 118 * P1..P5
119 * 0-6: Count of events needing PMC1..PMC3 119 * 0-9: Count of events needing PMC1..PMC5
120 */ 120 */
121 121
122static const int grsel_shift[8] = { 122static const int grsel_shift[8] = {
@@ -143,11 +143,13 @@ static int power5p_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
143 143
144 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; 144 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
145 if (pmc) { 145 if (pmc) {
146 if (pmc > 4) 146 if (pmc > 6)
147 return -1; 147 return -1;
148 sh = (pmc - 1) * 2; 148 sh = (pmc - 1) * 2;
149 mask |= 2 << sh; 149 mask |= 2 << sh;
150 value |= 1 << sh; 150 value |= 1 << sh;
151 if (pmc >= 5 && !(event == 0x500009 || event == 0x600005))
152 return -1;
151 } 153 }
152 if (event & PM_BUSEVENT_MSK) { 154 if (event & PM_BUSEVENT_MSK) {
153 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; 155 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
@@ -173,16 +175,26 @@ static int power5p_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
173 value |= (u64)((event >> PM_GRS_SH) & fmask) << sh; 175 value |= (u64)((event >> PM_GRS_SH) & fmask) << sh;
174 } 176 }
175 /* Set byte lane select field */ 177 /* Set byte lane select field */
176 mask |= 0xfULL << (20 - 4 * byte); 178 mask |= 0xfULL << (24 - 4 * byte);
177 value |= (u64)unit << (20 - 4 * byte); 179 value |= (u64)unit << (24 - 4 * byte);
180 }
181 if (pmc < 5) {
182 /* need a counter from PMC1-4 set */
183 mask |= 0x8000000000000ull;
184 value |= 0x1000000000000ull;
178 } 185 }
179 mask |= 0x8000000000000ull;
180 value |= 0x1000000000000ull;
181 *maskp = mask; 186 *maskp = mask;
182 *valp = value; 187 *valp = value;
183 return 0; 188 return 0;
184} 189}
185 190
191static int power5p_limited_pmc_event(unsigned int event)
192{
193 int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
194
195 return pmc == 5 || pmc == 6;
196}
197
186#define MAX_ALT 3 /* at most 3 alternatives for any event */ 198#define MAX_ALT 3 /* at most 3 alternatives for any event */
187 199
188static const unsigned int event_alternatives[][MAX_ALT] = { 200static const unsigned int event_alternatives[][MAX_ALT] = {
@@ -193,6 +205,7 @@ static const unsigned int event_alternatives[][MAX_ALT] = {
193 { 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */ 205 { 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */
194 { 0x800c4, 0xc20e0 }, /* PM_DTLB_MISS */ 206 { 0x800c4, 0xc20e0 }, /* PM_DTLB_MISS */
195 { 0xc50c6, 0xc60e0 }, /* PM_MRK_DTLB_MISS */ 207 { 0xc50c6, 0xc60e0 }, /* PM_MRK_DTLB_MISS */
208 { 0x100005, 0x600005 }, /* PM_RUN_CYC */
196 { 0x100009, 0x200009 }, /* PM_INST_CMPL */ 209 { 0x100009, 0x200009 }, /* PM_INST_CMPL */
197 { 0x200015, 0x300015 }, /* PM_LSU_LMQ_SRQ_EMPTY_CYC */ 210 { 0x200015, 0x300015 }, /* PM_LSU_LMQ_SRQ_EMPTY_CYC */
198 { 0x300009, 0x400009 }, /* PM_INST_DISP */ 211 { 0x300009, 0x400009 }, /* PM_INST_DISP */
@@ -260,24 +273,85 @@ static int find_alternative_bdecode(unsigned int event)
260 return -1; 273 return -1;
261} 274}
262 275
263static int power5p_get_alternatives(unsigned int event, unsigned int alt[]) 276static int power5p_get_alternatives(unsigned int event, unsigned int flags,
277 unsigned int alt[])
264{ 278{
265 int i, j, ae, nalt = 1; 279 int i, j, ae, nalt = 1;
280 int nlim;
266 281
267 alt[0] = event; 282 alt[0] = event;
268 nalt = 1; 283 nalt = 1;
284 nlim = power5p_limited_pmc_event(event);
269 i = find_alternative(event); 285 i = find_alternative(event);
270 if (i >= 0) { 286 if (i >= 0) {
271 for (j = 0; j < MAX_ALT; ++j) { 287 for (j = 0; j < MAX_ALT; ++j) {
272 ae = event_alternatives[i][j]; 288 ae = event_alternatives[i][j];
273 if (ae && ae != event) 289 if (ae && ae != event)
274 alt[nalt++] = ae; 290 alt[nalt++] = ae;
291 nlim += power5p_limited_pmc_event(ae);
275 } 292 }
276 } else { 293 } else {
277 ae = find_alternative_bdecode(event); 294 ae = find_alternative_bdecode(event);
278 if (ae > 0) 295 if (ae > 0)
279 alt[nalt++] = ae; 296 alt[nalt++] = ae;
280 } 297 }
298
299 if (flags & PPMU_ONLY_COUNT_RUN) {
300 /*
301 * We're only counting in RUN state,
302 * so PM_CYC is equivalent to PM_RUN_CYC
303 * and PM_INST_CMPL === PM_RUN_INST_CMPL.
304 * This doesn't include alternatives that don't provide
305 * any extra flexibility in assigning PMCs (e.g.
306 * 0x100005 for PM_RUN_CYC vs. 0xf for PM_CYC).
307 * Note that even with these additional alternatives
308 * we never end up with more than 3 alternatives for any event.
309 */
310 j = nalt;
311 for (i = 0; i < nalt; ++i) {
312 switch (alt[i]) {
313 case 0xf: /* PM_CYC */
314 alt[j++] = 0x600005; /* PM_RUN_CYC */
315 ++nlim;
316 break;
317 case 0x600005: /* PM_RUN_CYC */
318 alt[j++] = 0xf;
319 break;
320 case 0x100009: /* PM_INST_CMPL */
321 alt[j++] = 0x500009; /* PM_RUN_INST_CMPL */
322 ++nlim;
323 break;
324 case 0x500009: /* PM_RUN_INST_CMPL */
325 alt[j++] = 0x100009; /* PM_INST_CMPL */
326 alt[j++] = 0x200009;
327 break;
328 }
329 }
330 nalt = j;
331 }
332
333 if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) {
334 /* remove the limited PMC events */
335 j = 0;
336 for (i = 0; i < nalt; ++i) {
337 if (!power5p_limited_pmc_event(alt[i])) {
338 alt[j] = alt[i];
339 ++j;
340 }
341 }
342 nalt = j;
343 } else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) {
344 /* remove all but the limited PMC events */
345 j = 0;
346 for (i = 0; i < nalt; ++i) {
347 if (power5p_limited_pmc_event(alt[i])) {
348 alt[j] = alt[i];
349 ++j;
350 }
351 }
352 nalt = j;
353 }
354
281 return nalt; 355 return nalt;
282} 356}
283 357
@@ -390,7 +464,7 @@ static int power5p_compute_mmcr(unsigned int event[], int n_ev,
390 unsigned char unituse[16]; 464 unsigned char unituse[16];
391 int ttmuse; 465 int ttmuse;
392 466
393 if (n_ev > 4) 467 if (n_ev > 6)
394 return -1; 468 return -1;
395 469
396 /* First pass to count resource use */ 470 /* First pass to count resource use */
@@ -399,7 +473,7 @@ static int power5p_compute_mmcr(unsigned int event[], int n_ev,
399 for (i = 0; i < n_ev; ++i) { 473 for (i = 0; i < n_ev; ++i) {
400 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; 474 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
401 if (pmc) { 475 if (pmc) {
402 if (pmc > 4) 476 if (pmc > 6)
403 return -1; 477 return -1;
404 if (pmc_inuse & (1 << (pmc - 1))) 478 if (pmc_inuse & (1 << (pmc - 1)))
405 return -1; 479 return -1;
@@ -488,13 +562,16 @@ static int power5p_compute_mmcr(unsigned int event[], int n_ev,
488 if (pmc >= 4) 562 if (pmc >= 4)
489 return -1; 563 return -1;
490 pmc_inuse |= 1 << pmc; 564 pmc_inuse |= 1 << pmc;
491 } else { 565 } else if (pmc <= 4) {
492 /* Direct event */ 566 /* Direct event */
493 --pmc; 567 --pmc;
494 if (isbus && (byte & 2) && 568 if (isbus && (byte & 2) &&
495 (psel == 8 || psel == 0x10 || psel == 0x28)) 569 (psel == 8 || psel == 0x10 || psel == 0x28))
496 /* add events on higher-numbered bus */ 570 /* add events on higher-numbered bus */
497 mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc); 571 mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
572 } else {
573 /* Instructions or run cycles on PMC5/6 */
574 --pmc;
498 } 575 }
499 if (isbus && unit == PM_GRS) { 576 if (isbus && unit == PM_GRS) {
500 bit = psel & 7; 577 bit = psel & 7;
@@ -538,7 +615,7 @@ static int power5p_generic_events[] = {
538}; 615};
539 616
540struct power_pmu power5p_pmu = { 617struct power_pmu power5p_pmu = {
541 .n_counter = 4, 618 .n_counter = 6,
542 .max_alternatives = MAX_ALT, 619 .max_alternatives = MAX_ALT,
543 .add_fields = 0x7000000000055ull, 620 .add_fields = 0x7000000000055ull,
544 .test_adder = 0x3000040000000ull, 621 .test_adder = 0x3000040000000ull,
@@ -548,4 +625,6 @@ struct power_pmu power5p_pmu = {
548 .disable_pmc = power5p_disable_pmc, 625 .disable_pmc = power5p_disable_pmc,
549 .n_generic = ARRAY_SIZE(power5p_generic_events), 626 .n_generic = ARRAY_SIZE(power5p_generic_events),
550 .generic_events = power5p_generic_events, 627 .generic_events = power5p_generic_events,
628 .limited_pmc5_6 = 1,
629 .limited_pmc_event = power5p_limited_pmc_event,
551}; 630};