diff options
author | Will Deacon <will.deacon@arm.com> | 2010-04-30 06:33:33 -0400 |
---|---|---|
committer | Russell King <rmk+kernel@arm.linux.org.uk> | 2010-05-17 06:53:57 -0400 |
commit | 49e6a32f2f0876b6267584d9c7e0e213bca6e2b8 (patch) | |
tree | 4bf0191baf59163878396678dc514d519315aac3 /arch/arm/kernel/perf_event.c | |
parent | 181193f398e7d8da6b1196138f0e219709621743 (diff) |
ARM: 6070/1: perf-events: add support for xscale PMUs
The perf-events framework for ARM only supports v6 and v7 cores.
This patch adds support for xscale v1 and v2 PMUs to perf, based on the
OProfile drivers in arch/arm/oprofile/op_model_xscale.c
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Diffstat (limited to 'arch/arm/kernel/perf_event.c')
-rw-r--r-- | arch/arm/kernel/perf_event.c | 827 |
1 files changed, 821 insertions, 6 deletions
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 10a0bcdf2158..381f1211233e 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c | |||
@@ -2108,6 +2108,803 @@ static u32 __init armv7_reset_read_pmnc(void) | |||
2108 | return nb_cnt + 1; | 2108 | return nb_cnt + 1; |
2109 | } | 2109 | } |
2110 | 2110 | ||
2111 | /* | ||
2112 | * ARMv5 [xscale] Performance counter handling code. | ||
2113 | * | ||
2114 | * Based on xscale OProfile code. | ||
2115 | * | ||
2116 | * There are two variants of the xscale PMU that we support: | ||
2117 | * - xscale1pmu: 2 event counters and a cycle counter | ||
2118 | * - xscale2pmu: 4 event counters and a cycle counter | ||
2119 | * The two variants share event definitions, but have different | ||
2120 | * PMU structures. | ||
2121 | */ | ||
2122 | |||
/*
 * Event numbers used with the xscale event-select fields.
 *
 * XSCALE_PERFCTR_CCNT is not a hardware-defined event; this file uses it
 * to tag requests for the cycle counter. XSCALE_PERFCTR_UNUSED is the
 * value this file writes into an event-select field when it disables a
 * counter.
 */
enum xscale_perf_types {
	/* Instruction fetch and TLB events. */
	XSCALE_PERFCTR_ICACHE_MISS		= 0x00,
	XSCALE_PERFCTR_ICACHE_NO_DELIVER	= 0x01,
	XSCALE_PERFCTR_DATA_STALL		= 0x02,
	XSCALE_PERFCTR_ITLB_MISS		= 0x03,
	XSCALE_PERFCTR_DTLB_MISS		= 0x04,

	/* Branch and instruction events. */
	XSCALE_PERFCTR_BRANCH			= 0x05,
	XSCALE_PERFCTR_BRANCH_MISS		= 0x06,
	XSCALE_PERFCTR_INSTRUCTION		= 0x07,

	/* Data cache events. */
	XSCALE_PERFCTR_DCACHE_FULL_STALL	= 0x08,
	XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG	= 0x09,
	XSCALE_PERFCTR_DCACHE_ACCESS		= 0x0a,
	XSCALE_PERFCTR_DCACHE_MISS		= 0x0b,
	XSCALE_PERFCTR_DCACHE_WRITE_BACK	= 0x0c,
	XSCALE_PERFCTR_PC_CHANGED		= 0x0d,

	/* BCU events. */
	XSCALE_PERFCTR_BCU_REQUEST		= 0x10,
	XSCALE_PERFCTR_BCU_FULL			= 0x11,
	XSCALE_PERFCTR_BCU_DRAIN		= 0x12,
	XSCALE_PERFCTR_BCU_ECC_NO_ELOG		= 0x14,
	XSCALE_PERFCTR_BCU_1_BIT_ERR		= 0x15,
	XSCALE_PERFCTR_RMW			= 0x16,

	/* Driver-side values, see the comment above. */
	XSCALE_PERFCTR_CCNT			= 0xfe,
	XSCALE_PERFCTR_UNUSED			= 0xff,
};
2148 | |||
/*
 * Counter indices used by the xscale PMU code. Numbering starts at 1,
 * with the cycle counter first and the event counters following it.
 */
enum xscale_counters {
	XSCALE_CYCLE_COUNTER	= 1,
	XSCALE_COUNTER0		= 2,
	XSCALE_COUNTER1		= 3,
	XSCALE_COUNTER2		= 4,
	XSCALE_COUNTER3		= 5,
};
2156 | |||
2157 | static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = { | ||
2158 | [PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT, | ||
2159 | [PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION, | ||
2160 | [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, | ||
2161 | [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, | ||
2162 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH, | ||
2163 | [PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS, | ||
2164 | [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, | ||
2165 | }; | ||
2166 | |||
2167 | static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] | ||
2168 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
2169 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = { | ||
2170 | [C(L1D)] = { | ||
2171 | [C(OP_READ)] = { | ||
2172 | [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, | ||
2173 | [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, | ||
2174 | }, | ||
2175 | [C(OP_WRITE)] = { | ||
2176 | [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, | ||
2177 | [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, | ||
2178 | }, | ||
2179 | [C(OP_PREFETCH)] = { | ||
2180 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
2181 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
2182 | }, | ||
2183 | }, | ||
2184 | [C(L1I)] = { | ||
2185 | [C(OP_READ)] = { | ||
2186 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
2187 | [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, | ||
2188 | }, | ||
2189 | [C(OP_WRITE)] = { | ||
2190 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
2191 | [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, | ||
2192 | }, | ||
2193 | [C(OP_PREFETCH)] = { | ||
2194 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
2195 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
2196 | }, | ||
2197 | }, | ||
2198 | [C(LL)] = { | ||
2199 | [C(OP_READ)] = { | ||
2200 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
2201 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
2202 | }, | ||
2203 | [C(OP_WRITE)] = { | ||
2204 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
2205 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
2206 | }, | ||
2207 | [C(OP_PREFETCH)] = { | ||
2208 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
2209 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
2210 | }, | ||
2211 | }, | ||
2212 | [C(DTLB)] = { | ||
2213 | [C(OP_READ)] = { | ||
2214 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
2215 | [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, | ||
2216 | }, | ||
2217 | [C(OP_WRITE)] = { | ||
2218 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
2219 | [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, | ||
2220 | }, | ||
2221 | [C(OP_PREFETCH)] = { | ||
2222 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
2223 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
2224 | }, | ||
2225 | }, | ||
2226 | [C(ITLB)] = { | ||
2227 | [C(OP_READ)] = { | ||
2228 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
2229 | [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, | ||
2230 | }, | ||
2231 | [C(OP_WRITE)] = { | ||
2232 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
2233 | [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, | ||
2234 | }, | ||
2235 | [C(OP_PREFETCH)] = { | ||
2236 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
2237 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
2238 | }, | ||
2239 | }, | ||
2240 | [C(BPU)] = { | ||
2241 | [C(OP_READ)] = { | ||
2242 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
2243 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
2244 | }, | ||
2245 | [C(OP_WRITE)] = { | ||
2246 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
2247 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
2248 | }, | ||
2249 | [C(OP_PREFETCH)] = { | ||
2250 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
2251 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
2252 | }, | ||
2253 | }, | ||
2254 | }; | ||
2255 | |||
/* Control bits of the xscale PMNC register. */
#define XSCALE_PMU_ENABLE	0x001
#define XSCALE_PMN_RESET	0x002
#define XSCALE_CCNT_RESET	0x004
/*
 * Combined reset of the cycle counter and the event counters. This must
 * be spelled with the XSCALE_-prefixed names; the unprefixed CCNT_RESET
 * and PMN_RESET are not defined here.
 */
#define XSCALE_PMU_RESET	(XSCALE_CCNT_RESET | XSCALE_PMN_RESET)
#define XSCALE_PMU_CNT64	0x008
2261 | |||
2262 | static inline int | ||
2263 | xscalepmu_event_map(int config) | ||
2264 | { | ||
2265 | int mapping = xscale_perf_map[config]; | ||
2266 | if (HW_OP_UNSUPPORTED == mapping) | ||
2267 | mapping = -EOPNOTSUPP; | ||
2268 | return mapping; | ||
2269 | } | ||
2270 | |||
2271 | static u64 | ||
2272 | xscalepmu_raw_event(u64 config) | ||
2273 | { | ||
2274 | return config & 0xff; | ||
2275 | } | ||
2276 | |||
/* xscale1 PMNC: per-counter overflow flags (bits 8-10). */
#define XSCALE1_COUNT0_OVERFLOW	(1 << 8)
#define XSCALE1_COUNT1_OVERFLOW	(1 << 9)
#define XSCALE1_CCOUNT_OVERFLOW	(1 << 10)
#define XSCALE1_OVERFLOWED_MASK	(XSCALE1_CCOUNT_OVERFLOW | \
				 XSCALE1_COUNT0_OVERFLOW | \
				 XSCALE1_COUNT1_OVERFLOW)

/* xscale1 PMNC: per-counter interrupt enables (bits 4-6). */
#define XSCALE1_COUNT0_INT_EN	(1 << 4)
#define XSCALE1_COUNT1_INT_EN	(1 << 5)
#define XSCALE1_CCOUNT_INT_EN	(1 << 6)

/* xscale1 PMNC: 8-bit event-select fields for the two event counters. */
#define XSCALE1_COUNT0_EVT_SHFT	12
#define XSCALE1_COUNT0_EVT_MASK	(0xff << XSCALE1_COUNT0_EVT_SHFT)
#define XSCALE1_COUNT1_EVT_SHFT	20
#define XSCALE1_COUNT1_EVT_MASK	(0xff << XSCALE1_COUNT1_EVT_SHFT)
2288 | |||
2289 | static inline u32 | ||
2290 | xscale1pmu_read_pmnc(void) | ||
2291 | { | ||
2292 | u32 val; | ||
2293 | asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val)); | ||
2294 | return val; | ||
2295 | } | ||
2296 | |||
2297 | static inline void | ||
2298 | xscale1pmu_write_pmnc(u32 val) | ||
2299 | { | ||
2300 | /* upper 4bits and 7, 11 are write-as-0 */ | ||
2301 | val &= 0xffff77f; | ||
2302 | asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val)); | ||
2303 | } | ||
2304 | |||
2305 | static inline int | ||
2306 | xscale1_pmnc_counter_has_overflowed(unsigned long pmnc, | ||
2307 | enum xscale_counters counter) | ||
2308 | { | ||
2309 | int ret = 0; | ||
2310 | |||
2311 | switch (counter) { | ||
2312 | case XSCALE_CYCLE_COUNTER: | ||
2313 | ret = pmnc & XSCALE1_CCOUNT_OVERFLOW; | ||
2314 | break; | ||
2315 | case XSCALE_COUNTER0: | ||
2316 | ret = pmnc & XSCALE1_COUNT0_OVERFLOW; | ||
2317 | break; | ||
2318 | case XSCALE_COUNTER1: | ||
2319 | ret = pmnc & XSCALE1_COUNT1_OVERFLOW; | ||
2320 | break; | ||
2321 | default: | ||
2322 | WARN_ONCE(1, "invalid counter number (%d)\n", counter); | ||
2323 | } | ||
2324 | |||
2325 | return ret; | ||
2326 | } | ||
2327 | |||
2328 | static irqreturn_t | ||
2329 | xscale1pmu_handle_irq(int irq_num, void *dev) | ||
2330 | { | ||
2331 | unsigned long pmnc; | ||
2332 | struct perf_sample_data data; | ||
2333 | struct cpu_hw_events *cpuc; | ||
2334 | struct pt_regs *regs; | ||
2335 | int idx; | ||
2336 | |||
2337 | /* | ||
2338 | * NOTE: there's an A stepping erratum that states if an overflow | ||
2339 | * bit already exists and another occurs, the previous | ||
2340 | * Overflow bit gets cleared. There's no workaround. | ||
2341 | * Fixed in B stepping or later. | ||
2342 | */ | ||
2343 | pmnc = xscale1pmu_read_pmnc(); | ||
2344 | |||
2345 | /* | ||
2346 | * Write the value back to clear the overflow flags. Overflow | ||
2347 | * flags remain in pmnc for use below. We also disable the PMU | ||
2348 | * while we process the interrupt. | ||
2349 | */ | ||
2350 | xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); | ||
2351 | |||
2352 | if (!(pmnc & XSCALE1_OVERFLOWED_MASK)) | ||
2353 | return IRQ_NONE; | ||
2354 | |||
2355 | regs = get_irq_regs(); | ||
2356 | |||
2357 | perf_sample_data_init(&data, 0); | ||
2358 | |||
2359 | cpuc = &__get_cpu_var(cpu_hw_events); | ||
2360 | for (idx = 0; idx <= armpmu->num_events; ++idx) { | ||
2361 | struct perf_event *event = cpuc->events[idx]; | ||
2362 | struct hw_perf_event *hwc; | ||
2363 | |||
2364 | if (!test_bit(idx, cpuc->active_mask)) | ||
2365 | continue; | ||
2366 | |||
2367 | if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx)) | ||
2368 | continue; | ||
2369 | |||
2370 | hwc = &event->hw; | ||
2371 | armpmu_event_update(event, hwc, idx); | ||
2372 | data.period = event->hw.last_period; | ||
2373 | if (!armpmu_event_set_period(event, hwc, idx)) | ||
2374 | continue; | ||
2375 | |||
2376 | if (perf_event_overflow(event, 0, &data, regs)) | ||
2377 | armpmu->disable(hwc, idx); | ||
2378 | } | ||
2379 | |||
2380 | perf_event_do_pending(); | ||
2381 | |||
2382 | /* | ||
2383 | * Re-enable the PMU. | ||
2384 | */ | ||
2385 | pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE; | ||
2386 | xscale1pmu_write_pmnc(pmnc); | ||
2387 | |||
2388 | return IRQ_HANDLED; | ||
2389 | } | ||
2390 | |||
2391 | static void | ||
2392 | xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx) | ||
2393 | { | ||
2394 | unsigned long val, mask, evt, flags; | ||
2395 | |||
2396 | switch (idx) { | ||
2397 | case XSCALE_CYCLE_COUNTER: | ||
2398 | mask = 0; | ||
2399 | evt = XSCALE1_CCOUNT_INT_EN; | ||
2400 | break; | ||
2401 | case XSCALE_COUNTER0: | ||
2402 | mask = XSCALE1_COUNT0_EVT_MASK; | ||
2403 | evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) | | ||
2404 | XSCALE1_COUNT0_INT_EN; | ||
2405 | break; | ||
2406 | case XSCALE_COUNTER1: | ||
2407 | mask = XSCALE1_COUNT1_EVT_MASK; | ||
2408 | evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) | | ||
2409 | XSCALE1_COUNT1_INT_EN; | ||
2410 | break; | ||
2411 | default: | ||
2412 | WARN_ONCE(1, "invalid counter number (%d)\n", idx); | ||
2413 | return; | ||
2414 | } | ||
2415 | |||
2416 | spin_lock_irqsave(&pmu_lock, flags); | ||
2417 | val = xscale1pmu_read_pmnc(); | ||
2418 | val &= ~mask; | ||
2419 | val |= evt; | ||
2420 | xscale1pmu_write_pmnc(val); | ||
2421 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
2422 | } | ||
2423 | |||
2424 | static void | ||
2425 | xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx) | ||
2426 | { | ||
2427 | unsigned long val, mask, evt, flags; | ||
2428 | |||
2429 | switch (idx) { | ||
2430 | case XSCALE_CYCLE_COUNTER: | ||
2431 | mask = XSCALE1_CCOUNT_INT_EN; | ||
2432 | evt = 0; | ||
2433 | break; | ||
2434 | case XSCALE_COUNTER0: | ||
2435 | mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK; | ||
2436 | evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT; | ||
2437 | break; | ||
2438 | case XSCALE_COUNTER1: | ||
2439 | mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK; | ||
2440 | evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT; | ||
2441 | break; | ||
2442 | default: | ||
2443 | WARN_ONCE(1, "invalid counter number (%d)\n", idx); | ||
2444 | return; | ||
2445 | } | ||
2446 | |||
2447 | spin_lock_irqsave(&pmu_lock, flags); | ||
2448 | val = xscale1pmu_read_pmnc(); | ||
2449 | val &= ~mask; | ||
2450 | val |= evt; | ||
2451 | xscale1pmu_write_pmnc(val); | ||
2452 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
2453 | } | ||
2454 | |||
2455 | static int | ||
2456 | xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc, | ||
2457 | struct hw_perf_event *event) | ||
2458 | { | ||
2459 | if (XSCALE_PERFCTR_CCNT == event->config_base) { | ||
2460 | if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask)) | ||
2461 | return -EAGAIN; | ||
2462 | |||
2463 | return XSCALE_CYCLE_COUNTER; | ||
2464 | } else { | ||
2465 | if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask)) { | ||
2466 | return XSCALE_COUNTER1; | ||
2467 | } | ||
2468 | |||
2469 | if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask)) { | ||
2470 | return XSCALE_COUNTER0; | ||
2471 | } | ||
2472 | |||
2473 | return -EAGAIN; | ||
2474 | } | ||
2475 | } | ||
2476 | |||
2477 | static void | ||
2478 | xscale1pmu_start(void) | ||
2479 | { | ||
2480 | unsigned long flags, val; | ||
2481 | |||
2482 | spin_lock_irqsave(&pmu_lock, flags); | ||
2483 | val = xscale1pmu_read_pmnc(); | ||
2484 | val |= XSCALE_PMU_ENABLE; | ||
2485 | xscale1pmu_write_pmnc(val); | ||
2486 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
2487 | } | ||
2488 | |||
2489 | static void | ||
2490 | xscale1pmu_stop(void) | ||
2491 | { | ||
2492 | unsigned long flags, val; | ||
2493 | |||
2494 | spin_lock_irqsave(&pmu_lock, flags); | ||
2495 | val = xscale1pmu_read_pmnc(); | ||
2496 | val &= ~XSCALE_PMU_ENABLE; | ||
2497 | xscale1pmu_write_pmnc(val); | ||
2498 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
2499 | } | ||
2500 | |||
2501 | static inline u32 | ||
2502 | xscale1pmu_read_counter(int counter) | ||
2503 | { | ||
2504 | u32 val = 0; | ||
2505 | |||
2506 | switch (counter) { | ||
2507 | case XSCALE_CYCLE_COUNTER: | ||
2508 | asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val)); | ||
2509 | break; | ||
2510 | case XSCALE_COUNTER0: | ||
2511 | asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val)); | ||
2512 | break; | ||
2513 | case XSCALE_COUNTER1: | ||
2514 | asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val)); | ||
2515 | break; | ||
2516 | } | ||
2517 | |||
2518 | return val; | ||
2519 | } | ||
2520 | |||
2521 | static inline void | ||
2522 | xscale1pmu_write_counter(int counter, u32 val) | ||
2523 | { | ||
2524 | switch (counter) { | ||
2525 | case XSCALE_CYCLE_COUNTER: | ||
2526 | asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val)); | ||
2527 | break; | ||
2528 | case XSCALE_COUNTER0: | ||
2529 | asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val)); | ||
2530 | break; | ||
2531 | case XSCALE_COUNTER1: | ||
2532 | asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val)); | ||
2533 | break; | ||
2534 | } | ||
2535 | } | ||
2536 | |||
2537 | static const struct arm_pmu xscale1pmu = { | ||
2538 | .id = ARM_PERF_PMU_ID_XSCALE1, | ||
2539 | .handle_irq = xscale1pmu_handle_irq, | ||
2540 | .enable = xscale1pmu_enable_event, | ||
2541 | .disable = xscale1pmu_disable_event, | ||
2542 | .event_map = xscalepmu_event_map, | ||
2543 | .raw_event = xscalepmu_raw_event, | ||
2544 | .read_counter = xscale1pmu_read_counter, | ||
2545 | .write_counter = xscale1pmu_write_counter, | ||
2546 | .get_event_idx = xscale1pmu_get_event_idx, | ||
2547 | .start = xscale1pmu_start, | ||
2548 | .stop = xscale1pmu_stop, | ||
2549 | .num_events = 3, | ||
2550 | .max_period = (1LLU << 32) - 1, | ||
2551 | }; | ||
2552 | |||
/* xscale2 overflow flag register: one flag per counter (bits 0-4). */
#define XSCALE2_CCOUNT_OVERFLOW	(1 << 0)
#define XSCALE2_COUNT0_OVERFLOW	(1 << 1)
#define XSCALE2_COUNT1_OVERFLOW	(1 << 2)
#define XSCALE2_COUNT2_OVERFLOW	(1 << 3)
#define XSCALE2_COUNT3_OVERFLOW	(1 << 4)
#define XSCALE2_OVERFLOWED_MASK	(XSCALE2_CCOUNT_OVERFLOW | \
				 XSCALE2_COUNT0_OVERFLOW | \
				 XSCALE2_COUNT1_OVERFLOW | \
				 XSCALE2_COUNT2_OVERFLOW | \
				 XSCALE2_COUNT3_OVERFLOW)

/* xscale2 interrupt enable register: one enable per counter (bits 0-4). */
#define XSCALE2_CCOUNT_INT_EN	(1 << 0)
#define XSCALE2_COUNT0_INT_EN	(1 << 1)
#define XSCALE2_COUNT1_INT_EN	(1 << 2)
#define XSCALE2_COUNT2_INT_EN	(1 << 3)
#define XSCALE2_COUNT3_INT_EN	(1 << 4)

/* xscale2 event select register: one 8-bit field per event counter. */
#define XSCALE2_COUNT0_EVT_SHFT	0
#define XSCALE2_COUNT0_EVT_MASK	(0xff << XSCALE2_COUNT0_EVT_SHFT)
#define XSCALE2_COUNT1_EVT_SHFT	8
#define XSCALE2_COUNT1_EVT_MASK	(0xff << XSCALE2_COUNT1_EVT_SHFT)
#define XSCALE2_COUNT2_EVT_SHFT	16
#define XSCALE2_COUNT2_EVT_MASK	(0xff << XSCALE2_COUNT2_EVT_SHFT)
#define XSCALE2_COUNT3_EVT_SHFT	24
#define XSCALE2_COUNT3_EVT_MASK	(0xff << XSCALE2_COUNT3_EVT_SHFT)
2572 | |||
2573 | static inline u32 | ||
2574 | xscale2pmu_read_pmnc(void) | ||
2575 | { | ||
2576 | u32 val; | ||
2577 | asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val)); | ||
2578 | /* bits 1-2 and 4-23 are read-unpredictable */ | ||
2579 | return val & 0xff000009; | ||
2580 | } | ||
2581 | |||
2582 | static inline void | ||
2583 | xscale2pmu_write_pmnc(u32 val) | ||
2584 | { | ||
2585 | /* bits 4-23 are write-as-0, 24-31 are write ignored */ | ||
2586 | val &= 0xf; | ||
2587 | asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val)); | ||
2588 | } | ||
2589 | |||
2590 | static inline u32 | ||
2591 | xscale2pmu_read_overflow_flags(void) | ||
2592 | { | ||
2593 | u32 val; | ||
2594 | asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val)); | ||
2595 | return val; | ||
2596 | } | ||
2597 | |||
2598 | static inline void | ||
2599 | xscale2pmu_write_overflow_flags(u32 val) | ||
2600 | { | ||
2601 | asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val)); | ||
2602 | } | ||
2603 | |||
2604 | static inline u32 | ||
2605 | xscale2pmu_read_event_select(void) | ||
2606 | { | ||
2607 | u32 val; | ||
2608 | asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val)); | ||
2609 | return val; | ||
2610 | } | ||
2611 | |||
2612 | static inline void | ||
2613 | xscale2pmu_write_event_select(u32 val) | ||
2614 | { | ||
2615 | asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val)); | ||
2616 | } | ||
2617 | |||
2618 | static inline u32 | ||
2619 | xscale2pmu_read_int_enable(void) | ||
2620 | { | ||
2621 | u32 val; | ||
2622 | asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val)); | ||
2623 | return val; | ||
2624 | } | ||
2625 | |||
2626 | static void | ||
2627 | xscale2pmu_write_int_enable(u32 val) | ||
2628 | { | ||
2629 | asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val)); | ||
2630 | } | ||
2631 | |||
2632 | static inline int | ||
2633 | xscale2_pmnc_counter_has_overflowed(unsigned long of_flags, | ||
2634 | enum xscale_counters counter) | ||
2635 | { | ||
2636 | int ret = 0; | ||
2637 | |||
2638 | switch (counter) { | ||
2639 | case XSCALE_CYCLE_COUNTER: | ||
2640 | ret = of_flags & XSCALE2_CCOUNT_OVERFLOW; | ||
2641 | break; | ||
2642 | case XSCALE_COUNTER0: | ||
2643 | ret = of_flags & XSCALE2_COUNT0_OVERFLOW; | ||
2644 | break; | ||
2645 | case XSCALE_COUNTER1: | ||
2646 | ret = of_flags & XSCALE2_COUNT1_OVERFLOW; | ||
2647 | break; | ||
2648 | case XSCALE_COUNTER2: | ||
2649 | ret = of_flags & XSCALE2_COUNT2_OVERFLOW; | ||
2650 | break; | ||
2651 | case XSCALE_COUNTER3: | ||
2652 | ret = of_flags & XSCALE2_COUNT3_OVERFLOW; | ||
2653 | break; | ||
2654 | default: | ||
2655 | WARN_ONCE(1, "invalid counter number (%d)\n", counter); | ||
2656 | } | ||
2657 | |||
2658 | return ret; | ||
2659 | } | ||
2660 | |||
2661 | static irqreturn_t | ||
2662 | xscale2pmu_handle_irq(int irq_num, void *dev) | ||
2663 | { | ||
2664 | unsigned long pmnc, of_flags; | ||
2665 | struct perf_sample_data data; | ||
2666 | struct cpu_hw_events *cpuc; | ||
2667 | struct pt_regs *regs; | ||
2668 | int idx; | ||
2669 | |||
2670 | /* Disable the PMU. */ | ||
2671 | pmnc = xscale2pmu_read_pmnc(); | ||
2672 | xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); | ||
2673 | |||
2674 | /* Check the overflow flag register. */ | ||
2675 | of_flags = xscale2pmu_read_overflow_flags(); | ||
2676 | if (!(of_flags & XSCALE2_OVERFLOWED_MASK)) | ||
2677 | return IRQ_NONE; | ||
2678 | |||
2679 | /* Clear the overflow bits. */ | ||
2680 | xscale2pmu_write_overflow_flags(of_flags); | ||
2681 | |||
2682 | regs = get_irq_regs(); | ||
2683 | |||
2684 | perf_sample_data_init(&data, 0); | ||
2685 | |||
2686 | cpuc = &__get_cpu_var(cpu_hw_events); | ||
2687 | for (idx = 0; idx <= armpmu->num_events; ++idx) { | ||
2688 | struct perf_event *event = cpuc->events[idx]; | ||
2689 | struct hw_perf_event *hwc; | ||
2690 | |||
2691 | if (!test_bit(idx, cpuc->active_mask)) | ||
2692 | continue; | ||
2693 | |||
2694 | if (!xscale2_pmnc_counter_has_overflowed(pmnc, idx)) | ||
2695 | continue; | ||
2696 | |||
2697 | hwc = &event->hw; | ||
2698 | armpmu_event_update(event, hwc, idx); | ||
2699 | data.period = event->hw.last_period; | ||
2700 | if (!armpmu_event_set_period(event, hwc, idx)) | ||
2701 | continue; | ||
2702 | |||
2703 | if (perf_event_overflow(event, 0, &data, regs)) | ||
2704 | armpmu->disable(hwc, idx); | ||
2705 | } | ||
2706 | |||
2707 | perf_event_do_pending(); | ||
2708 | |||
2709 | /* | ||
2710 | * Re-enable the PMU. | ||
2711 | */ | ||
2712 | pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE; | ||
2713 | xscale2pmu_write_pmnc(pmnc); | ||
2714 | |||
2715 | return IRQ_HANDLED; | ||
2716 | } | ||
2717 | |||
2718 | static void | ||
2719 | xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx) | ||
2720 | { | ||
2721 | unsigned long flags, ien, evtsel; | ||
2722 | |||
2723 | ien = xscale2pmu_read_int_enable(); | ||
2724 | evtsel = xscale2pmu_read_event_select(); | ||
2725 | |||
2726 | switch (idx) { | ||
2727 | case XSCALE_CYCLE_COUNTER: | ||
2728 | ien |= XSCALE2_CCOUNT_INT_EN; | ||
2729 | break; | ||
2730 | case XSCALE_COUNTER0: | ||
2731 | ien |= XSCALE2_COUNT0_INT_EN; | ||
2732 | evtsel &= ~XSCALE2_COUNT0_EVT_MASK; | ||
2733 | evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT; | ||
2734 | break; | ||
2735 | case XSCALE_COUNTER1: | ||
2736 | ien |= XSCALE2_COUNT1_INT_EN; | ||
2737 | evtsel &= ~XSCALE2_COUNT1_EVT_MASK; | ||
2738 | evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT; | ||
2739 | break; | ||
2740 | case XSCALE_COUNTER2: | ||
2741 | ien |= XSCALE2_COUNT2_INT_EN; | ||
2742 | evtsel &= ~XSCALE2_COUNT2_EVT_MASK; | ||
2743 | evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT; | ||
2744 | break; | ||
2745 | case XSCALE_COUNTER3: | ||
2746 | ien |= XSCALE2_COUNT3_INT_EN; | ||
2747 | evtsel &= ~XSCALE2_COUNT3_EVT_MASK; | ||
2748 | evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT; | ||
2749 | break; | ||
2750 | default: | ||
2751 | WARN_ONCE(1, "invalid counter number (%d)\n", idx); | ||
2752 | return; | ||
2753 | } | ||
2754 | |||
2755 | spin_lock_irqsave(&pmu_lock, flags); | ||
2756 | xscale2pmu_write_event_select(evtsel); | ||
2757 | xscale2pmu_write_int_enable(ien); | ||
2758 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
2759 | } | ||
2760 | |||
2761 | static void | ||
2762 | xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx) | ||
2763 | { | ||
2764 | unsigned long flags, ien, evtsel; | ||
2765 | |||
2766 | ien = xscale2pmu_read_int_enable(); | ||
2767 | evtsel = xscale2pmu_read_event_select(); | ||
2768 | |||
2769 | switch (idx) { | ||
2770 | case XSCALE_CYCLE_COUNTER: | ||
2771 | ien &= ~XSCALE2_CCOUNT_INT_EN; | ||
2772 | break; | ||
2773 | case XSCALE_COUNTER0: | ||
2774 | ien &= ~XSCALE2_COUNT0_INT_EN; | ||
2775 | evtsel &= ~XSCALE2_COUNT0_EVT_MASK; | ||
2776 | evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT; | ||
2777 | break; | ||
2778 | case XSCALE_COUNTER1: | ||
2779 | ien &= ~XSCALE2_COUNT1_INT_EN; | ||
2780 | evtsel &= ~XSCALE2_COUNT1_EVT_MASK; | ||
2781 | evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT; | ||
2782 | break; | ||
2783 | case XSCALE_COUNTER2: | ||
2784 | ien &= ~XSCALE2_COUNT2_INT_EN; | ||
2785 | evtsel &= ~XSCALE2_COUNT2_EVT_MASK; | ||
2786 | evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT; | ||
2787 | break; | ||
2788 | case XSCALE_COUNTER3: | ||
2789 | ien &= ~XSCALE2_COUNT3_INT_EN; | ||
2790 | evtsel &= ~XSCALE2_COUNT3_EVT_MASK; | ||
2791 | evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT; | ||
2792 | break; | ||
2793 | default: | ||
2794 | WARN_ONCE(1, "invalid counter number (%d)\n", idx); | ||
2795 | return; | ||
2796 | } | ||
2797 | |||
2798 | spin_lock_irqsave(&pmu_lock, flags); | ||
2799 | xscale2pmu_write_event_select(evtsel); | ||
2800 | xscale2pmu_write_int_enable(ien); | ||
2801 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
2802 | } | ||
2803 | |||
2804 | static int | ||
2805 | xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc, | ||
2806 | struct hw_perf_event *event) | ||
2807 | { | ||
2808 | int idx = xscale1pmu_get_event_idx(cpuc, event); | ||
2809 | if (idx >= 0) | ||
2810 | goto out; | ||
2811 | |||
2812 | if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask)) | ||
2813 | idx = XSCALE_COUNTER3; | ||
2814 | else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask)) | ||
2815 | idx = XSCALE_COUNTER2; | ||
2816 | out: | ||
2817 | return idx; | ||
2818 | } | ||
2819 | |||
2820 | static void | ||
2821 | xscale2pmu_start(void) | ||
2822 | { | ||
2823 | unsigned long flags, val; | ||
2824 | |||
2825 | spin_lock_irqsave(&pmu_lock, flags); | ||
2826 | val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64; | ||
2827 | val |= XSCALE_PMU_ENABLE; | ||
2828 | xscale2pmu_write_pmnc(val); | ||
2829 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
2830 | } | ||
2831 | |||
2832 | static void | ||
2833 | xscale2pmu_stop(void) | ||
2834 | { | ||
2835 | unsigned long flags, val; | ||
2836 | |||
2837 | spin_lock_irqsave(&pmu_lock, flags); | ||
2838 | val = xscale2pmu_read_pmnc(); | ||
2839 | val &= ~XSCALE_PMU_ENABLE; | ||
2840 | xscale2pmu_write_pmnc(val); | ||
2841 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
2842 | } | ||
2843 | |||
2844 | static inline u32 | ||
2845 | xscale2pmu_read_counter(int counter) | ||
2846 | { | ||
2847 | u32 val = 0; | ||
2848 | |||
2849 | switch (counter) { | ||
2850 | case XSCALE_CYCLE_COUNTER: | ||
2851 | asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val)); | ||
2852 | break; | ||
2853 | case XSCALE_COUNTER0: | ||
2854 | asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val)); | ||
2855 | break; | ||
2856 | case XSCALE_COUNTER1: | ||
2857 | asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val)); | ||
2858 | break; | ||
2859 | case XSCALE_COUNTER2: | ||
2860 | asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val)); | ||
2861 | break; | ||
2862 | case XSCALE_COUNTER3: | ||
2863 | asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val)); | ||
2864 | break; | ||
2865 | } | ||
2866 | |||
2867 | return val; | ||
2868 | } | ||
2869 | |||
2870 | static inline void | ||
2871 | xscale2pmu_write_counter(int counter, u32 val) | ||
2872 | { | ||
2873 | switch (counter) { | ||
2874 | case XSCALE_CYCLE_COUNTER: | ||
2875 | asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val)); | ||
2876 | break; | ||
2877 | case XSCALE_COUNTER0: | ||
2878 | asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val)); | ||
2879 | break; | ||
2880 | case XSCALE_COUNTER1: | ||
2881 | asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val)); | ||
2882 | break; | ||
2883 | case XSCALE_COUNTER2: | ||
2884 | asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val)); | ||
2885 | break; | ||
2886 | case XSCALE_COUNTER3: | ||
2887 | asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val)); | ||
2888 | break; | ||
2889 | } | ||
2890 | } | ||
2891 | |||
2892 | static const struct arm_pmu xscale2pmu = { | ||
2893 | .id = ARM_PERF_PMU_ID_XSCALE2, | ||
2894 | .handle_irq = xscale2pmu_handle_irq, | ||
2895 | .enable = xscale2pmu_enable_event, | ||
2896 | .disable = xscale2pmu_disable_event, | ||
2897 | .event_map = xscalepmu_event_map, | ||
2898 | .raw_event = xscalepmu_raw_event, | ||
2899 | .read_counter = xscale2pmu_read_counter, | ||
2900 | .write_counter = xscale2pmu_write_counter, | ||
2901 | .get_event_idx = xscale2pmu_get_event_idx, | ||
2902 | .start = xscale2pmu_start, | ||
2903 | .stop = xscale2pmu_stop, | ||
2904 | .num_events = 5, | ||
2905 | .max_period = (1LLU << 32) - 1, | ||
2906 | }; | ||
2907 | |||
2111 | static int __init | 2908 | static int __init |
2112 | init_hw_perf_events(void) | 2909 | init_hw_perf_events(void) |
2113 | { | 2910 | { |
@@ -2115,7 +2912,7 @@ init_hw_perf_events(void) | |||
2115 | unsigned long implementor = (cpuid & 0xFF000000) >> 24; | 2912 | unsigned long implementor = (cpuid & 0xFF000000) >> 24; |
2116 | unsigned long part_number = (cpuid & 0xFFF0); | 2913 | unsigned long part_number = (cpuid & 0xFFF0); |
2117 | 2914 | ||
2118 | /* We only support ARM CPUs implemented by ARM at the moment. */ | 2915 | /* ARM Ltd CPUs. */ |
2119 | if (0x41 == implementor) { | 2916 | if (0x41 == implementor) { |
2120 | switch (part_number) { | 2917 | switch (part_number) { |
2121 | case 0xB360: /* ARM1136 */ | 2918 | case 0xB360: /* ARM1136 */ |
@@ -2157,15 +2954,33 @@ init_hw_perf_events(void) | |||
2157 | armv7pmu.num_events = armv7_reset_read_pmnc(); | 2954 | armv7pmu.num_events = armv7_reset_read_pmnc(); |
2158 | perf_max_events = armv7pmu.num_events; | 2955 | perf_max_events = armv7pmu.num_events; |
2159 | break; | 2956 | break; |
2160 | default: | 2957 | } |
2161 | pr_info("no hardware support available\n"); | 2958 | /* Intel CPUs [xscale]. */ |
2162 | perf_max_events = -1; | 2959 | } else if (0x69 == implementor) { |
2960 | part_number = (cpuid >> 13) & 0x7; | ||
2961 | switch (part_number) { | ||
2962 | case 1: | ||
2963 | armpmu = &xscale1pmu; | ||
2964 | memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, | ||
2965 | sizeof(xscale_perf_cache_map)); | ||
2966 | perf_max_events = xscale1pmu.num_events; | ||
2967 | break; | ||
2968 | case 2: | ||
2969 | armpmu = &xscale2pmu; | ||
2970 | memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, | ||
2971 | sizeof(xscale_perf_cache_map)); | ||
2972 | perf_max_events = xscale2pmu.num_events; | ||
2973 | break; | ||
2163 | } | 2974 | } |
2164 | } | 2975 | } |
2165 | 2976 | ||
2166 | if (armpmu) | 2977 | if (armpmu) { |
2167 | pr_info("enabled with %s PMU driver, %d counters available\n", | 2978 | pr_info("enabled with %s PMU driver, %d counters available\n", |
2168 | arm_pmu_names[armpmu->id], armpmu->num_events); | 2979 | arm_pmu_names[armpmu->id], armpmu->num_events); |
2980 | } else { | ||
2981 | pr_info("no hardware support available\n"); | ||
2982 | perf_max_events = -1; | ||
2983 | } | ||
2169 | 2984 | ||
2170 | return 0; | 2985 | return 0; |
2171 | } | 2986 | } |