aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Will Deacon <will.deacon@arm.com>, 2010-04-30 06:33:33 -0400
committer: Russell King <rmk+kernel@arm.linux.org.uk>, 2010-05-17 06:53:57 -0400
commit: 49e6a32f2f0876b6267584d9c7e0e213bca6e2b8 (patch)
tree: 4bf0191baf59163878396678dc514d519315aac3
parent: 181193f398e7d8da6b1196138f0e219709621743 (diff)
ARM: 6070/1: perf-events: add support for xscale PMUs
The perf-events framework for ARM only supports v6 and v7 cores. This patch adds support for xscale v1 and v2 PMUs to perf, based on the OProfile drivers in arch/arm/oprofile/op_model_xscale.c.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
-rw-r--r--arch/arm/kernel/perf_event.c827
1 files changed, 821 insertions, 6 deletions
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 10a0bcdf2158..381f1211233e 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -2108,6 +2108,803 @@ static u32 __init armv7_reset_read_pmnc(void)
2108 return nb_cnt + 1; 2108 return nb_cnt + 1;
2109} 2109}
2110 2110
2111/*
2112 * ARMv5 [xscale] Performance counter handling code.
2113 *
2114 * Based on xscale OProfile code.
2115 *
2116 * There are two variants of the xscale PMU that we support:
2117 * - xscale1pmu: 2 event counters and a cycle counter
2118 * - xscale2pmu: 4 event counters and a cycle counter
2119 * The two variants share event definitions, but have different
2120 * PMU structures.
2121 */
2122
/*
 * Event numbers used by the xscale PMU code. Values up to
 * XSCALE_PERFCTR_RMW are written into the hardware event-select fields;
 * XSCALE_PERFCTR_CCNT is a software-only marker for the cycle counter.
 */
enum xscale_perf_types {
	/* Core events. */
	XSCALE_PERFCTR_ICACHE_MISS		= 0x00,
	XSCALE_PERFCTR_ICACHE_NO_DELIVER	= 0x01,
	XSCALE_PERFCTR_DATA_STALL		= 0x02,
	XSCALE_PERFCTR_ITLB_MISS		= 0x03,
	XSCALE_PERFCTR_DTLB_MISS		= 0x04,
	XSCALE_PERFCTR_BRANCH			= 0x05,
	XSCALE_PERFCTR_BRANCH_MISS		= 0x06,
	XSCALE_PERFCTR_INSTRUCTION		= 0x07,
	XSCALE_PERFCTR_DCACHE_FULL_STALL	= 0x08,
	XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG	= 0x09,
	XSCALE_PERFCTR_DCACHE_ACCESS		= 0x0a,
	XSCALE_PERFCTR_DCACHE_MISS		= 0x0b,
	XSCALE_PERFCTR_DCACHE_WRITE_BACK	= 0x0c,
	XSCALE_PERFCTR_PC_CHANGED		= 0x0d,

	/* BCU events. */
	XSCALE_PERFCTR_BCU_REQUEST		= 0x10,
	XSCALE_PERFCTR_BCU_FULL			= 0x11,
	XSCALE_PERFCTR_BCU_DRAIN		= 0x12,
	XSCALE_PERFCTR_BCU_ECC_NO_ELOG		= 0x14,
	XSCALE_PERFCTR_BCU_1_BIT_ERR		= 0x15,
	XSCALE_PERFCTR_RMW			= 0x16,

	/* XSCALE_PERFCTR_CCNT is not hardware defined */
	XSCALE_PERFCTR_CCNT			= 0xfe,
	/* Event number programmed into a counter when it is disabled. */
	XSCALE_PERFCTR_UNUSED			= 0xff,
};
2148
/*
 * Counter indices used for the used/active bitmasks and the event array.
 * Numbering starts at 1; index 0 is never assigned to a counter.
 */
enum xscale_counters {
	XSCALE_CYCLE_COUNTER	= 1,
	XSCALE_COUNTER0		= 2,
	XSCALE_COUNTER1		= 3,
	XSCALE_COUNTER2		= 4,
	XSCALE_COUNTER3		= 5,
};
2156
2157static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = {
2158 [PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT,
2159 [PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION,
2160 [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
2161 [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
2162 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH,
2163 [PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS,
2164 [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
2165};
2166
2167static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
2168 [PERF_COUNT_HW_CACHE_OP_MAX]
2169 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
2170 [C(L1D)] = {
2171 [C(OP_READ)] = {
2172 [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
2173 [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
2174 },
2175 [C(OP_WRITE)] = {
2176 [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
2177 [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
2178 },
2179 [C(OP_PREFETCH)] = {
2180 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2181 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2182 },
2183 },
2184 [C(L1I)] = {
2185 [C(OP_READ)] = {
2186 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2187 [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS,
2188 },
2189 [C(OP_WRITE)] = {
2190 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2191 [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS,
2192 },
2193 [C(OP_PREFETCH)] = {
2194 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2195 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2196 },
2197 },
2198 [C(LL)] = {
2199 [C(OP_READ)] = {
2200 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2201 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2202 },
2203 [C(OP_WRITE)] = {
2204 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2205 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2206 },
2207 [C(OP_PREFETCH)] = {
2208 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2209 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2210 },
2211 },
2212 [C(DTLB)] = {
2213 [C(OP_READ)] = {
2214 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2215 [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
2216 },
2217 [C(OP_WRITE)] = {
2218 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2219 [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
2220 },
2221 [C(OP_PREFETCH)] = {
2222 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2223 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2224 },
2225 },
2226 [C(ITLB)] = {
2227 [C(OP_READ)] = {
2228 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2229 [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
2230 },
2231 [C(OP_WRITE)] = {
2232 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2233 [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
2234 },
2235 [C(OP_PREFETCH)] = {
2236 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2237 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2238 },
2239 },
2240 [C(BPU)] = {
2241 [C(OP_READ)] = {
2242 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2243 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2244 },
2245 [C(OP_WRITE)] = {
2246 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2247 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2248 },
2249 [C(OP_PREFETCH)] = {
2250 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2251 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2252 },
2253 },
2254};
2255
/*
 * Control bits shared by both xscale PMU variants. XSCALE_PMU_ENABLE and
 * XSCALE_PMU_CNT64 are applied to the PMNC register by the start/stop and
 * interrupt paths; the reset bits are presumably PMNC bits as well
 * (NOTE(review): confirm against the xscale manual).
 *
 * XSCALE_PMU_RESET previously expanded to the undefined identifiers
 * CCNT_RESET and PMN_RESET; it now uses the XSCALE_-prefixed names.
 */
#define XSCALE_PMU_ENABLE	0x001
#define XSCALE_PMN_RESET	0x002
#define XSCALE_CCNT_RESET	0x004
#define XSCALE_PMU_RESET	(XSCALE_CCNT_RESET | XSCALE_PMN_RESET)
#define XSCALE_PMU_CNT64	0x008
2261
2262static inline int
2263xscalepmu_event_map(int config)
2264{
2265 int mapping = xscale_perf_map[config];
2266 if (HW_OP_UNSUPPORTED == mapping)
2267 mapping = -EOPNOTSUPP;
2268 return mapping;
2269}
2270
2271static u64
2272xscalepmu_raw_event(u64 config)
2273{
2274 return config & 0xff;
2275}
2276
/*
 * xscale1 PMNC layout: interrupt enables, overflow flags and the two
 * 8-bit event-select fields all live in the one control register.
 */

/* Interrupt-enable bits. */
#define XSCALE1_COUNT0_INT_EN		(1 << 4)
#define XSCALE1_COUNT1_INT_EN		(1 << 5)
#define XSCALE1_CCOUNT_INT_EN		(1 << 6)

/* Overflow flags. */
#define XSCALE1_COUNT0_OVERFLOW		(1 << 8)
#define XSCALE1_COUNT1_OVERFLOW		(1 << 9)
#define XSCALE1_CCOUNT_OVERFLOW		(1 << 10)
#define XSCALE1_OVERFLOWED_MASK		(XSCALE1_COUNT0_OVERFLOW | \
					 XSCALE1_COUNT1_OVERFLOW | \
					 XSCALE1_CCOUNT_OVERFLOW)

/* Event-select fields for the two event counters. */
#define XSCALE1_COUNT0_EVT_SHFT		12
#define XSCALE1_COUNT0_EVT_MASK		(0xff << XSCALE1_COUNT0_EVT_SHFT)
#define XSCALE1_COUNT1_EVT_SHFT		20
#define XSCALE1_COUNT1_EVT_MASK		(0xff << XSCALE1_COUNT1_EVT_SHFT)
2288
2289static inline u32
2290xscale1pmu_read_pmnc(void)
2291{
2292 u32 val;
2293 asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val));
2294 return val;
2295}
2296
2297static inline void
2298xscale1pmu_write_pmnc(u32 val)
2299{
2300 /* upper 4bits and 7, 11 are write-as-0 */
2301 val &= 0xffff77f;
2302 asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val));
2303}
2304
2305static inline int
2306xscale1_pmnc_counter_has_overflowed(unsigned long pmnc,
2307 enum xscale_counters counter)
2308{
2309 int ret = 0;
2310
2311 switch (counter) {
2312 case XSCALE_CYCLE_COUNTER:
2313 ret = pmnc & XSCALE1_CCOUNT_OVERFLOW;
2314 break;
2315 case XSCALE_COUNTER0:
2316 ret = pmnc & XSCALE1_COUNT0_OVERFLOW;
2317 break;
2318 case XSCALE_COUNTER1:
2319 ret = pmnc & XSCALE1_COUNT1_OVERFLOW;
2320 break;
2321 default:
2322 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
2323 }
2324
2325 return ret;
2326}
2327
2328static irqreturn_t
2329xscale1pmu_handle_irq(int irq_num, void *dev)
2330{
2331 unsigned long pmnc;
2332 struct perf_sample_data data;
2333 struct cpu_hw_events *cpuc;
2334 struct pt_regs *regs;
2335 int idx;
2336
2337 /*
2338 * NOTE: there's an A stepping erratum that states if an overflow
2339 * bit already exists and another occurs, the previous
2340 * Overflow bit gets cleared. There's no workaround.
2341 * Fixed in B stepping or later.
2342 */
2343 pmnc = xscale1pmu_read_pmnc();
2344
2345 /*
2346 * Write the value back to clear the overflow flags. Overflow
2347 * flags remain in pmnc for use below. We also disable the PMU
2348 * while we process the interrupt.
2349 */
2350 xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
2351
2352 if (!(pmnc & XSCALE1_OVERFLOWED_MASK))
2353 return IRQ_NONE;
2354
2355 regs = get_irq_regs();
2356
2357 perf_sample_data_init(&data, 0);
2358
2359 cpuc = &__get_cpu_var(cpu_hw_events);
2360 for (idx = 0; idx <= armpmu->num_events; ++idx) {
2361 struct perf_event *event = cpuc->events[idx];
2362 struct hw_perf_event *hwc;
2363
2364 if (!test_bit(idx, cpuc->active_mask))
2365 continue;
2366
2367 if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx))
2368 continue;
2369
2370 hwc = &event->hw;
2371 armpmu_event_update(event, hwc, idx);
2372 data.period = event->hw.last_period;
2373 if (!armpmu_event_set_period(event, hwc, idx))
2374 continue;
2375
2376 if (perf_event_overflow(event, 0, &data, regs))
2377 armpmu->disable(hwc, idx);
2378 }
2379
2380 perf_event_do_pending();
2381
2382 /*
2383 * Re-enable the PMU.
2384 */
2385 pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE;
2386 xscale1pmu_write_pmnc(pmnc);
2387
2388 return IRQ_HANDLED;
2389}
2390
2391static void
2392xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx)
2393{
2394 unsigned long val, mask, evt, flags;
2395
2396 switch (idx) {
2397 case XSCALE_CYCLE_COUNTER:
2398 mask = 0;
2399 evt = XSCALE1_CCOUNT_INT_EN;
2400 break;
2401 case XSCALE_COUNTER0:
2402 mask = XSCALE1_COUNT0_EVT_MASK;
2403 evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) |
2404 XSCALE1_COUNT0_INT_EN;
2405 break;
2406 case XSCALE_COUNTER1:
2407 mask = XSCALE1_COUNT1_EVT_MASK;
2408 evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) |
2409 XSCALE1_COUNT1_INT_EN;
2410 break;
2411 default:
2412 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2413 return;
2414 }
2415
2416 spin_lock_irqsave(&pmu_lock, flags);
2417 val = xscale1pmu_read_pmnc();
2418 val &= ~mask;
2419 val |= evt;
2420 xscale1pmu_write_pmnc(val);
2421 spin_unlock_irqrestore(&pmu_lock, flags);
2422}
2423
2424static void
2425xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx)
2426{
2427 unsigned long val, mask, evt, flags;
2428
2429 switch (idx) {
2430 case XSCALE_CYCLE_COUNTER:
2431 mask = XSCALE1_CCOUNT_INT_EN;
2432 evt = 0;
2433 break;
2434 case XSCALE_COUNTER0:
2435 mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK;
2436 evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT;
2437 break;
2438 case XSCALE_COUNTER1:
2439 mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK;
2440 evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT;
2441 break;
2442 default:
2443 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2444 return;
2445 }
2446
2447 spin_lock_irqsave(&pmu_lock, flags);
2448 val = xscale1pmu_read_pmnc();
2449 val &= ~mask;
2450 val |= evt;
2451 xscale1pmu_write_pmnc(val);
2452 spin_unlock_irqrestore(&pmu_lock, flags);
2453}
2454
2455static int
2456xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc,
2457 struct hw_perf_event *event)
2458{
2459 if (XSCALE_PERFCTR_CCNT == event->config_base) {
2460 if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask))
2461 return -EAGAIN;
2462
2463 return XSCALE_CYCLE_COUNTER;
2464 } else {
2465 if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask)) {
2466 return XSCALE_COUNTER1;
2467 }
2468
2469 if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask)) {
2470 return XSCALE_COUNTER0;
2471 }
2472
2473 return -EAGAIN;
2474 }
2475}
2476
2477static void
2478xscale1pmu_start(void)
2479{
2480 unsigned long flags, val;
2481
2482 spin_lock_irqsave(&pmu_lock, flags);
2483 val = xscale1pmu_read_pmnc();
2484 val |= XSCALE_PMU_ENABLE;
2485 xscale1pmu_write_pmnc(val);
2486 spin_unlock_irqrestore(&pmu_lock, flags);
2487}
2488
2489static void
2490xscale1pmu_stop(void)
2491{
2492 unsigned long flags, val;
2493
2494 spin_lock_irqsave(&pmu_lock, flags);
2495 val = xscale1pmu_read_pmnc();
2496 val &= ~XSCALE_PMU_ENABLE;
2497 xscale1pmu_write_pmnc(val);
2498 spin_unlock_irqrestore(&pmu_lock, flags);
2499}
2500
2501static inline u32
2502xscale1pmu_read_counter(int counter)
2503{
2504 u32 val = 0;
2505
2506 switch (counter) {
2507 case XSCALE_CYCLE_COUNTER:
2508 asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val));
2509 break;
2510 case XSCALE_COUNTER0:
2511 asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val));
2512 break;
2513 case XSCALE_COUNTER1:
2514 asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val));
2515 break;
2516 }
2517
2518 return val;
2519}
2520
2521static inline void
2522xscale1pmu_write_counter(int counter, u32 val)
2523{
2524 switch (counter) {
2525 case XSCALE_CYCLE_COUNTER:
2526 asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val));
2527 break;
2528 case XSCALE_COUNTER0:
2529 asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val));
2530 break;
2531 case XSCALE_COUNTER1:
2532 asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val));
2533 break;
2534 }
2535}
2536
2537static const struct arm_pmu xscale1pmu = {
2538 .id = ARM_PERF_PMU_ID_XSCALE1,
2539 .handle_irq = xscale1pmu_handle_irq,
2540 .enable = xscale1pmu_enable_event,
2541 .disable = xscale1pmu_disable_event,
2542 .event_map = xscalepmu_event_map,
2543 .raw_event = xscalepmu_raw_event,
2544 .read_counter = xscale1pmu_read_counter,
2545 .write_counter = xscale1pmu_write_counter,
2546 .get_event_idx = xscale1pmu_get_event_idx,
2547 .start = xscale1pmu_start,
2548 .stop = xscale1pmu_stop,
2549 .num_events = 3,
2550 .max_period = (1LLU << 32) - 1,
2551};
2552
/*
 * xscale2 register layout. Unlike xscale1, the overflow flags, interrupt
 * enables and event selects are applied to separate registers (overflow
 * flag, interrupt enable and event select respectively).
 */

/* Overflow flag register bits. */
#define XSCALE2_OVERFLOWED_MASK	0x01f
#define XSCALE2_CCOUNT_OVERFLOW	0x001
#define XSCALE2_COUNT0_OVERFLOW	0x002
#define XSCALE2_COUNT1_OVERFLOW	0x004
#define XSCALE2_COUNT2_OVERFLOW	0x008
#define XSCALE2_COUNT3_OVERFLOW	0x010

/* Interrupt enable register bits. */
#define XSCALE2_CCOUNT_INT_EN	0x001
#define XSCALE2_COUNT0_INT_EN	0x002
#define XSCALE2_COUNT1_INT_EN	0x004
#define XSCALE2_COUNT2_INT_EN	0x008
#define XSCALE2_COUNT3_INT_EN	0x010

/*
 * Event select register fields, one byte per event counter. The masks
 * are unsigned long so that the counter 3 field (0xff << 24) does not
 * shift a signed int into its sign bit, which is undefined behaviour.
 */
#define XSCALE2_COUNT0_EVT_SHFT	0
#define XSCALE2_COUNT0_EVT_MASK	(0xffUL << XSCALE2_COUNT0_EVT_SHFT)
#define XSCALE2_COUNT1_EVT_SHFT	8
#define XSCALE2_COUNT1_EVT_MASK	(0xffUL << XSCALE2_COUNT1_EVT_SHFT)
#define XSCALE2_COUNT2_EVT_SHFT	16
#define XSCALE2_COUNT2_EVT_MASK	(0xffUL << XSCALE2_COUNT2_EVT_SHFT)
#define XSCALE2_COUNT3_EVT_SHFT	24
#define XSCALE2_COUNT3_EVT_MASK	(0xffUL << XSCALE2_COUNT3_EVT_SHFT)
2572
2573static inline u32
2574xscale2pmu_read_pmnc(void)
2575{
2576 u32 val;
2577 asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val));
2578 /* bits 1-2 and 4-23 are read-unpredictable */
2579 return val & 0xff000009;
2580}
2581
2582static inline void
2583xscale2pmu_write_pmnc(u32 val)
2584{
2585 /* bits 4-23 are write-as-0, 24-31 are write ignored */
2586 val &= 0xf;
2587 asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val));
2588}
2589
2590static inline u32
2591xscale2pmu_read_overflow_flags(void)
2592{
2593 u32 val;
2594 asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val));
2595 return val;
2596}
2597
2598static inline void
2599xscale2pmu_write_overflow_flags(u32 val)
2600{
2601 asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val));
2602}
2603
2604static inline u32
2605xscale2pmu_read_event_select(void)
2606{
2607 u32 val;
2608 asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val));
2609 return val;
2610}
2611
2612static inline void
2613xscale2pmu_write_event_select(u32 val)
2614{
2615 asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val));
2616}
2617
2618static inline u32
2619xscale2pmu_read_int_enable(void)
2620{
2621 u32 val;
2622 asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val));
2623 return val;
2624}
2625
2626static void
2627xscale2pmu_write_int_enable(u32 val)
2628{
2629 asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val));
2630}
2631
2632static inline int
2633xscale2_pmnc_counter_has_overflowed(unsigned long of_flags,
2634 enum xscale_counters counter)
2635{
2636 int ret = 0;
2637
2638 switch (counter) {
2639 case XSCALE_CYCLE_COUNTER:
2640 ret = of_flags & XSCALE2_CCOUNT_OVERFLOW;
2641 break;
2642 case XSCALE_COUNTER0:
2643 ret = of_flags & XSCALE2_COUNT0_OVERFLOW;
2644 break;
2645 case XSCALE_COUNTER1:
2646 ret = of_flags & XSCALE2_COUNT1_OVERFLOW;
2647 break;
2648 case XSCALE_COUNTER2:
2649 ret = of_flags & XSCALE2_COUNT2_OVERFLOW;
2650 break;
2651 case XSCALE_COUNTER3:
2652 ret = of_flags & XSCALE2_COUNT3_OVERFLOW;
2653 break;
2654 default:
2655 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
2656 }
2657
2658 return ret;
2659}
2660
2661static irqreturn_t
2662xscale2pmu_handle_irq(int irq_num, void *dev)
2663{
2664 unsigned long pmnc, of_flags;
2665 struct perf_sample_data data;
2666 struct cpu_hw_events *cpuc;
2667 struct pt_regs *regs;
2668 int idx;
2669
2670 /* Disable the PMU. */
2671 pmnc = xscale2pmu_read_pmnc();
2672 xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
2673
2674 /* Check the overflow flag register. */
2675 of_flags = xscale2pmu_read_overflow_flags();
2676 if (!(of_flags & XSCALE2_OVERFLOWED_MASK))
2677 return IRQ_NONE;
2678
2679 /* Clear the overflow bits. */
2680 xscale2pmu_write_overflow_flags(of_flags);
2681
2682 regs = get_irq_regs();
2683
2684 perf_sample_data_init(&data, 0);
2685
2686 cpuc = &__get_cpu_var(cpu_hw_events);
2687 for (idx = 0; idx <= armpmu->num_events; ++idx) {
2688 struct perf_event *event = cpuc->events[idx];
2689 struct hw_perf_event *hwc;
2690
2691 if (!test_bit(idx, cpuc->active_mask))
2692 continue;
2693
2694 if (!xscale2_pmnc_counter_has_overflowed(pmnc, idx))
2695 continue;
2696
2697 hwc = &event->hw;
2698 armpmu_event_update(event, hwc, idx);
2699 data.period = event->hw.last_period;
2700 if (!armpmu_event_set_period(event, hwc, idx))
2701 continue;
2702
2703 if (perf_event_overflow(event, 0, &data, regs))
2704 armpmu->disable(hwc, idx);
2705 }
2706
2707 perf_event_do_pending();
2708
2709 /*
2710 * Re-enable the PMU.
2711 */
2712 pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE;
2713 xscale2pmu_write_pmnc(pmnc);
2714
2715 return IRQ_HANDLED;
2716}
2717
2718static void
2719xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx)
2720{
2721 unsigned long flags, ien, evtsel;
2722
2723 ien = xscale2pmu_read_int_enable();
2724 evtsel = xscale2pmu_read_event_select();
2725
2726 switch (idx) {
2727 case XSCALE_CYCLE_COUNTER:
2728 ien |= XSCALE2_CCOUNT_INT_EN;
2729 break;
2730 case XSCALE_COUNTER0:
2731 ien |= XSCALE2_COUNT0_INT_EN;
2732 evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
2733 evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT;
2734 break;
2735 case XSCALE_COUNTER1:
2736 ien |= XSCALE2_COUNT1_INT_EN;
2737 evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
2738 evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT;
2739 break;
2740 case XSCALE_COUNTER2:
2741 ien |= XSCALE2_COUNT2_INT_EN;
2742 evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
2743 evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT;
2744 break;
2745 case XSCALE_COUNTER3:
2746 ien |= XSCALE2_COUNT3_INT_EN;
2747 evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
2748 evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT;
2749 break;
2750 default:
2751 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2752 return;
2753 }
2754
2755 spin_lock_irqsave(&pmu_lock, flags);
2756 xscale2pmu_write_event_select(evtsel);
2757 xscale2pmu_write_int_enable(ien);
2758 spin_unlock_irqrestore(&pmu_lock, flags);
2759}
2760
2761static void
2762xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
2763{
2764 unsigned long flags, ien, evtsel;
2765
2766 ien = xscale2pmu_read_int_enable();
2767 evtsel = xscale2pmu_read_event_select();
2768
2769 switch (idx) {
2770 case XSCALE_CYCLE_COUNTER:
2771 ien &= ~XSCALE2_CCOUNT_INT_EN;
2772 break;
2773 case XSCALE_COUNTER0:
2774 ien &= ~XSCALE2_COUNT0_INT_EN;
2775 evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
2776 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT;
2777 break;
2778 case XSCALE_COUNTER1:
2779 ien &= ~XSCALE2_COUNT1_INT_EN;
2780 evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
2781 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT;
2782 break;
2783 case XSCALE_COUNTER2:
2784 ien &= ~XSCALE2_COUNT2_INT_EN;
2785 evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
2786 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT;
2787 break;
2788 case XSCALE_COUNTER3:
2789 ien &= ~XSCALE2_COUNT3_INT_EN;
2790 evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
2791 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT;
2792 break;
2793 default:
2794 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2795 return;
2796 }
2797
2798 spin_lock_irqsave(&pmu_lock, flags);
2799 xscale2pmu_write_event_select(evtsel);
2800 xscale2pmu_write_int_enable(ien);
2801 spin_unlock_irqrestore(&pmu_lock, flags);
2802}
2803
2804static int
2805xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc,
2806 struct hw_perf_event *event)
2807{
2808 int idx = xscale1pmu_get_event_idx(cpuc, event);
2809 if (idx >= 0)
2810 goto out;
2811
2812 if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask))
2813 idx = XSCALE_COUNTER3;
2814 else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask))
2815 idx = XSCALE_COUNTER2;
2816out:
2817 return idx;
2818}
2819
2820static void
2821xscale2pmu_start(void)
2822{
2823 unsigned long flags, val;
2824
2825 spin_lock_irqsave(&pmu_lock, flags);
2826 val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
2827 val |= XSCALE_PMU_ENABLE;
2828 xscale2pmu_write_pmnc(val);
2829 spin_unlock_irqrestore(&pmu_lock, flags);
2830}
2831
2832static void
2833xscale2pmu_stop(void)
2834{
2835 unsigned long flags, val;
2836
2837 spin_lock_irqsave(&pmu_lock, flags);
2838 val = xscale2pmu_read_pmnc();
2839 val &= ~XSCALE_PMU_ENABLE;
2840 xscale2pmu_write_pmnc(val);
2841 spin_unlock_irqrestore(&pmu_lock, flags);
2842}
2843
2844static inline u32
2845xscale2pmu_read_counter(int counter)
2846{
2847 u32 val = 0;
2848
2849 switch (counter) {
2850 case XSCALE_CYCLE_COUNTER:
2851 asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val));
2852 break;
2853 case XSCALE_COUNTER0:
2854 asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val));
2855 break;
2856 case XSCALE_COUNTER1:
2857 asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val));
2858 break;
2859 case XSCALE_COUNTER2:
2860 asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val));
2861 break;
2862 case XSCALE_COUNTER3:
2863 asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val));
2864 break;
2865 }
2866
2867 return val;
2868}
2869
2870static inline void
2871xscale2pmu_write_counter(int counter, u32 val)
2872{
2873 switch (counter) {
2874 case XSCALE_CYCLE_COUNTER:
2875 asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val));
2876 break;
2877 case XSCALE_COUNTER0:
2878 asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val));
2879 break;
2880 case XSCALE_COUNTER1:
2881 asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val));
2882 break;
2883 case XSCALE_COUNTER2:
2884 asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val));
2885 break;
2886 case XSCALE_COUNTER3:
2887 asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val));
2888 break;
2889 }
2890}
2891
2892static const struct arm_pmu xscale2pmu = {
2893 .id = ARM_PERF_PMU_ID_XSCALE2,
2894 .handle_irq = xscale2pmu_handle_irq,
2895 .enable = xscale2pmu_enable_event,
2896 .disable = xscale2pmu_disable_event,
2897 .event_map = xscalepmu_event_map,
2898 .raw_event = xscalepmu_raw_event,
2899 .read_counter = xscale2pmu_read_counter,
2900 .write_counter = xscale2pmu_write_counter,
2901 .get_event_idx = xscale2pmu_get_event_idx,
2902 .start = xscale2pmu_start,
2903 .stop = xscale2pmu_stop,
2904 .num_events = 5,
2905 .max_period = (1LLU << 32) - 1,
2906};
2907
2111static int __init 2908static int __init
2112init_hw_perf_events(void) 2909init_hw_perf_events(void)
2113{ 2910{
@@ -2115,7 +2912,7 @@ init_hw_perf_events(void)
2115 unsigned long implementor = (cpuid & 0xFF000000) >> 24; 2912 unsigned long implementor = (cpuid & 0xFF000000) >> 24;
2116 unsigned long part_number = (cpuid & 0xFFF0); 2913 unsigned long part_number = (cpuid & 0xFFF0);
2117 2914
2118 /* We only support ARM CPUs implemented by ARM at the moment. */ 2915 /* ARM Ltd CPUs. */
2119 if (0x41 == implementor) { 2916 if (0x41 == implementor) {
2120 switch (part_number) { 2917 switch (part_number) {
2121 case 0xB360: /* ARM1136 */ 2918 case 0xB360: /* ARM1136 */
@@ -2157,15 +2954,33 @@ init_hw_perf_events(void)
2157 armv7pmu.num_events = armv7_reset_read_pmnc(); 2954 armv7pmu.num_events = armv7_reset_read_pmnc();
2158 perf_max_events = armv7pmu.num_events; 2955 perf_max_events = armv7pmu.num_events;
2159 break; 2956 break;
2160 default: 2957 }
2161 pr_info("no hardware support available\n"); 2958 /* Intel CPUs [xscale]. */
2162 perf_max_events = -1; 2959 } else if (0x69 == implementor) {
2960 part_number = (cpuid >> 13) & 0x7;
2961 switch (part_number) {
2962 case 1:
2963 armpmu = &xscale1pmu;
2964 memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
2965 sizeof(xscale_perf_cache_map));
2966 perf_max_events = xscale1pmu.num_events;
2967 break;
2968 case 2:
2969 armpmu = &xscale2pmu;
2970 memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
2971 sizeof(xscale_perf_cache_map));
2972 perf_max_events = xscale2pmu.num_events;
2973 break;
2163 } 2974 }
2164 } 2975 }
2165 2976
2166 if (armpmu) 2977 if (armpmu) {
2167 pr_info("enabled with %s PMU driver, %d counters available\n", 2978 pr_info("enabled with %s PMU driver, %d counters available\n",
2168 arm_pmu_names[armpmu->id], armpmu->num_events); 2979 arm_pmu_names[armpmu->id], armpmu->num_events);
2980 } else {
2981 pr_info("no hardware support available\n");
2982 perf_max_events = -1;
2983 }
2169 2984
2170 return 0; 2985 return 0;
2171} 2986}