diff options
| author | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-01-22 10:38:37 -0500 |
|---|---|---|
| committer | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-01-22 10:38:37 -0500 |
| commit | fcc9d2e5a6c89d22b8b773a64fb4ad21ac318446 (patch) | |
| tree | a57612d1888735a2ec7972891b68c1ac5ec8faea /arch/powerpc/kernel | |
| parent | 8dea78da5cee153b8af9c07a2745f6c55057fe12 (diff) | |
Diffstat (limited to 'arch/powerpc/kernel')
| -rw-r--r-- | arch/powerpc/kernel/cpu_setup_power7.S | 95 | ||||
| -rw-r--r-- | arch/powerpc/kernel/e500-pmu.c | 134 | ||||
| -rw-r--r-- | arch/powerpc/kernel/init_task.c | 29 | ||||
| -rw-r--r-- | arch/powerpc/kernel/mpc7450-pmu.c | 422 | ||||
| -rw-r--r-- | arch/powerpc/kernel/perf_callchain.c | 492 | ||||
| -rw-r--r-- | arch/powerpc/kernel/perf_event.c | 1432 | ||||
| -rw-r--r-- | arch/powerpc/kernel/perf_event_fsl_emb.c | 688 | ||||
| -rw-r--r-- | arch/powerpc/kernel/power4-pmu.c | 621 | ||||
| -rw-r--r-- | arch/powerpc/kernel/power5+-pmu.c | 690 | ||||
| -rw-r--r-- | arch/powerpc/kernel/power5-pmu.c | 629 | ||||
| -rw-r--r-- | arch/powerpc/kernel/power6-pmu.c | 552 | ||||
| -rw-r--r-- | arch/powerpc/kernel/power7-pmu.c | 377 | ||||
| -rw-r--r-- | arch/powerpc/kernel/ppc970-pmu.c | 502 |
13 files changed, 6663 insertions, 0 deletions
diff --git a/arch/powerpc/kernel/cpu_setup_power7.S b/arch/powerpc/kernel/cpu_setup_power7.S new file mode 100644 index 00000000000..76797c5105d --- /dev/null +++ b/arch/powerpc/kernel/cpu_setup_power7.S | |||
| @@ -0,0 +1,95 @@ | |||
| 1 | /* | ||
| 2 | * This file contains low level CPU setup functions. | ||
| 3 | * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org) | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or | ||
| 6 | * modify it under the terms of the GNU General Public License | ||
| 7 | * as published by the Free Software Foundation; either version | ||
| 8 | * 2 of the License, or (at your option) any later version. | ||
| 9 | * | ||
| 10 | */ | ||
| 11 | |||
| 12 | #include <asm/processor.h> | ||
| 13 | #include <asm/page.h> | ||
| 14 | #include <asm/cputable.h> | ||
| 15 | #include <asm/ppc_asm.h> | ||
| 16 | #include <asm/asm-offsets.h> | ||
| 17 | #include <asm/cache.h> | ||
| 18 | |||
| 19 | /* Entry: r3 = crap, r4 = ptr to cputable entry | ||
| 20 | * | ||
| 21 | * Note that we can be called twice for pseudo-PVRs | ||
| 22 | */ | ||
| 23 | _GLOBAL(__setup_cpu_power7) | ||
| 24 | mflr r11 | ||
| 25 | bl __init_hvmode_206 | ||
| 26 | mtlr r11 | ||
| 27 | beqlr | ||
| 28 | li r0,0 | ||
| 29 | mtspr SPRN_LPID,r0 | ||
| 30 | bl __init_LPCR | ||
| 31 | bl __init_TLB | ||
| 32 | mtlr r11 | ||
| 33 | blr | ||
| 34 | |||
| 35 | _GLOBAL(__restore_cpu_power7) | ||
| 36 | mflr r11 | ||
| 37 | mfmsr r3 | ||
| 38 | rldicl. r0,r3,4,63 | ||
| 39 | beqlr | ||
| 40 | li r0,0 | ||
| 41 | mtspr SPRN_LPID,r0 | ||
| 42 | bl __init_LPCR | ||
| 43 | bl __init_TLB | ||
| 44 | mtlr r11 | ||
| 45 | blr | ||
| 46 | |||
| 47 | __init_hvmode_206: | ||
| 48 | /* Disable CPU_FTR_HVMODE and exit if MSR:HV is not set */ | ||
| 49 | mfmsr r3 | ||
| 50 | rldicl. r0,r3,4,63 | ||
| 51 | bnelr | ||
| 52 | ld r5,CPU_SPEC_FEATURES(r4) | ||
| 53 | LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE) | ||
| 54 | xor r5,r5,r6 | ||
| 55 | std r5,CPU_SPEC_FEATURES(r4) | ||
| 56 | blr | ||
| 57 | |||
| 58 | __init_LPCR: | ||
| 59 | /* Setup a sane LPCR: | ||
| 60 | * | ||
| 61 | * LPES = 0b01 (HSRR0/1 used for 0x500) | ||
| 62 | * PECE = 0b111 | ||
| 63 | * DPFD = 4 | ||
| 64 | * HDICE = 0 | ||
| 65 | * VC = 0b100 (VPM0=1, VPM1=0, ISL=0) | ||
| 66 | * VRMASD = 0b10000 (L=1, LP=00) | ||
| 67 | * | ||
| 68 | * Other bits untouched for now | ||
| 69 | */ | ||
| 70 | mfspr r3,SPRN_LPCR | ||
| 71 | li r5,1 | ||
| 72 | rldimi r3,r5, LPCR_LPES_SH, 64-LPCR_LPES_SH-2 | ||
| 73 | ori r3,r3,(LPCR_PECE0|LPCR_PECE1|LPCR_PECE2) | ||
| 74 | li r5,4 | ||
| 75 | rldimi r3,r5, LPCR_DPFD_SH, 64-LPCR_DPFD_SH-3 | ||
| 76 | clrrdi r3,r3,1 /* clear HDICE */ | ||
| 77 | li r5,4 | ||
| 78 | rldimi r3,r5, LPCR_VC_SH, 0 | ||
| 79 | li r5,0x10 | ||
| 80 | rldimi r3,r5, LPCR_VRMASD_SH, 64-LPCR_VRMASD_SH-5 | ||
| 81 | mtspr SPRN_LPCR,r3 | ||
| 82 | isync | ||
| 83 | blr | ||
| 84 | |||
| 85 | __init_TLB: | ||
| 86 | /* Clear the TLB */ | ||
| 87 | li r6,128 | ||
| 88 | mtctr r6 | ||
| 89 | li r7,0xc00 /* IS field = 0b11 */ | ||
| 90 | ptesync | ||
| 91 | 2: tlbiel r7 | ||
| 92 | addi r7,r7,0x1000 | ||
| 93 | bdnz 2b | ||
| 94 | ptesync | ||
| 95 | 1: blr | ||
diff --git a/arch/powerpc/kernel/e500-pmu.c b/arch/powerpc/kernel/e500-pmu.c new file mode 100644 index 00000000000..cb2e2949c8d --- /dev/null +++ b/arch/powerpc/kernel/e500-pmu.c | |||
| @@ -0,0 +1,134 @@ | |||
| 1 | /* | ||
| 2 | * Performance counter support for e500 family processors. | ||
| 3 | * | ||
| 4 | * Copyright 2008-2009 Paul Mackerras, IBM Corporation. | ||
| 5 | * Copyright 2010 Freescale Semiconductor, Inc. | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or | ||
| 8 | * modify it under the terms of the GNU General Public License | ||
| 9 | * as published by the Free Software Foundation; either version | ||
| 10 | * 2 of the License, or (at your option) any later version. | ||
| 11 | */ | ||
| 12 | #include <linux/string.h> | ||
| 13 | #include <linux/perf_event.h> | ||
| 14 | #include <asm/reg.h> | ||
| 15 | #include <asm/cputable.h> | ||
| 16 | |||
| 17 | /* | ||
| 18 | * Map of generic hardware event types to hardware events | ||
| 19 | * Zero if unsupported | ||
| 20 | */ | ||
| 21 | static int e500_generic_events[] = { | ||
| 22 | [PERF_COUNT_HW_CPU_CYCLES] = 1, | ||
| 23 | [PERF_COUNT_HW_INSTRUCTIONS] = 2, | ||
| 24 | [PERF_COUNT_HW_CACHE_MISSES] = 41, /* Data L1 cache reloads */ | ||
| 25 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 12, | ||
| 26 | [PERF_COUNT_HW_BRANCH_MISSES] = 15, | ||
| 27 | }; | ||
| 28 | |||
| 29 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
| 30 | |||
| 31 | /* | ||
| 32 | * Table of generalized cache-related events. | ||
| 33 | * 0 means not supported, -1 means nonsensical, other values | ||
| 34 | * are event codes. | ||
| 35 | */ | ||
| 36 | static int e500_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { | ||
| 37 | /* | ||
| 38 | * D-cache misses are not split into read/write/prefetch; | ||
| 39 | * use raw event 41. | ||
| 40 | */ | ||
| 41 | [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
| 42 | [C(OP_READ)] = { 27, 0 }, | ||
| 43 | [C(OP_WRITE)] = { 28, 0 }, | ||
| 44 | [C(OP_PREFETCH)] = { 29, 0 }, | ||
| 45 | }, | ||
| 46 | [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
| 47 | [C(OP_READ)] = { 2, 60 }, | ||
| 48 | [C(OP_WRITE)] = { -1, -1 }, | ||
| 49 | [C(OP_PREFETCH)] = { 0, 0 }, | ||
| 50 | }, | ||
| 51 | /* | ||
| 52 | * Assuming LL means L2, it's not a good match for this model. | ||
| 53 | * It allocates only on L1 castout or explicit prefetch, and | ||
| 54 | * does not have separate read/write events (but it does have | ||
| 55 | * separate instruction/data events). | ||
| 56 | */ | ||
| 57 | [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
| 58 | [C(OP_READ)] = { 0, 0 }, | ||
| 59 | [C(OP_WRITE)] = { 0, 0 }, | ||
| 60 | [C(OP_PREFETCH)] = { 0, 0 }, | ||
| 61 | }, | ||
| 62 | /* | ||
| 63 | * There are data/instruction MMU misses, but that's a miss on | ||
| 64 | * the chip's internal level-one TLB which is probably not | ||
| 65 | * what the user wants. Instead, unified level-two TLB misses | ||
| 66 | * are reported here. | ||
| 67 | */ | ||
| 68 | [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
| 69 | [C(OP_READ)] = { 26, 66 }, | ||
| 70 | [C(OP_WRITE)] = { -1, -1 }, | ||
| 71 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
| 72 | }, | ||
| 73 | [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
| 74 | [C(OP_READ)] = { 12, 15 }, | ||
| 75 | [C(OP_WRITE)] = { -1, -1 }, | ||
| 76 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
| 77 | }, | ||
| 78 | [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
| 79 | [C(OP_READ)] = { -1, -1 }, | ||
| 80 | [C(OP_WRITE)] = { -1, -1 }, | ||
| 81 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
| 82 | }, | ||
| 83 | }; | ||
| 84 | |||
| 85 | static int num_events = 128; | ||
| 86 | |||
| 87 | /* Upper half of event id is PMLCb, for threshold events */ | ||
| 88 | static u64 e500_xlate_event(u64 event_id) | ||
| 89 | { | ||
| 90 | u32 event_low = (u32)event_id; | ||
| 91 | u64 ret; | ||
| 92 | |||
| 93 | if (event_low >= num_events) | ||
| 94 | return 0; | ||
| 95 | |||
| 96 | ret = FSL_EMB_EVENT_VALID; | ||
| 97 | |||
| 98 | if (event_low >= 76 && event_low <= 81) { | ||
| 99 | ret |= FSL_EMB_EVENT_RESTRICTED; | ||
| 100 | ret |= event_id & | ||
| 101 | (FSL_EMB_EVENT_THRESHMUL | FSL_EMB_EVENT_THRESH); | ||
| 102 | } else if (event_id & | ||
| 103 | (FSL_EMB_EVENT_THRESHMUL | FSL_EMB_EVENT_THRESH)) { | ||
| 104 | /* Threshold requested on non-threshold event */ | ||
| 105 | return 0; | ||
| 106 | } | ||
| 107 | |||
| 108 | return ret; | ||
| 109 | } | ||
| 110 | |||
| 111 | static struct fsl_emb_pmu e500_pmu = { | ||
| 112 | .name = "e500 family", | ||
| 113 | .n_counter = 4, | ||
| 114 | .n_restricted = 2, | ||
| 115 | .xlate_event = e500_xlate_event, | ||
| 116 | .n_generic = ARRAY_SIZE(e500_generic_events), | ||
| 117 | .generic_events = e500_generic_events, | ||
| 118 | .cache_events = &e500_cache_events, | ||
| 119 | }; | ||
| 120 | |||
| 121 | static int init_e500_pmu(void) | ||
| 122 | { | ||
| 123 | if (!cur_cpu_spec->oprofile_cpu_type) | ||
| 124 | return -ENODEV; | ||
| 125 | |||
| 126 | if (!strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e500mc")) | ||
| 127 | num_events = 256; | ||
| 128 | else if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e500")) | ||
| 129 | return -ENODEV; | ||
| 130 | |||
| 131 | return register_fsl_emb_pmu(&e500_pmu); | ||
| 132 | } | ||
| 133 | |||
| 134 | early_initcall(init_e500_pmu); | ||
diff --git a/arch/powerpc/kernel/init_task.c b/arch/powerpc/kernel/init_task.c new file mode 100644 index 00000000000..2375b7eb1c7 --- /dev/null +++ b/arch/powerpc/kernel/init_task.c | |||
| @@ -0,0 +1,29 @@ | |||
| 1 | #include <linux/mm.h> | ||
| 2 | #include <linux/module.h> | ||
| 3 | #include <linux/sched.h> | ||
| 4 | #include <linux/init.h> | ||
| 5 | #include <linux/init_task.h> | ||
| 6 | #include <linux/fs.h> | ||
| 7 | #include <linux/mqueue.h> | ||
| 8 | #include <asm/uaccess.h> | ||
| 9 | |||
| 10 | static struct signal_struct init_signals = INIT_SIGNALS(init_signals); | ||
| 11 | static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); | ||
| 12 | /* | ||
| 13 | * Initial thread structure. | ||
| 14 | * | ||
| 15 | * We need to make sure that this is 16384-byte aligned due to the | ||
| 16 | * way process stacks are handled. This is done by having a special | ||
| 17 | * "init_task" linker map entry.. | ||
| 18 | */ | ||
| 19 | union thread_union init_thread_union __init_task_data = | ||
| 20 | { INIT_THREAD_INFO(init_task) }; | ||
| 21 | |||
| 22 | /* | ||
| 23 | * Initial task structure. | ||
| 24 | * | ||
| 25 | * All other task structs will be allocated on slabs in fork.c | ||
| 26 | */ | ||
| 27 | struct task_struct init_task = INIT_TASK(init_task); | ||
| 28 | |||
| 29 | EXPORT_SYMBOL(init_task); | ||
diff --git a/arch/powerpc/kernel/mpc7450-pmu.c b/arch/powerpc/kernel/mpc7450-pmu.c new file mode 100644 index 00000000000..fe21b515ca4 --- /dev/null +++ b/arch/powerpc/kernel/mpc7450-pmu.c | |||
| @@ -0,0 +1,422 @@ | |||
| 1 | /* | ||
| 2 | * Performance counter support for MPC7450-family processors. | ||
| 3 | * | ||
| 4 | * Copyright 2008-2009 Paul Mackerras, IBM Corporation. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU General Public License | ||
| 8 | * as published by the Free Software Foundation; either version | ||
| 9 | * 2 of the License, or (at your option) any later version. | ||
| 10 | */ | ||
| 11 | #include <linux/string.h> | ||
| 12 | #include <linux/perf_event.h> | ||
| 13 | #include <asm/reg.h> | ||
| 14 | #include <asm/cputable.h> | ||
| 15 | |||
| 16 | #define N_COUNTER 6 /* Number of hardware counters */ | ||
| 17 | #define MAX_ALT 3 /* Maximum number of event alternative codes */ | ||
| 18 | |||
| 19 | /* | ||
| 20 | * Bits in event code for MPC7450 family | ||
| 21 | */ | ||
| 22 | #define PM_THRMULT_MSKS 0x40000 | ||
| 23 | #define PM_THRESH_SH 12 | ||
| 24 | #define PM_THRESH_MSK 0x3f | ||
| 25 | #define PM_PMC_SH 8 | ||
| 26 | #define PM_PMC_MSK 7 | ||
| 27 | #define PM_PMCSEL_MSK 0x7f | ||
| 28 | |||
| 29 | /* | ||
| 30 | * Classify events according to how specific their PMC requirements are. | ||
| 31 | * Result is: | ||
| 32 | * 0: can go on any PMC | ||
| 33 | * 1: can go on PMCs 1-4 | ||
| 34 | * 2: can go on PMCs 1,2,4 | ||
| 35 | * 3: can go on PMCs 1 or 2 | ||
| 36 | * 4: can only go on one PMC | ||
| 37 | * -1: event code is invalid | ||
| 38 | */ | ||
| 39 | #define N_CLASSES 5 | ||
| 40 | |||
| 41 | static int mpc7450_classify_event(u32 event) | ||
| 42 | { | ||
| 43 | int pmc; | ||
| 44 | |||
| 45 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
| 46 | if (pmc) { | ||
| 47 | if (pmc > N_COUNTER) | ||
| 48 | return -1; | ||
| 49 | return 4; | ||
| 50 | } | ||
| 51 | event &= PM_PMCSEL_MSK; | ||
| 52 | if (event <= 1) | ||
| 53 | return 0; | ||
| 54 | if (event <= 7) | ||
| 55 | return 1; | ||
| 56 | if (event <= 13) | ||
| 57 | return 2; | ||
| 58 | if (event <= 22) | ||
| 59 | return 3; | ||
| 60 | return -1; | ||
| 61 | } | ||
| 62 | |||
| 63 | /* | ||
| 64 | * Events using threshold and possible threshold scale: | ||
| 65 | * code scale? name | ||
| 66 | * 11e N PM_INSTQ_EXCEED_CYC | ||
| 67 | * 11f N PM_ALTV_IQ_EXCEED_CYC | ||
| 68 | * 128 Y PM_DTLB_SEARCH_EXCEED_CYC | ||
| 69 | * 12b Y PM_LD_MISS_EXCEED_L1_CYC | ||
| 70 | * 220 N PM_CQ_EXCEED_CYC | ||
| 71 | * 30c N PM_GPR_RB_EXCEED_CYC | ||
| 72 | * 30d ? PM_FPR_IQ_EXCEED_CYC ? | ||
| 73 | * 311 Y PM_ITLB_SEARCH_EXCEED | ||
| 74 | * 410 N PM_GPR_IQ_EXCEED_CYC | ||
| 75 | */ | ||
| 76 | |||
| 77 | /* | ||
| 78 | * Return use of threshold and threshold scale bits: | ||
| 79 | * 0 = uses neither, 1 = uses threshold, 2 = uses both | ||
| 80 | */ | ||
| 81 | static int mpc7450_threshold_use(u32 event) | ||
| 82 | { | ||
| 83 | int pmc, sel; | ||
| 84 | |||
| 85 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
| 86 | sel = event & PM_PMCSEL_MSK; | ||
| 87 | switch (pmc) { | ||
| 88 | case 1: | ||
| 89 | if (sel == 0x1e || sel == 0x1f) | ||
| 90 | return 1; | ||
| 91 | if (sel == 0x28 || sel == 0x2b) | ||
| 92 | return 2; | ||
| 93 | break; | ||
| 94 | case 2: | ||
| 95 | if (sel == 0x20) | ||
| 96 | return 1; | ||
| 97 | break; | ||
| 98 | case 3: | ||
| 99 | if (sel == 0xc || sel == 0xd) | ||
| 100 | return 1; | ||
| 101 | if (sel == 0x11) | ||
| 102 | return 2; | ||
| 103 | break; | ||
| 104 | case 4: | ||
| 105 | if (sel == 0x10) | ||
| 106 | return 1; | ||
| 107 | break; | ||
| 108 | } | ||
| 109 | return 0; | ||
| 110 | } | ||
| 111 | |||
| 112 | /* | ||
| 113 | * Layout of constraint bits: | ||
| 114 | * 33222222222211111111110000000000 | ||
| 115 | * 10987654321098765432109876543210 | ||
| 116 | * |< >< > < > < ><><><><><><> | ||
| 117 | * TS TV G4 G3 G2P6P5P4P3P2P1 | ||
| 118 | * | ||
| 119 | * P1 - P6 | ||
| 120 | * 0 - 11: Count of events needing PMC1 .. PMC6 | ||
| 121 | * | ||
| 122 | * G2 | ||
| 123 | * 12 - 14: Count of events needing PMC1 or PMC2 | ||
| 124 | * | ||
| 125 | * G3 | ||
| 126 | * 16 - 18: Count of events needing PMC1, PMC2 or PMC4 | ||
| 127 | * | ||
| 128 | * G4 | ||
| 129 | * 20 - 23: Count of events needing PMC1, PMC2, PMC3 or PMC4 | ||
| 130 | * | ||
| 131 | * TV | ||
| 132 | * 24 - 29: Threshold value requested | ||
| 133 | * | ||
| 134 | * TS | ||
| 135 | * 30: Threshold scale value requested | ||
| 136 | */ | ||
| 137 | |||
| 138 | static u32 pmcbits[N_COUNTER][2] = { | ||
| 139 | { 0x00844002, 0x00111001 }, /* PMC1 mask, value: P1,G2,G3,G4 */ | ||
| 140 | { 0x00844008, 0x00111004 }, /* PMC2: P2,G2,G3,G4 */ | ||
| 141 | { 0x00800020, 0x00100010 }, /* PMC3: P3,G4 */ | ||
| 142 | { 0x00840080, 0x00110040 }, /* PMC4: P4,G3,G4 */ | ||
| 143 | { 0x00000200, 0x00000100 }, /* PMC5: P5 */ | ||
| 144 | { 0x00000800, 0x00000400 } /* PMC6: P6 */ | ||
| 145 | }; | ||
| 146 | |||
| 147 | static u32 classbits[N_CLASSES - 1][2] = { | ||
| 148 | { 0x00000000, 0x00000000 }, /* class 0: no constraint */ | ||
| 149 | { 0x00800000, 0x00100000 }, /* class 1: G4 */ | ||
| 150 | { 0x00040000, 0x00010000 }, /* class 2: G3 */ | ||
| 151 | { 0x00004000, 0x00001000 }, /* class 3: G2 */ | ||
| 152 | }; | ||
| 153 | |||
| 154 | static int mpc7450_get_constraint(u64 event, unsigned long *maskp, | ||
| 155 | unsigned long *valp) | ||
| 156 | { | ||
| 157 | int pmc, class; | ||
| 158 | u32 mask, value; | ||
| 159 | int thresh, tuse; | ||
| 160 | |||
| 161 | class = mpc7450_classify_event(event); | ||
| 162 | if (class < 0) | ||
| 163 | return -1; | ||
| 164 | if (class == 4) { | ||
| 165 | pmc = ((unsigned int)event >> PM_PMC_SH) & PM_PMC_MSK; | ||
| 166 | mask = pmcbits[pmc - 1][0]; | ||
| 167 | value = pmcbits[pmc - 1][1]; | ||
| 168 | } else { | ||
| 169 | mask = classbits[class][0]; | ||
| 170 | value = classbits[class][1]; | ||
| 171 | } | ||
| 172 | |||
| 173 | tuse = mpc7450_threshold_use(event); | ||
| 174 | if (tuse) { | ||
| 175 | thresh = ((unsigned int)event >> PM_THRESH_SH) & PM_THRESH_MSK; | ||
| 176 | mask |= 0x3f << 24; | ||
| 177 | value |= thresh << 24; | ||
| 178 | if (tuse == 2) { | ||
| 179 | mask |= 0x40000000; | ||
| 180 | if ((unsigned int)event & PM_THRMULT_MSKS) | ||
| 181 | value |= 0x40000000; | ||
| 182 | } | ||
| 183 | } | ||
| 184 | |||
| 185 | *maskp = mask; | ||
| 186 | *valp = value; | ||
| 187 | return 0; | ||
| 188 | } | ||
| 189 | |||
| 190 | static const unsigned int event_alternatives[][MAX_ALT] = { | ||
| 191 | { 0x217, 0x317 }, /* PM_L1_DCACHE_MISS */ | ||
| 192 | { 0x418, 0x50f, 0x60f }, /* PM_SNOOP_RETRY */ | ||
| 193 | { 0x502, 0x602 }, /* PM_L2_HIT */ | ||
| 194 | { 0x503, 0x603 }, /* PM_L3_HIT */ | ||
| 195 | { 0x504, 0x604 }, /* PM_L2_ICACHE_MISS */ | ||
| 196 | { 0x505, 0x605 }, /* PM_L3_ICACHE_MISS */ | ||
| 197 | { 0x506, 0x606 }, /* PM_L2_DCACHE_MISS */ | ||
| 198 | { 0x507, 0x607 }, /* PM_L3_DCACHE_MISS */ | ||
| 199 | { 0x50a, 0x623 }, /* PM_LD_HIT_L3 */ | ||
| 200 | { 0x50b, 0x624 }, /* PM_ST_HIT_L3 */ | ||
| 201 | { 0x50d, 0x60d }, /* PM_L2_TOUCH_HIT */ | ||
| 202 | { 0x50e, 0x60e }, /* PM_L3_TOUCH_HIT */ | ||
| 203 | { 0x512, 0x612 }, /* PM_INT_LOCAL */ | ||
| 204 | { 0x513, 0x61d }, /* PM_L2_MISS */ | ||
| 205 | { 0x514, 0x61e }, /* PM_L3_MISS */ | ||
| 206 | }; | ||
| 207 | |||
| 208 | /* | ||
| 209 | * Scan the alternatives table for a match and return the | ||
| 210 | * index into the alternatives table if found, else -1. | ||
| 211 | */ | ||
| 212 | static int find_alternative(u32 event) | ||
| 213 | { | ||
| 214 | int i, j; | ||
| 215 | |||
| 216 | for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { | ||
| 217 | if (event < event_alternatives[i][0]) | ||
| 218 | break; | ||
| 219 | for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j) | ||
| 220 | if (event == event_alternatives[i][j]) | ||
| 221 | return i; | ||
| 222 | } | ||
| 223 | return -1; | ||
| 224 | } | ||
| 225 | |||
| 226 | static int mpc7450_get_alternatives(u64 event, unsigned int flags, u64 alt[]) | ||
| 227 | { | ||
| 228 | int i, j, nalt = 1; | ||
| 229 | u32 ae; | ||
| 230 | |||
| 231 | alt[0] = event; | ||
| 232 | nalt = 1; | ||
| 233 | i = find_alternative((u32)event); | ||
| 234 | if (i >= 0) { | ||
| 235 | for (j = 0; j < MAX_ALT; ++j) { | ||
| 236 | ae = event_alternatives[i][j]; | ||
| 237 | if (ae && ae != (u32)event) | ||
| 238 | alt[nalt++] = ae; | ||
| 239 | } | ||
| 240 | } | ||
| 241 | return nalt; | ||
| 242 | } | ||
| 243 | |||
| 244 | /* | ||
| 245 | * Bitmaps of which PMCs each class can use for classes 0 - 3. | ||
| 246 | * Bit i is set if PMC i+1 is usable. | ||
| 247 | */ | ||
| 248 | static const u8 classmap[N_CLASSES] = { | ||
| 249 | 0x3f, 0x0f, 0x0b, 0x03, 0 | ||
| 250 | }; | ||
| 251 | |||
| 252 | /* Bit position and width of each PMCSEL field */ | ||
| 253 | static const int pmcsel_shift[N_COUNTER] = { | ||
| 254 | 6, 0, 27, 22, 17, 11 | ||
| 255 | }; | ||
| 256 | static const u32 pmcsel_mask[N_COUNTER] = { | ||
| 257 | 0x7f, 0x3f, 0x1f, 0x1f, 0x1f, 0x3f | ||
| 258 | }; | ||
| 259 | |||
| 260 | /* | ||
| 261 | * Compute MMCR0/1/2 values for a set of events. | ||
| 262 | */ | ||
| 263 | static int mpc7450_compute_mmcr(u64 event[], int n_ev, | ||
| 264 | unsigned int hwc[], unsigned long mmcr[]) | ||
| 265 | { | ||
| 266 | u8 event_index[N_CLASSES][N_COUNTER]; | ||
| 267 | int n_classevent[N_CLASSES]; | ||
| 268 | int i, j, class, tuse; | ||
| 269 | u32 pmc_inuse = 0, pmc_avail; | ||
| 270 | u32 mmcr0 = 0, mmcr1 = 0, mmcr2 = 0; | ||
| 271 | u32 ev, pmc, thresh; | ||
| 272 | |||
| 273 | if (n_ev > N_COUNTER) | ||
| 274 | return -1; | ||
| 275 | |||
| 276 | /* First pass: count usage in each class */ | ||
| 277 | for (i = 0; i < N_CLASSES; ++i) | ||
| 278 | n_classevent[i] = 0; | ||
| 279 | for (i = 0; i < n_ev; ++i) { | ||
| 280 | class = mpc7450_classify_event(event[i]); | ||
| 281 | if (class < 0) | ||
| 282 | return -1; | ||
| 283 | j = n_classevent[class]++; | ||
| 284 | event_index[class][j] = i; | ||
| 285 | } | ||
| 286 | |||
| 287 | /* Second pass: allocate PMCs from most specific event to least */ | ||
| 288 | for (class = N_CLASSES - 1; class >= 0; --class) { | ||
| 289 | for (i = 0; i < n_classevent[class]; ++i) { | ||
| 290 | ev = event[event_index[class][i]]; | ||
| 291 | if (class == 4) { | ||
| 292 | pmc = (ev >> PM_PMC_SH) & PM_PMC_MSK; | ||
| 293 | if (pmc_inuse & (1 << (pmc - 1))) | ||
| 294 | return -1; | ||
| 295 | } else { | ||
| 296 | /* Find a suitable PMC */ | ||
| 297 | pmc_avail = classmap[class] & ~pmc_inuse; | ||
| 298 | if (!pmc_avail) | ||
| 299 | return -1; | ||
| 300 | pmc = ffs(pmc_avail); | ||
| 301 | } | ||
| 302 | pmc_inuse |= 1 << (pmc - 1); | ||
| 303 | |||
| 304 | tuse = mpc7450_threshold_use(ev); | ||
| 305 | if (tuse) { | ||
| 306 | thresh = (ev >> PM_THRESH_SH) & PM_THRESH_MSK; | ||
| 307 | mmcr0 |= thresh << 16; | ||
| 308 | if (tuse == 2 && (ev & PM_THRMULT_MSKS)) | ||
| 309 | mmcr2 = 0x80000000; | ||
| 310 | } | ||
| 311 | ev &= pmcsel_mask[pmc - 1]; | ||
| 312 | ev <<= pmcsel_shift[pmc - 1]; | ||
| 313 | if (pmc <= 2) | ||
| 314 | mmcr0 |= ev; | ||
| 315 | else | ||
| 316 | mmcr1 |= ev; | ||
| 317 | hwc[event_index[class][i]] = pmc - 1; | ||
| 318 | } | ||
| 319 | } | ||
| 320 | |||
| 321 | if (pmc_inuse & 1) | ||
| 322 | mmcr0 |= MMCR0_PMC1CE; | ||
| 323 | if (pmc_inuse & 0x3e) | ||
| 324 | mmcr0 |= MMCR0_PMCnCE; | ||
| 325 | |||
| 326 | /* Return MMCRx values */ | ||
| 327 | mmcr[0] = mmcr0; | ||
| 328 | mmcr[1] = mmcr1; | ||
| 329 | mmcr[2] = mmcr2; | ||
| 330 | return 0; | ||
| 331 | } | ||
| 332 | |||
| 333 | /* | ||
| 334 | * Disable counting by a PMC. | ||
| 335 | * Note that the pmc argument is 0-based here, not 1-based. | ||
| 336 | */ | ||
| 337 | static void mpc7450_disable_pmc(unsigned int pmc, unsigned long mmcr[]) | ||
| 338 | { | ||
| 339 | if (pmc <= 1) | ||
| 340 | mmcr[0] &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]); | ||
| 341 | else | ||
| 342 | mmcr[1] &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]); | ||
| 343 | } | ||
| 344 | |||
| 345 | static int mpc7450_generic_events[] = { | ||
| 346 | [PERF_COUNT_HW_CPU_CYCLES] = 1, | ||
| 347 | [PERF_COUNT_HW_INSTRUCTIONS] = 2, | ||
| 348 | [PERF_COUNT_HW_CACHE_MISSES] = 0x217, /* PM_L1_DCACHE_MISS */ | ||
| 349 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x122, /* PM_BR_CMPL */ | ||
| 350 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x41c, /* PM_BR_MPRED */ | ||
| 351 | }; | ||
| 352 | |||
| 353 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
| 354 | |||
| 355 | /* | ||
| 356 | * Table of generalized cache-related events. | ||
| 357 | * 0 means not supported, -1 means nonsensical, other values | ||
| 358 | * are event codes. | ||
| 359 | */ | ||
| 360 | static int mpc7450_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { | ||
| 361 | [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
| 362 | [C(OP_READ)] = { 0, 0x225 }, | ||
| 363 | [C(OP_WRITE)] = { 0, 0x227 }, | ||
| 364 | [C(OP_PREFETCH)] = { 0, 0 }, | ||
| 365 | }, | ||
| 366 | [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
| 367 | [C(OP_READ)] = { 0x129, 0x115 }, | ||
| 368 | [C(OP_WRITE)] = { -1, -1 }, | ||
| 369 | [C(OP_PREFETCH)] = { 0x634, 0 }, | ||
| 370 | }, | ||
| 371 | [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
| 372 | [C(OP_READ)] = { 0, 0 }, | ||
| 373 | [C(OP_WRITE)] = { 0, 0 }, | ||
| 374 | [C(OP_PREFETCH)] = { 0, 0 }, | ||
| 375 | }, | ||
| 376 | [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
| 377 | [C(OP_READ)] = { 0, 0x312 }, | ||
| 378 | [C(OP_WRITE)] = { -1, -1 }, | ||
| 379 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
| 380 | }, | ||
| 381 | [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
| 382 | [C(OP_READ)] = { 0, 0x223 }, | ||
| 383 | [C(OP_WRITE)] = { -1, -1 }, | ||
| 384 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
| 385 | }, | ||
| 386 | [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
| 387 | [C(OP_READ)] = { 0x122, 0x41c }, | ||
| 388 | [C(OP_WRITE)] = { -1, -1 }, | ||
| 389 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
| 390 | }, | ||
| 391 | [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
| 392 | [C(OP_READ)] = { -1, -1 }, | ||
| 393 | [C(OP_WRITE)] = { -1, -1 }, | ||
| 394 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
| 395 | }, | ||
| 396 | }; | ||
| 397 | |||
| 398 | struct power_pmu mpc7450_pmu = { | ||
| 399 | .name = "MPC7450 family", | ||
| 400 | .n_counter = N_COUNTER, | ||
| 401 | .max_alternatives = MAX_ALT, | ||
| 402 | .add_fields = 0x00111555ul, | ||
| 403 | .test_adder = 0x00301000ul, | ||
| 404 | .compute_mmcr = mpc7450_compute_mmcr, | ||
| 405 | .get_constraint = mpc7450_get_constraint, | ||
| 406 | .get_alternatives = mpc7450_get_alternatives, | ||
| 407 | .disable_pmc = mpc7450_disable_pmc, | ||
| 408 | .n_generic = ARRAY_SIZE(mpc7450_generic_events), | ||
| 409 | .generic_events = mpc7450_generic_events, | ||
| 410 | .cache_events = &mpc7450_cache_events, | ||
| 411 | }; | ||
| 412 | |||
| 413 | static int __init init_mpc7450_pmu(void) | ||
| 414 | { | ||
| 415 | if (!cur_cpu_spec->oprofile_cpu_type || | ||
| 416 | strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/7450")) | ||
| 417 | return -ENODEV; | ||
| 418 | |||
| 419 | return register_power_pmu(&mpc7450_pmu); | ||
| 420 | } | ||
| 421 | |||
| 422 | early_initcall(init_mpc7450_pmu); | ||
diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c new file mode 100644 index 00000000000..564c1d8bdb5 --- /dev/null +++ b/arch/powerpc/kernel/perf_callchain.c | |||
| @@ -0,0 +1,492 @@ | |||
| 1 | /* | ||
| 2 | * Performance counter callchain support - powerpc architecture code | ||
| 3 | * | ||
| 4 | * Copyright © 2009 Paul Mackerras, IBM Corporation. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU General Public License | ||
| 8 | * as published by the Free Software Foundation; either version | ||
| 9 | * 2 of the License, or (at your option) any later version. | ||
| 10 | */ | ||
| 11 | #include <linux/kernel.h> | ||
| 12 | #include <linux/sched.h> | ||
| 13 | #include <linux/perf_event.h> | ||
| 14 | #include <linux/percpu.h> | ||
| 15 | #include <linux/uaccess.h> | ||
| 16 | #include <linux/mm.h> | ||
| 17 | #include <asm/ptrace.h> | ||
| 18 | #include <asm/pgtable.h> | ||
| 19 | #include <asm/sigcontext.h> | ||
| 20 | #include <asm/ucontext.h> | ||
| 21 | #include <asm/vdso.h> | ||
| 22 | #ifdef CONFIG_PPC64 | ||
| 23 | #include "ppc32.h" | ||
| 24 | #endif | ||
| 25 | |||
| 26 | |||
| 27 | /* | ||
| 28 | * Is sp valid as the address of the next kernel stack frame after prev_sp? | ||
| 29 | * The next frame may be in a different stack area but should not go | ||
| 30 | * back down in the same stack area. | ||
| 31 | */ | ||
| 32 | static int valid_next_sp(unsigned long sp, unsigned long prev_sp) | ||
| 33 | { | ||
| 34 | if (sp & 0xf) | ||
| 35 | return 0; /* must be 16-byte aligned */ | ||
| 36 | if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) | ||
| 37 | return 0; | ||
| 38 | if (sp >= prev_sp + STACK_FRAME_OVERHEAD) | ||
| 39 | return 1; | ||
| 40 | /* | ||
| 41 | * sp could decrease when we jump off an interrupt stack | ||
| 42 | * back to the regular process stack. | ||
| 43 | */ | ||
| 44 | if ((sp & ~(THREAD_SIZE - 1)) != (prev_sp & ~(THREAD_SIZE - 1))) | ||
| 45 | return 1; | ||
| 46 | return 0; | ||
| 47 | } | ||
| 48 | |||
| 49 | void | ||
| 50 | perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) | ||
| 51 | { | ||
| 52 | unsigned long sp, next_sp; | ||
| 53 | unsigned long next_ip; | ||
| 54 | unsigned long lr; | ||
| 55 | long level = 0; | ||
| 56 | unsigned long *fp; | ||
| 57 | |||
| 58 | lr = regs->link; | ||
| 59 | sp = regs->gpr[1]; | ||
| 60 | perf_callchain_store(entry, regs->nip); | ||
| 61 | |||
| 62 | if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) | ||
| 63 | return; | ||
| 64 | |||
| 65 | for (;;) { | ||
| 66 | fp = (unsigned long *) sp; | ||
| 67 | next_sp = fp[0]; | ||
| 68 | |||
| 69 | if (next_sp == sp + STACK_INT_FRAME_SIZE && | ||
| 70 | fp[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { | ||
| 71 | /* | ||
| 72 | * This looks like an interrupt frame for an | ||
| 73 | * interrupt that occurred in the kernel | ||
| 74 | */ | ||
| 75 | regs = (struct pt_regs *)(sp + STACK_FRAME_OVERHEAD); | ||
| 76 | next_ip = regs->nip; | ||
| 77 | lr = regs->link; | ||
| 78 | level = 0; | ||
| 79 | perf_callchain_store(entry, PERF_CONTEXT_KERNEL); | ||
| 80 | |||
| 81 | } else { | ||
| 82 | if (level == 0) | ||
| 83 | next_ip = lr; | ||
| 84 | else | ||
| 85 | next_ip = fp[STACK_FRAME_LR_SAVE]; | ||
| 86 | |||
| 87 | /* | ||
| 88 | * We can't tell which of the first two addresses | ||
| 89 | * we get are valid, but we can filter out the | ||
| 90 | * obviously bogus ones here. We replace them | ||
| 91 | * with 0 rather than removing them entirely so | ||
| 92 | * that userspace can tell which is which. | ||
| 93 | */ | ||
| 94 | if ((level == 1 && next_ip == lr) || | ||
| 95 | (level <= 1 && !kernel_text_address(next_ip))) | ||
| 96 | next_ip = 0; | ||
| 97 | |||
| 98 | ++level; | ||
| 99 | } | ||
| 100 | |||
| 101 | perf_callchain_store(entry, next_ip); | ||
| 102 | if (!valid_next_sp(next_sp, sp)) | ||
| 103 | return; | ||
| 104 | sp = next_sp; | ||
| 105 | } | ||
| 106 | } | ||
| 107 | |||
| 108 | #ifdef CONFIG_PPC64 | ||
| 109 | /* | ||
| 110 | * On 64-bit we don't want to invoke hash_page on user addresses from | ||
| 111 | * interrupt context, so if the access faults, we read the page tables | ||
| 112 | * to find which page (if any) is mapped and access it directly. | ||
| 113 | */ | ||
| 114 | static int read_user_stack_slow(void __user *ptr, void *ret, int nb) | ||
| 115 | { | ||
| 116 | pgd_t *pgdir; | ||
| 117 | pte_t *ptep, pte; | ||
| 118 | unsigned shift; | ||
| 119 | unsigned long addr = (unsigned long) ptr; | ||
| 120 | unsigned long offset; | ||
| 121 | unsigned long pfn; | ||
| 122 | void *kaddr; | ||
| 123 | |||
| 124 | pgdir = current->mm->pgd; | ||
| 125 | if (!pgdir) | ||
| 126 | return -EFAULT; | ||
| 127 | |||
| 128 | ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift); | ||
| 129 | if (!shift) | ||
| 130 | shift = PAGE_SHIFT; | ||
| 131 | |||
| 132 | /* align address to page boundary */ | ||
| 133 | offset = addr & ((1UL << shift) - 1); | ||
| 134 | addr -= offset; | ||
| 135 | |||
| 136 | if (ptep == NULL) | ||
| 137 | return -EFAULT; | ||
| 138 | pte = *ptep; | ||
| 139 | if (!pte_present(pte) || !(pte_val(pte) & _PAGE_USER)) | ||
| 140 | return -EFAULT; | ||
| 141 | pfn = pte_pfn(pte); | ||
| 142 | if (!page_is_ram(pfn)) | ||
| 143 | return -EFAULT; | ||
| 144 | |||
| 145 | /* no highmem to worry about here */ | ||
| 146 | kaddr = pfn_to_kaddr(pfn); | ||
| 147 | memcpy(ret, kaddr + offset, nb); | ||
| 148 | return 0; | ||
| 149 | } | ||
| 150 | |||
| 151 | static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret) | ||
| 152 | { | ||
| 153 | if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned long) || | ||
| 154 | ((unsigned long)ptr & 7)) | ||
| 155 | return -EFAULT; | ||
| 156 | |||
| 157 | pagefault_disable(); | ||
| 158 | if (!__get_user_inatomic(*ret, ptr)) { | ||
| 159 | pagefault_enable(); | ||
| 160 | return 0; | ||
| 161 | } | ||
| 162 | pagefault_enable(); | ||
| 163 | |||
| 164 | return read_user_stack_slow(ptr, ret, 8); | ||
| 165 | } | ||
| 166 | |||
| 167 | static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) | ||
| 168 | { | ||
| 169 | if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) || | ||
| 170 | ((unsigned long)ptr & 3)) | ||
| 171 | return -EFAULT; | ||
| 172 | |||
| 173 | pagefault_disable(); | ||
| 174 | if (!__get_user_inatomic(*ret, ptr)) { | ||
| 175 | pagefault_enable(); | ||
| 176 | return 0; | ||
| 177 | } | ||
| 178 | pagefault_enable(); | ||
| 179 | |||
| 180 | return read_user_stack_slow(ptr, ret, 4); | ||
| 181 | } | ||
| 182 | |||
| 183 | static inline int valid_user_sp(unsigned long sp, int is_64) | ||
| 184 | { | ||
| 185 | if (!sp || (sp & 7) || sp > (is_64 ? TASK_SIZE : 0x100000000UL) - 32) | ||
| 186 | return 0; | ||
| 187 | return 1; | ||
| 188 | } | ||
| 189 | |||
/*
 * 64-bit user processes use the same stack frame for RT and non-RT signals.
 * This must mirror the layout laid down by 64-bit signal delivery;
 * sane_signal_64_frame() validates the pinfo/puc invariants below.
 */
struct signal_frame_64 {
	char dummy[__SIGNAL_FRAMESIZE];
	struct ucontext uc;
	unsigned long unused[2];
	unsigned int tramp[6];		/* sigreturn trampoline instructions */
	struct siginfo *pinfo;		/* expected to point at 'info' below */
	void *puc;			/* expected to point at 'uc' above */
	struct siginfo info;
	char abigap[288];
};
| 203 | |||
| 204 | static int is_sigreturn_64_address(unsigned long nip, unsigned long fp) | ||
| 205 | { | ||
| 206 | if (nip == fp + offsetof(struct signal_frame_64, tramp)) | ||
| 207 | return 1; | ||
| 208 | if (vdso64_rt_sigtramp && current->mm->context.vdso_base && | ||
| 209 | nip == current->mm->context.vdso_base + vdso64_rt_sigtramp) | ||
| 210 | return 1; | ||
| 211 | return 0; | ||
| 212 | } | ||
| 213 | |||
/*
 * Do some sanity checking on the signal frame pointed to by sp.
 * We check the pinfo and puc pointers in the frame: a genuine frame
 * has them pointing at its own 'info' and 'uc' members.
 * Returns 1 if the frame looks sane, 0 otherwise (including when the
 * user-memory reads fail).
 */
static int sane_signal_64_frame(unsigned long sp)
{
	struct signal_frame_64 __user *sf;
	unsigned long pinfo, puc;

	sf = (struct signal_frame_64 __user *) sp;
	if (read_user_stack_64((unsigned long __user *) &sf->pinfo, &pinfo) ||
	    read_user_stack_64((unsigned long __user *) &sf->puc, &puc))
		return 0;
	return pinfo == (unsigned long) &sf->info &&
		puc == (unsigned long) &sf->uc;
}
| 230 | |||
| 231 | static void perf_callchain_user_64(struct perf_callchain_entry *entry, | ||
| 232 | struct pt_regs *regs) | ||
| 233 | { | ||
| 234 | unsigned long sp, next_sp; | ||
| 235 | unsigned long next_ip; | ||
| 236 | unsigned long lr; | ||
| 237 | long level = 0; | ||
| 238 | struct signal_frame_64 __user *sigframe; | ||
| 239 | unsigned long __user *fp, *uregs; | ||
| 240 | |||
| 241 | next_ip = regs->nip; | ||
| 242 | lr = regs->link; | ||
| 243 | sp = regs->gpr[1]; | ||
| 244 | perf_callchain_store(entry, next_ip); | ||
| 245 | |||
| 246 | for (;;) { | ||
| 247 | fp = (unsigned long __user *) sp; | ||
| 248 | if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp)) | ||
| 249 | return; | ||
| 250 | if (level > 0 && read_user_stack_64(&fp[2], &next_ip)) | ||
| 251 | return; | ||
| 252 | |||
| 253 | /* | ||
| 254 | * Note: the next_sp - sp >= signal frame size check | ||
| 255 | * is true when next_sp < sp, which can happen when | ||
| 256 | * transitioning from an alternate signal stack to the | ||
| 257 | * normal stack. | ||
| 258 | */ | ||
| 259 | if (next_sp - sp >= sizeof(struct signal_frame_64) && | ||
| 260 | (is_sigreturn_64_address(next_ip, sp) || | ||
| 261 | (level <= 1 && is_sigreturn_64_address(lr, sp))) && | ||
| 262 | sane_signal_64_frame(sp)) { | ||
| 263 | /* | ||
| 264 | * This looks like an signal frame | ||
| 265 | */ | ||
| 266 | sigframe = (struct signal_frame_64 __user *) sp; | ||
| 267 | uregs = sigframe->uc.uc_mcontext.gp_regs; | ||
| 268 | if (read_user_stack_64(&uregs[PT_NIP], &next_ip) || | ||
| 269 | read_user_stack_64(&uregs[PT_LNK], &lr) || | ||
| 270 | read_user_stack_64(&uregs[PT_R1], &sp)) | ||
| 271 | return; | ||
| 272 | level = 0; | ||
| 273 | perf_callchain_store(entry, PERF_CONTEXT_USER); | ||
| 274 | perf_callchain_store(entry, next_ip); | ||
| 275 | continue; | ||
| 276 | } | ||
| 277 | |||
| 278 | if (level == 0) | ||
| 279 | next_ip = lr; | ||
| 280 | perf_callchain_store(entry, next_ip); | ||
| 281 | ++level; | ||
| 282 | sp = next_sp; | ||
| 283 | } | ||
| 284 | } | ||
| 285 | |||
/* Is the current task a 64-bit process?  Safe to call on interrupt stacks. */
static inline int current_is_64bit(void)
{
	/*
	 * We can't use test_thread_flag() here because we may be on an
	 * interrupt stack, and the thread flags don't get copied over
	 * from the thread_info on the main stack to the interrupt stack.
	 */
	return !test_ti_thread_flag(task_thread_info(current), TIF_32BIT);
}
| 295 | |||
| 296 | #else /* CONFIG_PPC64 */ | ||
| 297 | /* | ||
| 298 | * On 32-bit we just access the address and let hash_page create a | ||
| 299 | * HPTE if necessary, so there is no need to fall back to reading | ||
| 300 | * the page tables. Since this is called at interrupt level, | ||
| 301 | * do_page_fault() won't treat a DSI as a page fault. | ||
| 302 | */ | ||
| 303 | static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) | ||
| 304 | { | ||
| 305 | int rc; | ||
| 306 | |||
| 307 | if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) || | ||
| 308 | ((unsigned long)ptr & 3)) | ||
| 309 | return -EFAULT; | ||
| 310 | |||
| 311 | pagefault_disable(); | ||
| 312 | rc = __get_user_inatomic(*ret, ptr); | ||
| 313 | pagefault_enable(); | ||
| 314 | |||
| 315 | return rc; | ||
| 316 | } | ||
| 317 | |||
/* 64-bit user callchains cannot occur on a 32-bit kernel: do nothing. */
static inline void perf_callchain_user_64(struct perf_callchain_entry *entry,
					  struct pt_regs *regs)
{
}
| 322 | |||
/* On a 32-bit kernel the current task is never a 64-bit process. */
static inline int current_is_64bit(void)
{
	return 0;
}
| 327 | |||
| 328 | static inline int valid_user_sp(unsigned long sp, int is_64) | ||
| 329 | { | ||
| 330 | if (!sp || (sp & 7) || sp > TASK_SIZE - 32) | ||
| 331 | return 0; | ||
| 332 | return 1; | ||
| 333 | } | ||
| 334 | |||
/*
 * On a 32-bit kernel the "32-bit" frame/context layouts are just the
 * native ones; alias the names so the shared code below works for
 * both 32-bit and 64-bit builds.
 */
#define __SIGNAL_FRAMESIZE32 __SIGNAL_FRAMESIZE
#define sigcontext32 sigcontext
#define mcontext32 mcontext
#define ucontext32 ucontext
#define compat_siginfo_t struct siginfo
| 340 | |||
| 341 | #endif /* CONFIG_PPC64 */ | ||
| 342 | |||
/*
 * Layout for non-RT signal frames.
 * sane_signal_32_frame() checks that sctx.regs points at mctx below.
 */
struct signal_frame_32 {
	char dummy[__SIGNAL_FRAMESIZE32];
	struct sigcontext32 sctx;	/* sctx.regs should point at mctx */
	struct mcontext32 mctx;		/* saved register state */
	int abigap[56];
};
| 352 | |||
/*
 * Layout for RT signal frames.
 * sane_rt_signal_32_frame() checks that uc.uc_regs points at
 * uc.uc_mcontext below.
 */
struct rt_signal_frame_32 {
	char dummy[__SIGNAL_FRAMESIZE32 + 16];
	compat_siginfo_t info;
	struct ucontext32 uc;		/* uc.uc_regs -> uc.uc_mcontext */
	int abigap[56];
};
| 362 | |||
| 363 | static int is_sigreturn_32_address(unsigned int nip, unsigned int fp) | ||
| 364 | { | ||
| 365 | if (nip == fp + offsetof(struct signal_frame_32, mctx.mc_pad)) | ||
| 366 | return 1; | ||
| 367 | if (vdso32_sigtramp && current->mm->context.vdso_base && | ||
| 368 | nip == current->mm->context.vdso_base + vdso32_sigtramp) | ||
| 369 | return 1; | ||
| 370 | return 0; | ||
| 371 | } | ||
| 372 | |||
| 373 | static int is_rt_sigreturn_32_address(unsigned int nip, unsigned int fp) | ||
| 374 | { | ||
| 375 | if (nip == fp + offsetof(struct rt_signal_frame_32, | ||
| 376 | uc.uc_mcontext.mc_pad)) | ||
| 377 | return 1; | ||
| 378 | if (vdso32_rt_sigtramp && current->mm->context.vdso_base && | ||
| 379 | nip == current->mm->context.vdso_base + vdso32_rt_sigtramp) | ||
| 380 | return 1; | ||
| 381 | return 0; | ||
| 382 | } | ||
| 383 | |||
/*
 * Sanity-check a candidate non-RT signal frame at 'sp': the saved regs
 * pointer in the sigcontext must point at the mcontext of the same
 * frame.  Returns 1 if sane, 0 otherwise (including read failure).
 */
static int sane_signal_32_frame(unsigned int sp)
{
	struct signal_frame_32 __user *sf;
	unsigned int regs;

	sf = (struct signal_frame_32 __user *) (unsigned long) sp;
	if (read_user_stack_32((unsigned int __user *) &sf->sctx.regs, &regs))
		return 0;
	return regs == (unsigned long) &sf->mctx;
}
| 394 | |||
/*
 * Sanity-check a candidate RT signal frame at 'sp': the saved uc_regs
 * pointer must point at the uc_mcontext of the same frame.
 * Returns 1 if sane, 0 otherwise (including read failure).
 */
static int sane_rt_signal_32_frame(unsigned int sp)
{
	struct rt_signal_frame_32 __user *sf;
	unsigned int regs;

	sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp;
	if (read_user_stack_32((unsigned int __user *) &sf->uc.uc_regs, &regs))
		return 0;
	return regs == (unsigned long) &sf->uc.uc_mcontext;
}
| 405 | |||
/*
 * If the frame at 'sp' (whose back chain is 'next_sp' and entry IP is
 * 'next_ip') looks like a 32-bit signal frame, return a pointer to the
 * saved GP register array inside it; otherwise return NULL.
 * The non-RT layout is tried first, then the RT layout.
 */
static unsigned int __user *signal_frame_32_regs(unsigned int sp,
				unsigned int next_sp, unsigned int next_ip)
{
	struct mcontext32 __user *mctx = NULL;
	struct signal_frame_32 __user *sf;
	struct rt_signal_frame_32 __user *rt_sf;

	/*
	 * Note: the next_sp - sp >= signal frame size check
	 * is true when next_sp < sp, for example, when
	 * transitioning from an alternate signal stack to the
	 * normal stack.
	 */
	if (next_sp - sp >= sizeof(struct signal_frame_32) &&
	    is_sigreturn_32_address(next_ip, sp) &&
	    sane_signal_32_frame(sp)) {
		sf = (struct signal_frame_32 __user *) (unsigned long) sp;
		mctx = &sf->mctx;
	}

	if (!mctx && next_sp - sp >= sizeof(struct rt_signal_frame_32) &&
	    is_rt_sigreturn_32_address(next_ip, sp) &&
	    sane_rt_signal_32_frame(sp)) {
		rt_sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp;
		mctx = &rt_sf->uc.uc_mcontext;
	}

	if (!mctx)
		return NULL;
	return mctx->mc_gregs;
}
| 437 | |||
/*
 * Walk the 32-bit user stack, recording return addresses into 'entry'.
 * Mirrors perf_callchain_user_64(): read each frame's back chain and
 * saved LR from user memory, and restart from the saved register state
 * whenever a frame looks like a signal frame.
 */
static void perf_callchain_user_32(struct perf_callchain_entry *entry,
				   struct pt_regs *regs)
{
	unsigned int sp, next_sp;
	unsigned int next_ip;
	unsigned int lr;
	long level = 0;
	unsigned int __user *fp, *uregs;

	next_ip = regs->nip;
	lr = regs->link;
	sp = regs->gpr[1];
	perf_callchain_store(entry, next_ip);

	while (entry->nr < PERF_MAX_STACK_DEPTH) {
		fp = (unsigned int __user *) (unsigned long) sp;
		if (!valid_user_sp(sp, 0) || read_user_stack_32(fp, &next_sp))
			return;
		/* Saved LR lives at fp[1] in the 32-bit ABI frame. */
		if (level > 0 && read_user_stack_32(&fp[1], &next_ip))
			return;

		/* The caller's LR is the entry IP only at the deepest level. */
		uregs = signal_frame_32_regs(sp, next_sp, next_ip);
		if (!uregs && level <= 1)
			uregs = signal_frame_32_regs(sp, next_sp, lr);
		if (uregs) {
			/*
			 * This looks like a signal frame, so restart
			 * the stack trace with the values in it.
			 */
			if (read_user_stack_32(&uregs[PT_NIP], &next_ip) ||
			    read_user_stack_32(&uregs[PT_LNK], &lr) ||
			    read_user_stack_32(&uregs[PT_R1], &sp))
				return;
			level = 0;
			perf_callchain_store(entry, PERF_CONTEXT_USER);
			perf_callchain_store(entry, next_ip);
			continue;
		}

		if (level == 0)
			next_ip = lr;
		perf_callchain_store(entry, next_ip);
		++level;
		sp = next_sp;
	}
}
| 484 | |||
/*
 * Record a user-space callchain, dispatching on the bitness of the
 * current task.
 */
void
perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
	if (!current_is_64bit())
		perf_callchain_user_32(entry, regs);
	else
		perf_callchain_user_64(entry, regs);
}
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c new file mode 100644 index 00000000000..10a140f82cb --- /dev/null +++ b/arch/powerpc/kernel/perf_event.c | |||
| @@ -0,0 +1,1432 @@ | |||
| 1 | /* | ||
| 2 | * Performance event support - powerpc architecture code | ||
| 3 | * | ||
| 4 | * Copyright 2008-2009 Paul Mackerras, IBM Corporation. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU General Public License | ||
| 8 | * as published by the Free Software Foundation; either version | ||
| 9 | * 2 of the License, or (at your option) any later version. | ||
| 10 | */ | ||
| 11 | #include <linux/kernel.h> | ||
| 12 | #include <linux/sched.h> | ||
| 13 | #include <linux/perf_event.h> | ||
| 14 | #include <linux/percpu.h> | ||
| 15 | #include <linux/hardirq.h> | ||
| 16 | #include <asm/reg.h> | ||
| 17 | #include <asm/pmc.h> | ||
| 18 | #include <asm/machdep.h> | ||
| 19 | #include <asm/firmware.h> | ||
| 20 | #include <asm/ptrace.h> | ||
| 21 | |||
/*
 * Per-CPU state for the core PMU: the events currently scheduled on the
 * hardware counters, their constraint/alternative data, and the cached
 * MMCR register images used to program the PMU.
 */
struct cpu_hw_events {
	int n_events;			/* events currently on the PMU */
	int n_percpu;
	int disabled;			/* non-zero while the PMU is frozen */
	int n_added;			/* reset whenever the PMU is disabled */
	int n_limited;			/* events on limited PMCs (5/6) */
	u8 pmcs_enabled;		/* set once ppc_enable_pmcs() was called */
	struct perf_event *event[MAX_HWEVENTS];
	u64 events[MAX_HWEVENTS];
	unsigned int flags[MAX_HWEVENTS];
	unsigned long mmcr[3];		/* cached MMCR0/MMCR1/MMCRA images */
	struct perf_event *limited_counter[MAX_LIMITED_HWCOUNTERS];
	u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS];
	/* scratch space for power_check_constraints() */
	u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
	unsigned long amasks[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
	unsigned long avalues[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];

	unsigned int group_flag;
	int n_txn_start;
};
DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
| 43 | |||
/*
 * Description of the active CPU-specific PMU; presumably registered by
 * the per-CPU PMU drivers (not set in this file) -- verify at caller.
 */
struct power_pmu *ppmu;

/*
 * Normally, to ignore kernel events we set the FCS (freeze counters
 * in supervisor mode) bit in MMCR0, but if the kernel runs with the
 * hypervisor bit set in the MSR, or if we are running on a processor
 * where the hypervisor bit is forced to 1 (as on Apple G5 processors),
 * then we need to use the FCHV bit to ignore kernel events.
 */
static unsigned int freeze_events_kernel = MMCR0_FCS;
| 54 | |||
/*
 * 32-bit doesn't have MMCRA but does have an MMCR2,
 * and a few other names are different.
 * The stubs below let the shared code compile to no-ops on 32-bit.
 */
#ifdef CONFIG_PPC32

#define MMCR0_FCHV 0
#define MMCR0_PMCjCE MMCR0_PMCnCE

#define SPRN_MMCRA SPRN_MMCR2
#define MMCRA_SAMPLE_ENABLE 0

/* No SIAR-style instruction-pointer adjustment on 32-bit. */
static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
{
	return 0;
}
/* No sampled-data-address register on 32-bit. */
static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { }
static inline u32 perf_get_misc_flags(struct pt_regs *regs)
{
	return 0;
}
static inline void perf_read_regs(struct pt_regs *regs) { }
/* PMU interrupts are never treated as NMIs on 32-bit. */
static inline int perf_intr_is_nmi(struct pt_regs *regs)
{
	return 0;
}

#endif /* CONFIG_PPC32 */
| 83 | |||
/*
 * Things that are specific to 64-bit implementations.
 */
#ifdef CONFIG_PPC64

/*
 * When instruction sampling is active, return the byte offset to add
 * to the sampled address, derived from the MMCRA slot field (each slot
 * is one 4-byte instruction).  regs->dsisr holds the MMCRA value saved
 * by perf_read_regs() below.
 */
static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
{
	unsigned long mmcra = regs->dsisr;

	if ((mmcra & MMCRA_SAMPLE_ENABLE) && !(ppmu->flags & PPMU_ALT_SIPR)) {
		unsigned long slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT;
		if (slot > 1)
			return 4 * (slot - 1);
	}
	return 0;
}
| 100 | |||
/*
 * The user wants a data address recorded.
 * If we're not doing instruction sampling, give them the SDAR
 * (sampled data address).  If we are doing instruction sampling, then
 * only give them the SDAR if it corresponds to the instruction
 * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC
 * bit in MMCRA.  (regs->dsisr holds MMCRA - see perf_read_regs().)
 */
static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
{
	unsigned long mmcra = regs->dsisr;
	unsigned long sdsync = (ppmu->flags & PPMU_ALT_SIPR) ?
		POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC;

	if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync))
		*addrp = mfspr(SPRN_SDAR);
}
| 118 | |||
/*
 * Classify a PMU-interrupt sample as user, hypervisor or kernel, based
 * on the SIPR/SIHV bits of the saved MMCRA (in regs->dsisr).
 */
static inline u32 perf_get_misc_flags(struct pt_regs *regs)
{
	unsigned long mmcra = regs->dsisr;
	unsigned long sihv = MMCRA_SIHV;
	unsigned long sipr = MMCRA_SIPR;

	if (TRAP(regs) != 0xf00)
		return 0;	/* not a PMU interrupt */

	/* PMUs with PPMU_ALT_SIPR keep these bits in different positions. */
	if (ppmu->flags & PPMU_ALT_SIPR) {
		sihv = POWER6_MMCRA_SIHV;
		sipr = POWER6_MMCRA_SIPR;
	}

	/* PR has priority over HV, so order below is important */
	if (mmcra & sipr)
		return PERF_RECORD_MISC_USER;
	if ((mmcra & sihv) && (freeze_events_kernel != MMCR0_FCHV))
		return PERF_RECORD_MISC_HYPERVISOR;
	return PERF_RECORD_MISC_KERNEL;
}
| 140 | |||
/*
 * Overload regs->dsisr to store MMCRA so we only need to read it once
 * on each interrupt.  Consumers: perf_ip_adjust(), perf_get_data_addr()
 * and perf_get_misc_flags() above.
 */
static inline void perf_read_regs(struct pt_regs *regs)
{
	regs->dsisr = mfspr(SPRN_MMCRA);
}
| 149 | |||
/*
 * If interrupts were soft-disabled when a PMU interrupt occurs, treat
 * it as an NMI.  (regs->softe is the soft-enable state at interrupt
 * time.)
 */
static inline int perf_intr_is_nmi(struct pt_regs *regs)
{
	return !regs->softe;
}
| 158 | |||
| 159 | #endif /* CONFIG_PPC64 */ | ||
| 160 | |||
/* PMU interrupt handler, defined later in this file. */
static void perf_event_interrupt(struct pt_regs *regs);

/* Nothing to print for this architecture. */
void perf_event_print_debug(void)
{
}
| 166 | |||
/*
 * Read one performance monitor counter (PMC).
 * Returns 0 (after logging an error) for an out-of-range index.
 * PMC7/8 exist only on 64-bit processors.
 */
static unsigned long read_pmc(int idx)
{
	unsigned long val;

	switch (idx) {
	case 1:
		val = mfspr(SPRN_PMC1);
		break;
	case 2:
		val = mfspr(SPRN_PMC2);
		break;
	case 3:
		val = mfspr(SPRN_PMC3);
		break;
	case 4:
		val = mfspr(SPRN_PMC4);
		break;
	case 5:
		val = mfspr(SPRN_PMC5);
		break;
	case 6:
		val = mfspr(SPRN_PMC6);
		break;
#ifdef CONFIG_PPC64
	case 7:
		val = mfspr(SPRN_PMC7);
		break;
	case 8:
		val = mfspr(SPRN_PMC8);
		break;
#endif /* CONFIG_PPC64 */
	default:
		printk(KERN_ERR "oops trying to read PMC%d\n", idx);
		val = 0;
	}
	return val;
}
| 207 | |||
/*
 * Write one PMC.  Out-of-range indices are logged and ignored.
 * PMC7/8 exist only on 64-bit processors.
 */
static void write_pmc(int idx, unsigned long val)
{
	switch (idx) {
	case 1:
		mtspr(SPRN_PMC1, val);
		break;
	case 2:
		mtspr(SPRN_PMC2, val);
		break;
	case 3:
		mtspr(SPRN_PMC3, val);
		break;
	case 4:
		mtspr(SPRN_PMC4, val);
		break;
	case 5:
		mtspr(SPRN_PMC5, val);
		break;
	case 6:
		mtspr(SPRN_PMC6, val);
		break;
#ifdef CONFIG_PPC64
	case 7:
		mtspr(SPRN_PMC7, val);
		break;
	case 8:
		mtspr(SPRN_PMC8, val);
		break;
#endif /* CONFIG_PPC64 */
	default:
		printk(KERN_ERR "oops trying to write PMC%d\n", idx);
	}
}
| 244 | |||
/*
 * Check if a set of events can all go on the PMU at once.
 * If they can't, this will look at alternative codes for the events
 * and see if any combination of alternative codes is feasible.
 * The feasible set is returned in event_id[].
 *
 * cpuhw provides scratch space (alternatives/amasks/avalues);
 * cflags[] carries the PPMU_LIMITED_PMC_* flags per event.
 * Returns 0 on success (event_id[] possibly rewritten), -1 if no
 * feasible assignment exists.
 */
static int power_check_constraints(struct cpu_hw_events *cpuhw,
				   u64 event_id[], unsigned int cflags[],
				   int n_ev)
{
	unsigned long mask, value, nv;
	unsigned long smasks[MAX_HWEVENTS], svalues[MAX_HWEVENTS];
	int n_alt[MAX_HWEVENTS], choice[MAX_HWEVENTS];
	int i, j;
	unsigned long addf = ppmu->add_fields;
	unsigned long tadd = ppmu->test_adder;

	if (n_ev > ppmu->n_counter)
		return -1;

	/* First see if the events will go on as-is */
	for (i = 0; i < n_ev; ++i) {
		/* Events that must go on a limited PMC but aren't coded for
		 * one are rewritten to their first alternative. */
		if ((cflags[i] & PPMU_LIMITED_PMC_REQD)
		    && !ppmu->limited_pmc_event(event_id[i])) {
			ppmu->get_alternatives(event_id[i], cflags[i],
					       cpuhw->alternatives[i]);
			event_id[i] = cpuhw->alternatives[i][0];
		}
		if (ppmu->get_constraint(event_id[i], &cpuhw->amasks[i][0],
					 &cpuhw->avalues[i][0]))
			return -1;
	}
	value = mask = 0;
	for (i = 0; i < n_ev; ++i) {
		/* Accumulate constraint bits; addf marks adder fields. */
		nv = (value | cpuhw->avalues[i][0]) +
			(value & cpuhw->avalues[i][0] & addf);
		if ((((nv + tadd) ^ value) & mask) != 0 ||
		    (((nv + tadd) ^ cpuhw->avalues[i][0]) &
		     cpuhw->amasks[i][0]) != 0)
			break;
		value = nv;
		mask |= cpuhw->amasks[i][0];
	}
	if (i == n_ev)
		return 0;	/* all OK */

	/* doesn't work, gather alternatives... */
	if (!ppmu->get_alternatives)
		return -1;
	for (i = 0; i < n_ev; ++i) {
		choice[i] = 0;
		n_alt[i] = ppmu->get_alternatives(event_id[i], cflags[i],
						  cpuhw->alternatives[i]);
		for (j = 1; j < n_alt[i]; ++j)
			ppmu->get_constraint(cpuhw->alternatives[i][j],
					     &cpuhw->amasks[i][j],
					     &cpuhw->avalues[i][j]);
	}

	/* enumerate all possibilities and see if any will work */
	i = 0;
	j = -1;
	value = mask = nv = 0;
	while (i < n_ev) {
		if (j >= 0) {
			/* we're backtracking, restore context */
			value = svalues[i];
			mask = smasks[i];
			j = choice[i];
		}
		/*
		 * See if any alternative k for event_id i,
		 * where k > j, will satisfy the constraints.
		 */
		while (++j < n_alt[i]) {
			nv = (value | cpuhw->avalues[i][j]) +
				(value & cpuhw->avalues[i][j] & addf);
			if ((((nv + tadd) ^ value) & mask) == 0 &&
			    (((nv + tadd) ^ cpuhw->avalues[i][j])
			     & cpuhw->amasks[i][j]) == 0)
				break;
		}
		if (j >= n_alt[i]) {
			/*
			 * No feasible alternative, backtrack
			 * to event_id i-1 and continue enumerating its
			 * alternatives from where we got up to.
			 */
			if (--i < 0)
				return -1;
		} else {
			/*
			 * Found a feasible alternative for event_id i,
			 * remember where we got up to with this event_id,
			 * go on to the next event_id, and start with
			 * the first alternative for it.
			 */
			choice[i] = j;
			svalues[i] = value;
			smasks[i] = mask;
			value = nv;
			mask |= cpuhw->amasks[i][j];
			++i;
			j = -1;
		}
	}

	/* OK, we have a feasible combination, tell the caller the solution */
	for (i = 0; i < n_ev; ++i)
		event_id[i] = cpuhw->alternatives[i][choice[i]];
	return 0;
}
| 357 | |||
/*
 * Check if newly-added events have consistent settings for
 * exclude_{user,kernel,hv} with each other and any previously
 * added events.  Events that may go on a limited PMC are exempt from
 * the consistency check, but if any exclude bit is in use they get
 * PPMU_LIMITED_PMC_REQD set so that they actually stay on a limited
 * PMC.  Returns 0 if consistent, -EAGAIN otherwise.
 */
static int check_excludes(struct perf_event **ctrs, unsigned int cflags[],
			  int n_prev, int n_new)
{
	int eu = 0, ek = 0, eh = 0;
	int i, n, first;
	struct perf_event *event;

	n = n_prev + n_new;
	if (n <= 1)
		return 0;	/* a single event can't be inconsistent */

	first = 1;
	for (i = 0; i < n; ++i) {
		if (cflags[i] & PPMU_LIMITED_PMC_OK) {
			cflags[i] &= ~PPMU_LIMITED_PMC_REQD;
			continue;
		}
		event = ctrs[i];
		if (first) {
			/* first non-limited event sets the reference */
			eu = event->attr.exclude_user;
			ek = event->attr.exclude_kernel;
			eh = event->attr.exclude_hv;
			first = 0;
		} else if (event->attr.exclude_user != eu ||
			   event->attr.exclude_kernel != ek ||
			   event->attr.exclude_hv != eh) {
			return -EAGAIN;
		}
	}

	/* excludes in use: require limited-capable events on limited PMCs */
	if (eu || ek || eh)
		for (i = 0; i < n; ++i)
			if (cflags[i] & PPMU_LIMITED_PMC_OK)
				cflags[i] |= PPMU_LIMITED_PMC_REQD;

	return 0;
}
| 400 | |||
/*
 * Compute the 32-bit delta between two counter readings, guarding
 * against POWER7-style counter rollback (see below).
 */
static u64 check_and_compute_delta(u64 prev, u64 val)
{
	u64 delta = (val - prev) & 0xfffffffful;

	/*
	 * POWER7 can roll back counter values, if the new value is smaller
	 * than the previous value it will cause the delta and the counter to
	 * have bogus values unless we rolled a counter over.  If a counter is
	 * rolled back, it will be smaller, but within 256, which is the maximum
	 * number of events to rollback at once.  If we detect a rollback
	 * return 0.  This can lead to a small lack of precision in the
	 * counters.
	 */
	if (prev > val && (prev - val) < 256)
		delta = 0;

	return delta;
}
| 419 | |||
/*
 * Read the hardware counter backing 'event' and fold the change since
 * the last read into event->count and the remaining sample period.
 */
static void power_pmu_read(struct perf_event *event)
{
	s64 val, delta, prev;

	if (event->hw.state & PERF_HES_STOPPED)
		return;

	if (!event->hw.idx)
		return;		/* not currently on a hardware counter */
	/*
	 * Performance monitor interrupts come even when interrupts
	 * are soft-disabled, as long as interrupts are hard-enabled.
	 * Therefore we treat them like NMIs, hence the cmpxchg loop
	 * on prev_count below.
	 */
	do {
		prev = local64_read(&event->hw.prev_count);
		barrier();
		val = read_pmc(event->hw.idx);
		delta = check_and_compute_delta(prev, val);
		if (!delta)
			return;
	} while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);

	local64_add(delta, &event->count);
	local64_sub(delta, &event->hw.period_left);
}
| 446 | |||
| 447 | /* | ||
| 448 | * On some machines, PMC5 and PMC6 can't be written, don't respect | ||
| 449 | * the freeze conditions, and don't generate interrupts. This tells | ||
| 450 | * us if `event' is using such a PMC. | ||
| 451 | */ | ||
| 452 | static int is_limited_pmc(int pmcnum) | ||
| 453 | { | ||
| 454 | return (ppmu->flags & PPMU_LIMITED_PMC5_6) | ||
| 455 | && (pmcnum == 5 || pmcnum == 6); | ||
| 456 | } | ||
| 457 | |||
/*
 * Fold the final counts of the limited events into their event counts,
 * using the PMC5/PMC6 values sampled at freeze time by write_mmcr0(),
 * and mark each event as no longer on a hardware counter (hw.idx = 0).
 */
static void freeze_limited_counters(struct cpu_hw_events *cpuhw,
				    unsigned long pmc5, unsigned long pmc6)
{
	struct perf_event *event;
	u64 val, prev, delta;
	int i;

	for (i = 0; i < cpuhw->n_limited; ++i) {
		event = cpuhw->limited_counter[i];
		if (!event->hw.idx)
			continue;
		val = (event->hw.idx == 5) ? pmc5 : pmc6;
		prev = local64_read(&event->hw.prev_count);
		event->hw.idx = 0;
		delta = check_and_compute_delta(prev, val);
		if (delta)
			local64_add(delta, &event->count);
	}
}
| 477 | |||
/*
 * Put the limited events back onto their PMCs after unfreezing,
 * restoring the saved hardware index and resetting prev_count to the
 * PMC5/PMC6 values sampled at thaw time by write_mmcr0().
 */
static void thaw_limited_counters(struct cpu_hw_events *cpuhw,
				  unsigned long pmc5, unsigned long pmc6)
{
	struct perf_event *event;
	u64 val, prev;
	int i;

	for (i = 0; i < cpuhw->n_limited; ++i) {
		event = cpuhw->limited_counter[i];
		event->hw.idx = cpuhw->limited_hwidx[i];
		val = (event->hw.idx == 5) ? pmc5 : pmc6;
		prev = local64_read(&event->hw.prev_count);
		/* only reset prev_count if the counter actually moved */
		if (check_and_compute_delta(prev, val))
			local64_set(&event->hw.prev_count, val);
		perf_event_update_userpage(event);
	}
}
| 495 | |||
/*
 * Since limited events don't respect the freeze conditions, we
 * have to read them immediately after freezing or unfreezing the
 * other events.  We try to keep the values from the limited
 * events as consistent as possible by keeping the delay (in
 * cycles and instructions) between freezing/unfreezing and reading
 * the limited events as small and consistent as possible.
 * Therefore, if any limited events are in use, we read them
 * both, and always in the same order, to minimize variability,
 * and do it inside the same asm that writes MMCR0.
 */
static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0)
{
	unsigned long pmc5, pmc6;

	/* Fast path: no limited events, just write the register. */
	if (!cpuhw->n_limited) {
		mtspr(SPRN_MMCR0, mmcr0);
		return;
	}

	/*
	 * Write MMCR0, then read PMC5 and PMC6 immediately.
	 * To ensure we don't get a performance monitor interrupt
	 * between writing MMCR0 and freezing/thawing the limited
	 * events, we first write MMCR0 with the event overflow
	 * interrupt enable bits turned off.
	 */
	asm volatile("mtspr %3,%2; mfspr %0,%4; mfspr %1,%5"
		     : "=&r" (pmc5), "=&r" (pmc6)
		     : "r" (mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)),
		       "i" (SPRN_MMCR0),
		       "i" (SPRN_PMC5), "i" (SPRN_PMC6));

	/* MMCR0_FC set means we just froze; clear means we just thawed. */
	if (mmcr0 & MMCR0_FC)
		freeze_limited_counters(cpuhw, pmc5, pmc6);
	else
		thaw_limited_counters(cpuhw, pmc5, pmc6);

	/*
	 * Write the full MMCR0 including the event overflow interrupt
	 * enable bits, if necessary.
	 */
	if (mmcr0 & (MMCR0_PMC1CE | MMCR0_PMCjCE))
		mtspr(SPRN_MMCR0, mmcr0);
}
| 541 | |||
/*
 * Disable all events to prevent PMU interrupts and to allow
 * events to be added or removed.
 */
static void power_pmu_disable(struct pmu *pmu)
{
	struct cpu_hw_events *cpuhw;
	unsigned long flags;

	if (!ppmu)
		return;
	local_irq_save(flags);
	cpuhw = &__get_cpu_var(cpu_hw_events);

	if (!cpuhw->disabled) {
		cpuhw->disabled = 1;
		cpuhw->n_added = 0;

		/*
		 * Check if we ever enabled the PMU on this cpu.
		 */
		if (!cpuhw->pmcs_enabled) {
			ppc_enable_pmcs();
			cpuhw->pmcs_enabled = 1;
		}

		/*
		 * Disable instruction sampling if it was enabled.
		 * (cpuhw->mmcr[2] is the cached MMCRA value.)
		 */
		if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
			mtspr(SPRN_MMCRA,
			      cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
			mb();
		}

		/*
		 * Set the 'freeze counters' bit.
		 * The barrier is to make sure the mtspr has been
		 * executed and the PMU has frozen the events
		 * before we return.
		 */
		write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC);
		mb();
	}
	local_irq_restore(flags);
}
| 588 | |||
/*
 * Re-enable all events if disable == 0.
 * If we were previously disabled and events were added, then
 * put the new config on the PMU.
 */
static void power_pmu_enable(struct pmu *pmu)
{
	struct perf_event *event;
	struct cpu_hw_events *cpuhw;
	unsigned long flags;
	long i;
	unsigned long val;
	s64 left;
	unsigned int hwc_index[MAX_HWEVENTS];
	int n_lim;
	int idx;

	if (!ppmu)
		return;
	local_irq_save(flags);
	cpuhw = &__get_cpu_var(cpu_hw_events);
	/* nothing to do unless power_pmu_disable() ran first */
	if (!cpuhw->disabled) {
		local_irq_restore(flags);
		return;
	}
	cpuhw->disabled = 0;

	/*
	 * If we didn't change anything, or only removed events,
	 * no need to recalculate MMCR* settings and reset the PMCs.
	 * Just reenable the PMU with the current MMCR* settings
	 * (possibly updated for removal of events).
	 */
	if (!cpuhw->n_added) {
		mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
		mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
		if (cpuhw->n_events == 0)
			ppc_set_pmu_inuse(0);
		goto out_enable;
	}

	/*
	 * Compute MMCR* values for the new set of events
	 */
	if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_events, hwc_index,
			       cpuhw->mmcr)) {
		/* shouldn't ever get here */
		printk(KERN_ERR "oops compute_mmcr failed\n");
		goto out;
	}

	/*
	 * Add in MMCR0 freeze bits corresponding to the
	 * attr.exclude_* bits for the first event.
	 * We have already checked that all events have the
	 * same values for these bits as the first event.
	 */
	event = cpuhw->event[0];
	if (event->attr.exclude_user)
		cpuhw->mmcr[0] |= MMCR0_FCP;
	if (event->attr.exclude_kernel)
		cpuhw->mmcr[0] |= freeze_events_kernel;
	if (event->attr.exclude_hv)
		cpuhw->mmcr[0] |= MMCR0_FCHV;

	/*
	 * Write the new configuration to MMCR* with the freeze
	 * bit set and set the hardware events to their initial values.
	 * Then unfreeze the events.
	 */
	ppc_set_pmu_inuse(1);
	mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
	mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
	mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
				| MMCR0_FC);

	/*
	 * Read off any pre-existing events that need to move
	 * to another PMC.
	 */
	for (i = 0; i < cpuhw->n_events; ++i) {
		event = cpuhw->event[i];
		if (event->hw.idx && event->hw.idx != hwc_index[i] + 1) {
			power_pmu_read(event);
			write_pmc(event->hw.idx, 0);
			event->hw.idx = 0;
		}
	}

	/*
	 * Initialize the PMCs for all the new and moved events.
	 */
	cpuhw->n_limited = n_lim = 0;
	for (i = 0; i < cpuhw->n_events; ++i) {
		event = cpuhw->event[i];
		if (event->hw.idx)
			continue;
		idx = hwc_index[i] + 1;
		if (is_limited_pmc(idx)) {
			/* limited PMCs are started/stopped via write_mmcr0() */
			cpuhw->limited_counter[n_lim] = event;
			cpuhw->limited_hwidx[n_lim] = idx;
			++n_lim;
			continue;
		}
		/* program the PMC so it overflows after 'left' more counts */
		val = 0;
		if (event->hw.sample_period) {
			left = local64_read(&event->hw.period_left);
			if (left < 0x80000000L)
				val = 0x80000000L - left;
		}
		local64_set(&event->hw.prev_count, val);
		event->hw.idx = idx;
		if (event->hw.state & PERF_HES_STOPPED)
			val = 0;
		write_pmc(idx, val);
		perf_event_update_userpage(event);
	}
	cpuhw->n_limited = n_lim;
	cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;

 out_enable:
	mb();
	write_mmcr0(cpuhw, cpuhw->mmcr[0]);

	/*
	 * Enable instruction sampling if necessary
	 */
	if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
		mb();
		mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
	}

 out:
	local_irq_restore(flags);
}
| 724 | |||
| 725 | static int collect_events(struct perf_event *group, int max_count, | ||
| 726 | struct perf_event *ctrs[], u64 *events, | ||
| 727 | unsigned int *flags) | ||
| 728 | { | ||
| 729 | int n = 0; | ||
| 730 | struct perf_event *event; | ||
| 731 | |||
| 732 | if (!is_software_event(group)) { | ||
| 733 | if (n >= max_count) | ||
| 734 | return -1; | ||
| 735 | ctrs[n] = group; | ||
| 736 | flags[n] = group->hw.event_base; | ||
| 737 | events[n++] = group->hw.config; | ||
| 738 | } | ||
| 739 | list_for_each_entry(event, &group->sibling_list, group_entry) { | ||
| 740 | if (!is_software_event(event) && | ||
| 741 | event->state != PERF_EVENT_STATE_OFF) { | ||
| 742 | if (n >= max_count) | ||
| 743 | return -1; | ||
| 744 | ctrs[n] = event; | ||
| 745 | flags[n] = event->hw.event_base; | ||
| 746 | events[n++] = event->hw.config; | ||
| 747 | } | ||
| 748 | } | ||
| 749 | return n; | ||
| 750 | } | ||
| 751 | |||
/*
 * Add a event to the PMU.
 * If all events are not already frozen, then we disable and
 * re-enable the PMU in order to get hw_perf_enable to do the
 * actual work of reconfiguring the PMU.
 */
static int power_pmu_add(struct perf_event *event, int ef_flags)
{
	struct cpu_hw_events *cpuhw;
	unsigned long flags;
	int n0;
	int ret = -EAGAIN;

	local_irq_save(flags);
	perf_pmu_disable(event->pmu);

	/*
	 * Add the event to the list (if there is room)
	 * and check whether the total set is still feasible.
	 */
	cpuhw = &__get_cpu_var(cpu_hw_events);
	n0 = cpuhw->n_events;
	if (n0 >= ppmu->n_counter)
		goto out;
	cpuhw->event[n0] = event;
	cpuhw->events[n0] = event->hw.config;
	cpuhw->flags[n0] = event->hw.event_base;

	/* start in the stopped state unless asked to start immediately */
	if (!(ef_flags & PERF_EF_START))
		event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;

	/*
	 * If group events scheduling transaction was started,
	 * skip the schedulability test here, it will be performed
	 * at commit time(->commit_txn) as a whole
	 */
	if (cpuhw->group_flag & PERF_EVENT_TXN)
		goto nocheck;

	if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
		goto out;
	if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1))
		goto out;
	/* constraint checking may have rewritten the event code */
	event->hw.config = cpuhw->events[n0];

nocheck:
	++cpuhw->n_events;
	++cpuhw->n_added;

	ret = 0;
out:
	perf_pmu_enable(event->pmu);
	local_irq_restore(flags);
	return ret;
}
| 807 | |||
/*
 * Remove a event from the PMU.
 */
static void power_pmu_del(struct perf_event *event, int ef_flags)
{
	struct cpu_hw_events *cpuhw;
	long i;
	unsigned long flags;

	local_irq_save(flags);
	perf_pmu_disable(event->pmu);

	/* fold the final hardware count in before the event goes away */
	power_pmu_read(event);

	cpuhw = &__get_cpu_var(cpu_hw_events);
	for (i = 0; i < cpuhw->n_events; ++i) {
		if (event == cpuhw->event[i]) {
			/* compact the parallel arrays over the removed slot */
			while (++i < cpuhw->n_events) {
				cpuhw->event[i-1] = cpuhw->event[i];
				cpuhw->events[i-1] = cpuhw->events[i];
				cpuhw->flags[i-1] = cpuhw->flags[i];
			}
			--cpuhw->n_events;
			ppmu->disable_pmc(event->hw.idx - 1, cpuhw->mmcr);
			if (event->hw.idx) {
				write_pmc(event->hw.idx, 0);
				event->hw.idx = 0;
			}
			perf_event_update_userpage(event);
			break;
		}
	}
	/* also drop it from the limited-counter list, if present */
	for (i = 0; i < cpuhw->n_limited; ++i)
		if (event == cpuhw->limited_counter[i])
			break;
	if (i < cpuhw->n_limited) {
		while (++i < cpuhw->n_limited) {
			cpuhw->limited_counter[i-1] = cpuhw->limited_counter[i];
			cpuhw->limited_hwidx[i-1] = cpuhw->limited_hwidx[i];
		}
		--cpuhw->n_limited;
	}
	if (cpuhw->n_events == 0) {
		/* disable exceptions if no events are running */
		cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
	}

	perf_pmu_enable(event->pmu);
	local_irq_restore(flags);
}
| 858 | |||
/*
 * POWER-PMU does not support disabling individual counters, hence
 * program their cycle counter to their max value and ignore the interrupts.
 */

static void power_pmu_start(struct perf_event *event, int ef_flags)
{
	unsigned long flags;
	s64 left;

	/* nothing to restart without a PMC and a sample period */
	if (!event->hw.idx || !event->hw.sample_period)
		return;

	/* already running */
	if (!(event->hw.state & PERF_HES_STOPPED))
		return;

	if (ef_flags & PERF_EF_RELOAD)
		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));

	local_irq_save(flags);
	perf_pmu_disable(event->pmu);

	event->hw.state = 0;
	left = local64_read(&event->hw.period_left);
	/* reload the PMC with the remaining period */
	write_pmc(event->hw.idx, left);

	perf_event_update_userpage(event);
	perf_pmu_enable(event->pmu);
	local_irq_restore(flags);
}
| 889 | |||
/*
 * Stop a running event: fold its current count into event->count,
 * mark it stopped/up-to-date, and clear its PMC.
 */
static void power_pmu_stop(struct perf_event *event, int ef_flags)
{
	unsigned long flags;

	if (!event->hw.idx || !event->hw.sample_period)
		return;

	/* already stopped */
	if (event->hw.state & PERF_HES_STOPPED)
		return;

	local_irq_save(flags);
	perf_pmu_disable(event->pmu);

	power_pmu_read(event);
	event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
	write_pmc(event->hw.idx, 0);

	perf_event_update_userpage(event);
	perf_pmu_enable(event->pmu);
	local_irq_restore(flags);
}
| 911 | |||
/*
 * Start group events scheduling transaction
 * Set the flag to make pmu::enable() not perform the
 * schedulability test, it will be performed at commit time
 */
void power_pmu_start_txn(struct pmu *pmu)
{
	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);

	perf_pmu_disable(pmu);
	cpuhw->group_flag |= PERF_EVENT_TXN;
	/* remember where the transaction began, for commit_txn */
	cpuhw->n_txn_start = cpuhw->n_events;
}
| 925 | |||
/*
 * Stop group events scheduling transaction
 * Clear the flag and pmu::enable() will perform the
 * schedulability test.
 */
void power_pmu_cancel_txn(struct pmu *pmu)
{
	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);

	cpuhw->group_flag &= ~PERF_EVENT_TXN;
	perf_pmu_enable(pmu);
}
| 938 | |||
/*
 * Commit group events scheduling transaction
 * Perform the group schedulability test as a whole
 * Return 0 if success
 */
int power_pmu_commit_txn(struct pmu *pmu)
{
	struct cpu_hw_events *cpuhw;
	long i, n;

	if (!ppmu)
		return -EAGAIN;
	cpuhw = &__get_cpu_var(cpu_hw_events);
	n = cpuhw->n_events;
	if (check_excludes(cpuhw->event, cpuhw->flags, 0, n))
		return -EAGAIN;
	i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n);
	if (i < 0)
		return -EAGAIN;

	/* commit the (possibly rewritten) event codes back to the events */
	for (i = cpuhw->n_txn_start; i < n; ++i)
		cpuhw->event[i]->hw.config = cpuhw->events[i];

	cpuhw->group_flag &= ~PERF_EVENT_TXN;
	perf_pmu_enable(pmu);
	return 0;
}
| 966 | |||
| 967 | /* | ||
| 968 | * Return 1 if we might be able to put event on a limited PMC, | ||
| 969 | * or 0 if not. | ||
| 970 | * A event can only go on a limited PMC if it counts something | ||
| 971 | * that a limited PMC can count, doesn't require interrupts, and | ||
| 972 | * doesn't exclude any processor mode. | ||
| 973 | */ | ||
| 974 | static int can_go_on_limited_pmc(struct perf_event *event, u64 ev, | ||
| 975 | unsigned int flags) | ||
| 976 | { | ||
| 977 | int n; | ||
| 978 | u64 alt[MAX_EVENT_ALTERNATIVES]; | ||
| 979 | |||
| 980 | if (event->attr.exclude_user | ||
| 981 | || event->attr.exclude_kernel | ||
| 982 | || event->attr.exclude_hv | ||
| 983 | || event->attr.sample_period) | ||
| 984 | return 0; | ||
| 985 | |||
| 986 | if (ppmu->limited_pmc_event(ev)) | ||
| 987 | return 1; | ||
| 988 | |||
| 989 | /* | ||
| 990 | * The requested event_id isn't on a limited PMC already; | ||
| 991 | * see if any alternative code goes on a limited PMC. | ||
| 992 | */ | ||
| 993 | if (!ppmu->get_alternatives) | ||
| 994 | return 0; | ||
| 995 | |||
| 996 | flags |= PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD; | ||
| 997 | n = ppmu->get_alternatives(ev, flags, alt); | ||
| 998 | |||
| 999 | return n > 0; | ||
| 1000 | } | ||
| 1001 | |||
| 1002 | /* | ||
| 1003 | * Find an alternative event_id that goes on a normal PMC, if possible, | ||
| 1004 | * and return the event_id code, or 0 if there is no such alternative. | ||
| 1005 | * (Note: event_id code 0 is "don't count" on all machines.) | ||
| 1006 | */ | ||
| 1007 | static u64 normal_pmc_alternative(u64 ev, unsigned long flags) | ||
| 1008 | { | ||
| 1009 | u64 alt[MAX_EVENT_ALTERNATIVES]; | ||
| 1010 | int n; | ||
| 1011 | |||
| 1012 | flags &= ~(PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD); | ||
| 1013 | n = ppmu->get_alternatives(ev, flags, alt); | ||
| 1014 | if (!n) | ||
| 1015 | return 0; | ||
| 1016 | return alt[0]; | ||
| 1017 | } | ||
| 1018 | |||
/* Number of perf_events counting hardware events */
static atomic_t num_events;
/* Used to avoid races in calling reserve/release_pmc_hardware
 * (see power_pmu_event_init() and hw_perf_event_destroy()) */
static DEFINE_MUTEX(pmc_reserve_mutex);
| 1023 | |||
/*
 * Release the PMU if this is the last perf_event.
 * The atomic_add_unless() fast path decrements without the mutex
 * unless the count would reach zero; only the final teardown takes
 * pmc_reserve_mutex and releases the hardware.
 */
static void hw_perf_event_destroy(struct perf_event *event)
{
	if (!atomic_add_unless(&num_events, -1, 1)) {
		mutex_lock(&pmc_reserve_mutex);
		if (atomic_dec_return(&num_events) == 0)
			release_pmc_hardware();
		mutex_unlock(&pmc_reserve_mutex);
	}
}
| 1036 | |||
| 1037 | /* | ||
| 1038 | * Translate a generic cache event_id config to a raw event_id code. | ||
| 1039 | */ | ||
| 1040 | static int hw_perf_cache_event(u64 config, u64 *eventp) | ||
| 1041 | { | ||
| 1042 | unsigned long type, op, result; | ||
| 1043 | int ev; | ||
| 1044 | |||
| 1045 | if (!ppmu->cache_events) | ||
| 1046 | return -EINVAL; | ||
| 1047 | |||
| 1048 | /* unpack config */ | ||
| 1049 | type = config & 0xff; | ||
| 1050 | op = (config >> 8) & 0xff; | ||
| 1051 | result = (config >> 16) & 0xff; | ||
| 1052 | |||
| 1053 | if (type >= PERF_COUNT_HW_CACHE_MAX || | ||
| 1054 | op >= PERF_COUNT_HW_CACHE_OP_MAX || | ||
| 1055 | result >= PERF_COUNT_HW_CACHE_RESULT_MAX) | ||
| 1056 | return -EINVAL; | ||
| 1057 | |||
| 1058 | ev = (*ppmu->cache_events)[type][op][result]; | ||
| 1059 | if (ev == 0) | ||
| 1060 | return -EOPNOTSUPP; | ||
| 1061 | if (ev == -1) | ||
| 1062 | return -EINVAL; | ||
| 1063 | *eventp = ev; | ||
| 1064 | return 0; | ||
| 1065 | } | ||
| 1066 | |||
/*
 * Set up a new hardware event: translate the generic config into a
 * raw event code, check it can be scheduled alongside its group, and
 * reserve the PMC hardware if this is the first active event.
 */
static int power_pmu_event_init(struct perf_event *event)
{
	u64 ev;
	unsigned long flags;
	struct perf_event *ctrs[MAX_HWEVENTS];
	u64 events[MAX_HWEVENTS];
	unsigned int cflags[MAX_HWEVENTS];
	int n;
	int err;
	struct cpu_hw_events *cpuhw;

	if (!ppmu)
		return -ENOENT;

	/* translate attr.type/attr.config to a raw event code */
	switch (event->attr.type) {
	case PERF_TYPE_HARDWARE:
		ev = event->attr.config;
		if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
			return -EOPNOTSUPP;
		ev = ppmu->generic_events[ev];
		break;
	case PERF_TYPE_HW_CACHE:
		err = hw_perf_cache_event(event->attr.config, &ev);
		if (err)
			return err;
		break;
	case PERF_TYPE_RAW:
		ev = event->attr.config;
		break;
	default:
		return -ENOENT;
	}

	event->hw.config_base = ev;
	event->hw.idx = 0;

	/*
	 * If we are not running on a hypervisor, force the
	 * exclude_hv bit to 0 so that we don't care what
	 * the user set it to.
	 */
	if (!firmware_has_feature(FW_FEATURE_LPAR))
		event->attr.exclude_hv = 0;

	/*
	 * If this is a per-task event, then we can use
	 * PM_RUN_* events interchangeably with their non RUN_*
	 * equivalents, e.g. PM_RUN_CYC instead of PM_CYC.
	 * XXX we should check if the task is an idle task.
	 */
	flags = 0;
	if (event->attach_state & PERF_ATTACH_TASK)
		flags |= PPMU_ONLY_COUNT_RUN;

	/*
	 * If this machine has limited events, check whether this
	 * event_id could go on a limited event.
	 */
	if (ppmu->flags & PPMU_LIMITED_PMC5_6) {
		if (can_go_on_limited_pmc(event, ev, flags)) {
			flags |= PPMU_LIMITED_PMC_OK;
		} else if (ppmu->limited_pmc_event(ev)) {
			/*
			 * The requested event_id is on a limited PMC,
			 * but we can't use a limited PMC; see if any
			 * alternative goes on a normal PMC.
			 */
			ev = normal_pmc_alternative(ev, flags);
			if (!ev)
				return -EINVAL;
		}
	}

	/*
	 * If this is in a group, check if it can go on with all the
	 * other hardware events in the group.  We assume the event
	 * hasn't been linked into its leader's sibling list at this point.
	 */
	n = 0;
	if (event->group_leader != event) {
		n = collect_events(event->group_leader, ppmu->n_counter - 1,
				   ctrs, events, cflags);
		if (n < 0)
			return -EINVAL;
	}
	events[n] = ev;
	ctrs[n] = event;
	cflags[n] = flags;
	if (check_excludes(ctrs, cflags, n, 1))
		return -EINVAL;

	cpuhw = &get_cpu_var(cpu_hw_events);
	err = power_check_constraints(cpuhw, events, cflags, n + 1);
	put_cpu_var(cpu_hw_events);
	if (err)
		return -EINVAL;

	/* constraint checking may have rewritten the event code */
	event->hw.config = events[n];
	event->hw.event_base = cflags[n];
	event->hw.last_period = event->hw.sample_period;
	local64_set(&event->hw.period_left, event->hw.last_period);

	/*
	 * See if we need to reserve the PMU.
	 * If no events are currently in use, then we have to take a
	 * mutex to ensure that we don't race with another task doing
	 * reserve_pmc_hardware or release_pmc_hardware.
	 */
	err = 0;
	if (!atomic_inc_not_zero(&num_events)) {
		mutex_lock(&pmc_reserve_mutex);
		if (atomic_read(&num_events) == 0 &&
		    reserve_pmc_hardware(perf_event_interrupt))
			err = -EBUSY;
		else
			atomic_inc(&num_events);
		mutex_unlock(&pmc_reserve_mutex);
	}
	event->destroy = hw_perf_event_destroy;

	return err;
}
| 1189 | |||
/*
 * Operations vector handed to the generic perf core by
 * register_power_pmu().
 */
struct pmu power_pmu = {
	.pmu_enable = power_pmu_enable,
	.pmu_disable = power_pmu_disable,
	.event_init = power_pmu_event_init,
	.add = power_pmu_add,
	.del = power_pmu_del,
	.start = power_pmu_start,
	.stop = power_pmu_stop,
	.read = power_pmu_read,
	.start_txn = power_pmu_start_txn,
	.cancel_txn = power_pmu_cancel_txn,
	.commit_txn = power_pmu_commit_txn,
};
| 1203 | |||
/*
 * A counter has overflowed; update its count and record
 * things if requested. Note that interrupts are hard-disabled
 * here so there is no possibility of being interrupted.
 */
static void record_and_restart(struct perf_event *event, unsigned long val,
			       struct pt_regs *regs)
{
	u64 period = event->hw.sample_period;
	s64 prev, delta, left;
	int record = 0;

	if (event->hw.state & PERF_HES_STOPPED) {
		/* stopped events just get their PMC cleared */
		write_pmc(event->hw.idx, 0);
		return;
	}

	/* we don't have to worry about interrupts here */
	prev = local64_read(&event->hw.prev_count);
	delta = check_and_compute_delta(prev, val);
	local64_add(delta, &event->count);

	/*
	 * See if the total period for this event has expired,
	 * and update for the next period.
	 */
	val = 0;
	left = local64_read(&event->hw.period_left) - delta;
	if (period) {
		if (left <= 0) {
			left += period;
			if (left <= 0)
				left = period;
			record = 1;
			event->hw.last_period = event->hw.sample_period;
		}
		/* program the PMC so it overflows after 'left' more counts */
		if (left < 0x80000000LL)
			val = 0x80000000LL - left;
	}

	write_pmc(event->hw.idx, val);
	local64_set(&event->hw.prev_count, val);
	local64_set(&event->hw.period_left, left);
	perf_event_update_userpage(event);

	/*
	 * Finally record data if requested.
	 */
	if (record) {
		struct perf_sample_data data;

		perf_sample_data_init(&data, ~0ULL);
		data.period = event->hw.last_period;

		if (event->attr.sample_type & PERF_SAMPLE_ADDR)
			perf_get_data_addr(regs, &data.addr);

		/* a nonzero return asks us to stop the event */
		if (perf_event_overflow(event, &data, regs))
			power_pmu_stop(event, 0);
	}
}
| 1265 | |||
| 1266 | /* | ||
| 1267 | * Called from generic code to get the misc flags (i.e. processor mode) | ||
| 1268 | * for an event_id. | ||
| 1269 | */ | ||
| 1270 | unsigned long perf_misc_flags(struct pt_regs *regs) | ||
| 1271 | { | ||
| 1272 | u32 flags = perf_get_misc_flags(regs); | ||
| 1273 | |||
| 1274 | if (flags) | ||
| 1275 | return flags; | ||
| 1276 | return user_mode(regs) ? PERF_RECORD_MISC_USER : | ||
| 1277 | PERF_RECORD_MISC_KERNEL; | ||
| 1278 | } | ||
| 1279 | |||
/*
 * Called from generic code to get the instruction pointer
 * for an event_id.
 */
unsigned long perf_instruction_pointer(struct pt_regs *regs)
{
	unsigned long ip;

	if (TRAP(regs) != 0xf00)
		return regs->nip;	/* not a PMU interrupt */

	/*
	 * SIAR holds the sampled instruction address; perf_ip_adjust()
	 * presumably applies a platform-specific correction — see its
	 * definition for the exact semantics.
	 */
	ip = mfspr(SPRN_SIAR) + perf_ip_adjust(regs);
	return ip;
}
| 1294 | |||
| 1295 | static bool pmc_overflow(unsigned long val) | ||
| 1296 | { | ||
| 1297 | if ((int)val < 0) | ||
| 1298 | return true; | ||
| 1299 | |||
| 1300 | /* | ||
| 1301 | * Events on POWER7 can roll back if a speculative event doesn't | ||
| 1302 | * eventually complete. Unfortunately in some rare cases they will | ||
| 1303 | * raise a performance monitor exception. We need to catch this to | ||
| 1304 | * ensure we reset the PMC. In all cases the PMC will be 256 or less | ||
| 1305 | * cycles from overflow. | ||
| 1306 | * | ||
| 1307 | * We only do this if the first pass fails to find any overflowing | ||
| 1308 | * PMCs because a user might set a period of less than 256 and we | ||
| 1309 | * don't want to mistakenly reset them. | ||
| 1310 | */ | ||
| 1311 | if (__is_processor(PV_POWER7) && ((0x80000000 - val) <= 256)) | ||
| 1312 | return true; | ||
| 1313 | |||
| 1314 | return false; | ||
| 1315 | } | ||
| 1316 | |||
/*
 * Performance monitor interrupt stuff
 */
static void perf_event_interrupt(struct pt_regs *regs)
{
	int i;
	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
	struct perf_event *event;
	unsigned long val;
	int found = 0;
	int nmi;

	/* read and freeze the limited counters as early as possible */
	if (cpuhw->n_limited)
		freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5),
					mfspr(SPRN_PMC6));

	perf_read_regs(regs);

	/* pick the nmi_/irq_ enter/exit pair matching this interrupt */
	nmi = perf_intr_is_nmi(regs);
	if (nmi)
		nmi_enter();
	else
		irq_enter();

	for (i = 0; i < cpuhw->n_events; ++i) {
		event = cpuhw->event[i];
		if (!event->hw.idx || is_limited_pmc(event->hw.idx))
			continue;
		val = read_pmc(event->hw.idx);
		if ((int)val < 0) {
			/* event has overflowed */
			found = 1;
			record_and_restart(event, val, regs);
		}
	}

	/*
	 * In case we didn't find and reset the event that caused
	 * the interrupt, scan all events and reset any that are
	 * negative, to avoid getting continual interrupts.
	 * Any that we processed in the previous loop will not be negative.
	 */
	if (!found) {
		for (i = 0; i < ppmu->n_counter; ++i) {
			if (is_limited_pmc(i + 1))
				continue;
			val = read_pmc(i + 1);
			if (pmc_overflow(val))
				write_pmc(i + 1, 0);
		}
	}

	/*
	 * Reset MMCR0 to its normal value. This will set PMXE and
	 * clear FC (freeze counters) and PMAO (perf mon alert occurred)
	 * and thus allow interrupts to occur again.
	 * XXX might want to use MSR.PM to keep the events frozen until
	 * we get back out of this interrupt.
	 */
	write_mmcr0(cpuhw, cpuhw->mmcr[0]);

	if (nmi)
		nmi_exit();
	else
		irq_exit();
}
| 1383 | |||
/* Reset the per-cpu PMU state for @cpu: everything cleared, counters frozen. */
static void power_pmu_setup(int cpu)
{
	struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);

	if (!ppmu)
		return;
	memset(cpuhw, 0, sizeof(*cpuhw));
	cpuhw->mmcr[0] = MMCR0_FC;
}
| 1393 | |||
| 1394 | static int __cpuinit | ||
| 1395 | power_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) | ||
| 1396 | { | ||
| 1397 | unsigned int cpu = (long)hcpu; | ||
| 1398 | |||
| 1399 | switch (action & ~CPU_TASKS_FROZEN) { | ||
| 1400 | case CPU_UP_PREPARE: | ||
| 1401 | power_pmu_setup(cpu); | ||
| 1402 | break; | ||
| 1403 | |||
| 1404 | default: | ||
| 1405 | break; | ||
| 1406 | } | ||
| 1407 | |||
| 1408 | return NOTIFY_OK; | ||
| 1409 | } | ||
| 1410 | |||
/*
 * Register a CPU-specific PMU backend and hook this driver into the
 * generic perf core.  Only one backend may register (first wins).
 */
int __cpuinit register_power_pmu(struct power_pmu *pmu)
{
	if (ppmu)
		return -EBUSY;		/* something's already registered */

	ppmu = pmu;
	pr_info("%s performance monitor hardware support registered\n",
		pmu->name);

#ifdef MSR_HV
	/*
	 * Use FCHV to ignore kernel events if MSR.HV is set.
	 */
	if (mfmsr() & MSR_HV)
		freeze_events_kernel = MMCR0_FCHV;
#endif /* MSR_HV */

	perf_pmu_register(&power_pmu, "cpu", PERF_TYPE_RAW);
	perf_cpu_notifier(power_pmu_notifier);

	return 0;
}
diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c new file mode 100644 index 00000000000..0a6d2a9d569 --- /dev/null +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c | |||
| @@ -0,0 +1,688 @@ | |||
| 1 | /* | ||
| 2 | * Performance event support - Freescale Embedded Performance Monitor | ||
| 3 | * | ||
| 4 | * Copyright 2008-2009 Paul Mackerras, IBM Corporation. | ||
| 5 | * Copyright 2010 Freescale Semiconductor, Inc. | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or | ||
| 8 | * modify it under the terms of the GNU General Public License | ||
| 9 | * as published by the Free Software Foundation; either version | ||
| 10 | * 2 of the License, or (at your option) any later version. | ||
| 11 | */ | ||
| 12 | #include <linux/kernel.h> | ||
| 13 | #include <linux/sched.h> | ||
| 14 | #include <linux/perf_event.h> | ||
| 15 | #include <linux/percpu.h> | ||
| 16 | #include <linux/hardirq.h> | ||
| 17 | #include <asm/reg_fsl_emb.h> | ||
| 18 | #include <asm/pmc.h> | ||
| 19 | #include <asm/machdep.h> | ||
| 20 | #include <asm/firmware.h> | ||
| 21 | #include <asm/ptrace.h> | ||
| 22 | |||
/* Per-CPU scheduling state for the Freescale embedded PMU. */
struct cpu_hw_events {
	int n_events;		/* number of events currently on counters */
	int disabled;		/* non-zero while the PMU is disabled */
	u8  pmcs_enabled;	/* set once ppc_enable_pmcs() has run on this cpu */
	struct perf_event *event[MAX_HWEVENTS];	/* event per counter slot, or NULL */
};
static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);

/* The registered chip-specific PMU description; NULL until registration. */
static struct fsl_emb_pmu *ppmu;

/* Number of perf_events counting hardware events */
static atomic_t num_events;
/* Used to avoid races in calling reserve/release_pmc_hardware */
static DEFINE_MUTEX(pmc_reserve_mutex);
| 37 | |||
/*
 * If interrupts were soft-disabled when a PMU interrupt occurs, treat
 * it as an NMI.  32-bit kernels have no soft-disable state (no
 * regs->softe), so PMU interrupts are never treated as NMIs there.
 */
static inline int perf_intr_is_nmi(struct pt_regs *regs)
{
#ifdef __powerpc64__
	return !regs->softe;
#else
	return 0;
#endif
}
| 50 | |||
| 51 | static void perf_event_interrupt(struct pt_regs *regs); | ||
| 52 | |||
| 53 | /* | ||
| 54 | * Read one performance monitor counter (PMC). | ||
| 55 | */ | ||
| 56 | static unsigned long read_pmc(int idx) | ||
| 57 | { | ||
| 58 | unsigned long val; | ||
| 59 | |||
| 60 | switch (idx) { | ||
| 61 | case 0: | ||
| 62 | val = mfpmr(PMRN_PMC0); | ||
| 63 | break; | ||
| 64 | case 1: | ||
| 65 | val = mfpmr(PMRN_PMC1); | ||
| 66 | break; | ||
| 67 | case 2: | ||
| 68 | val = mfpmr(PMRN_PMC2); | ||
| 69 | break; | ||
| 70 | case 3: | ||
| 71 | val = mfpmr(PMRN_PMC3); | ||
| 72 | break; | ||
| 73 | default: | ||
| 74 | printk(KERN_ERR "oops trying to read PMC%d\n", idx); | ||
| 75 | val = 0; | ||
| 76 | } | ||
| 77 | return val; | ||
| 78 | } | ||
| 79 | |||
| 80 | /* | ||
| 81 | * Write one PMC. | ||
| 82 | */ | ||
| 83 | static void write_pmc(int idx, unsigned long val) | ||
| 84 | { | ||
| 85 | switch (idx) { | ||
| 86 | case 0: | ||
| 87 | mtpmr(PMRN_PMC0, val); | ||
| 88 | break; | ||
| 89 | case 1: | ||
| 90 | mtpmr(PMRN_PMC1, val); | ||
| 91 | break; | ||
| 92 | case 2: | ||
| 93 | mtpmr(PMRN_PMC2, val); | ||
| 94 | break; | ||
| 95 | case 3: | ||
| 96 | mtpmr(PMRN_PMC3, val); | ||
| 97 | break; | ||
| 98 | default: | ||
| 99 | printk(KERN_ERR "oops trying to write PMC%d\n", idx); | ||
| 100 | } | ||
| 101 | |||
| 102 | isync(); | ||
| 103 | } | ||
| 104 | |||
| 105 | /* | ||
| 106 | * Write one local control A register | ||
| 107 | */ | ||
| 108 | static void write_pmlca(int idx, unsigned long val) | ||
| 109 | { | ||
| 110 | switch (idx) { | ||
| 111 | case 0: | ||
| 112 | mtpmr(PMRN_PMLCA0, val); | ||
| 113 | break; | ||
| 114 | case 1: | ||
| 115 | mtpmr(PMRN_PMLCA1, val); | ||
| 116 | break; | ||
| 117 | case 2: | ||
| 118 | mtpmr(PMRN_PMLCA2, val); | ||
| 119 | break; | ||
| 120 | case 3: | ||
| 121 | mtpmr(PMRN_PMLCA3, val); | ||
| 122 | break; | ||
| 123 | default: | ||
| 124 | printk(KERN_ERR "oops trying to write PMLCA%d\n", idx); | ||
| 125 | } | ||
| 126 | |||
| 127 | isync(); | ||
| 128 | } | ||
| 129 | |||
| 130 | /* | ||
| 131 | * Write one local control B register | ||
| 132 | */ | ||
| 133 | static void write_pmlcb(int idx, unsigned long val) | ||
| 134 | { | ||
| 135 | switch (idx) { | ||
| 136 | case 0: | ||
| 137 | mtpmr(PMRN_PMLCB0, val); | ||
| 138 | break; | ||
| 139 | case 1: | ||
| 140 | mtpmr(PMRN_PMLCB1, val); | ||
| 141 | break; | ||
| 142 | case 2: | ||
| 143 | mtpmr(PMRN_PMLCB2, val); | ||
| 144 | break; | ||
| 145 | case 3: | ||
| 146 | mtpmr(PMRN_PMLCB3, val); | ||
| 147 | break; | ||
| 148 | default: | ||
| 149 | printk(KERN_ERR "oops trying to write PMLCB%d\n", idx); | ||
| 150 | } | ||
| 151 | |||
| 152 | isync(); | ||
| 153 | } | ||
| 154 | |||
/*
 * Sample the hardware counter for an event and fold the accumulated
 * delta into event->count and period_left.
 */
static void fsl_emb_pmu_read(struct perf_event *event)
{
	s64 val, delta, prev;

	if (event->hw.state & PERF_HES_STOPPED)
		return;

	/*
	 * Performance monitor interrupts come even when interrupts
	 * are soft-disabled, as long as interrupts are hard-enabled.
	 * Therefore we treat them like NMIs: the cmpxchg loop retries
	 * if prev_count changed underneath us between the read of
	 * prev_count and the read of the PMC.
	 */
	do {
		prev = local64_read(&event->hw.prev_count);
		barrier();	/* read prev_count before sampling the PMC */
		val = read_pmc(event->hw.idx);
	} while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);

	/* The counters are only 32 bits wide */
	delta = (val - prev) & 0xfffffffful;
	local64_add(delta, &event->count);
	local64_sub(delta, &event->hw.period_left);
}
| 178 | |||
/*
 * Disable all events to prevent PMU interrupts and to allow
 * events to be added or removed.
 */
static void fsl_emb_pmu_disable(struct pmu *pmu)
{
	struct cpu_hw_events *cpuhw;
	unsigned long flags;

	local_irq_save(flags);
	cpuhw = &__get_cpu_var(cpu_hw_events);

	if (!cpuhw->disabled) {
		cpuhw->disabled = 1;

		/*
		 * Check if we ever enabled the PMU on this cpu.
		 */
		if (!cpuhw->pmcs_enabled) {
			ppc_enable_pmcs();
			cpuhw->pmcs_enabled = 1;
		}

		/* only touch the hardware if any events are in use at all */
		if (atomic_read(&num_events)) {
			/*
			 * Set the 'freeze all counters' bit, and disable
			 * interrupts.  The barrier is to make sure the
			 * mtpmr has been executed and the PMU has frozen
			 * the events before we return.
			 */

			mtpmr(PMRN_PMGC0, PMGC0_FAC);
			isync();
		}
	}
	local_irq_restore(flags);
}
| 216 | |||
/*
 * Re-enable all events if disable == 0.
 * If we were previously disabled and events were added, then
 * put the new config on the PMU.
 */
static void fsl_emb_pmu_enable(struct pmu *pmu)
{
	struct cpu_hw_events *cpuhw;
	unsigned long flags;

	local_irq_save(flags);
	cpuhw = &__get_cpu_var(cpu_hw_events);
	if (!cpuhw->disabled)
		goto out;	/* already enabled; nothing to do */

	cpuhw->disabled = 0;
	ppc_set_pmu_inuse(cpuhw->n_events != 0);

	if (cpuhw->n_events > 0) {
		/* unfreeze: enable PM interrupts and freeze-on-enabled-condition */
		mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE);
		isync();
	}

 out:
	local_irq_restore(flags);
}
| 243 | |||
| 244 | static int collect_events(struct perf_event *group, int max_count, | ||
| 245 | struct perf_event *ctrs[]) | ||
| 246 | { | ||
| 247 | int n = 0; | ||
| 248 | struct perf_event *event; | ||
| 249 | |||
| 250 | if (!is_software_event(group)) { | ||
| 251 | if (n >= max_count) | ||
| 252 | return -1; | ||
| 253 | ctrs[n] = group; | ||
| 254 | n++; | ||
| 255 | } | ||
| 256 | list_for_each_entry(event, &group->sibling_list, group_entry) { | ||
| 257 | if (!is_software_event(event) && | ||
| 258 | event->state != PERF_EVENT_STATE_OFF) { | ||
| 259 | if (n >= max_count) | ||
| 260 | return -1; | ||
| 261 | ctrs[n] = event; | ||
| 262 | n++; | ||
| 263 | } | ||
| 264 | } | ||
| 265 | return n; | ||
| 266 | } | ||
| 267 | |||
/* context locked on entry */
static int fsl_emb_pmu_add(struct perf_event *event, int flags)
{
	struct cpu_hw_events *cpuhw;
	int ret = -EAGAIN;
	int num_counters = ppmu->n_counter;
	u64 val;
	int i;

	perf_pmu_disable(event->pmu);
	cpuhw = &get_cpu_var(cpu_hw_events);

	/* restricted events may only use the first n_restricted counters */
	if (event->hw.config & FSL_EMB_EVENT_RESTRICTED)
		num_counters = ppmu->n_restricted;

	/*
	 * Allocate counters from top-down, so that restricted-capable
	 * counters are kept free as long as possible.
	 */
	for (i = num_counters - 1; i >= 0; i--) {
		if (cpuhw->event[i])
			continue;

		break;
	}

	if (i < 0)
		goto out;	/* no free counter: ret stays -EAGAIN */

	event->hw.idx = i;
	cpuhw->event[i] = event;
	++cpuhw->n_events;

	/*
	 * Load the counter so it goes negative (raising an interrupt)
	 * after period_left more counts, matching record_and_restart().
	 */
	val = 0;
	if (event->hw.sample_period) {
		s64 left = local64_read(&event->hw.period_left);
		if (left < 0x80000000L)
			val = 0x80000000L - left;
	}
	local64_set(&event->hw.prev_count, val);

	/* if not asked to start counting immediately, park the counter at 0 */
	if (!(flags & PERF_EF_START)) {
		event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
		val = 0;
	}

	write_pmc(i, val);
	perf_event_update_userpage(event);

	/* program the control registers last, after the count is loaded */
	write_pmlcb(i, event->hw.config >> 32);
	write_pmlca(i, event->hw.config_base);

	ret = 0;
 out:
	put_cpu_var(cpu_hw_events);
	perf_pmu_enable(event->pmu);
	return ret;
}
| 326 | |||
| 327 | /* context locked on entry */ | ||
| 328 | static void fsl_emb_pmu_del(struct perf_event *event, int flags) | ||
| 329 | { | ||
| 330 | struct cpu_hw_events *cpuhw; | ||
| 331 | int i = event->hw.idx; | ||
| 332 | |||
| 333 | perf_pmu_disable(event->pmu); | ||
| 334 | if (i < 0) | ||
| 335 | goto out; | ||
| 336 | |||
| 337 | fsl_emb_pmu_read(event); | ||
| 338 | |||
| 339 | cpuhw = &get_cpu_var(cpu_hw_events); | ||
| 340 | |||
| 341 | WARN_ON(event != cpuhw->event[event->hw.idx]); | ||
| 342 | |||
| 343 | write_pmlca(i, 0); | ||
| 344 | write_pmlcb(i, 0); | ||
| 345 | write_pmc(i, 0); | ||
| 346 | |||
| 347 | cpuhw->event[i] = NULL; | ||
| 348 | event->hw.idx = -1; | ||
| 349 | |||
| 350 | /* | ||
| 351 | * TODO: if at least one restricted event exists, and we | ||
| 352 | * just freed up a non-restricted-capable counter, and | ||
| 353 | * there is a restricted-capable counter occupied by | ||
| 354 | * a non-restricted event, migrate that event to the | ||
| 355 | * vacated counter. | ||
| 356 | */ | ||
| 357 | |||
| 358 | cpuhw->n_events--; | ||
| 359 | |||
| 360 | out: | ||
| 361 | perf_pmu_enable(event->pmu); | ||
| 362 | put_cpu_var(cpu_hw_events); | ||
| 363 | } | ||
| 364 | |||
| 365 | static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags) | ||
| 366 | { | ||
| 367 | unsigned long flags; | ||
| 368 | s64 left; | ||
| 369 | |||
| 370 | if (event->hw.idx < 0 || !event->hw.sample_period) | ||
| 371 | return; | ||
| 372 | |||
| 373 | if (!(event->hw.state & PERF_HES_STOPPED)) | ||
| 374 | return; | ||
| 375 | |||
| 376 | if (ef_flags & PERF_EF_RELOAD) | ||
| 377 | WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); | ||
| 378 | |||
| 379 | local_irq_save(flags); | ||
| 380 | perf_pmu_disable(event->pmu); | ||
| 381 | |||
| 382 | event->hw.state = 0; | ||
| 383 | left = local64_read(&event->hw.period_left); | ||
| 384 | write_pmc(event->hw.idx, left); | ||
| 385 | |||
| 386 | perf_event_update_userpage(event); | ||
| 387 | perf_pmu_enable(event->pmu); | ||
| 388 | local_irq_restore(flags); | ||
| 389 | } | ||
| 390 | |||
/*
 * Stop (freeze) a running sampling event: fold in its current count
 * and zero the hardware counter so it raises no further interrupts.
 */
static void fsl_emb_pmu_stop(struct perf_event *event, int ef_flags)
{
	unsigned long flags;

	/* not on a counter, or not a sampling event: nothing to stop */
	if (event->hw.idx < 0 || !event->hw.sample_period)
		return;

	if (event->hw.state & PERF_HES_STOPPED)
		return;

	local_irq_save(flags);
	perf_pmu_disable(event->pmu);

	/* fold in the final count before zeroing the hardware counter */
	fsl_emb_pmu_read(event);
	event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
	write_pmc(event->hw.idx, 0);

	perf_event_update_userpage(event);
	perf_pmu_enable(event->pmu);
	local_irq_restore(flags);
}
| 412 | |||
/*
 * Release the PMU if this is the last perf_event.
 */
static void hw_perf_event_destroy(struct perf_event *event)
{
	/*
	 * Fast path: atomically decrement when at least two events
	 * remain.  Only when we might be the last event do we take the
	 * mutex, re-check, and release the hardware.
	 */
	if (!atomic_add_unless(&num_events, -1, 1)) {
		mutex_lock(&pmc_reserve_mutex);
		if (atomic_dec_return(&num_events) == 0)
			release_pmc_hardware();
		mutex_unlock(&pmc_reserve_mutex);
	}
}
| 425 | |||
| 426 | /* | ||
| 427 | * Translate a generic cache event_id config to a raw event_id code. | ||
| 428 | */ | ||
| 429 | static int hw_perf_cache_event(u64 config, u64 *eventp) | ||
| 430 | { | ||
| 431 | unsigned long type, op, result; | ||
| 432 | int ev; | ||
| 433 | |||
| 434 | if (!ppmu->cache_events) | ||
| 435 | return -EINVAL; | ||
| 436 | |||
| 437 | /* unpack config */ | ||
| 438 | type = config & 0xff; | ||
| 439 | op = (config >> 8) & 0xff; | ||
| 440 | result = (config >> 16) & 0xff; | ||
| 441 | |||
| 442 | if (type >= PERF_COUNT_HW_CACHE_MAX || | ||
| 443 | op >= PERF_COUNT_HW_CACHE_OP_MAX || | ||
| 444 | result >= PERF_COUNT_HW_CACHE_RESULT_MAX) | ||
| 445 | return -EINVAL; | ||
| 446 | |||
| 447 | ev = (*ppmu->cache_events)[type][op][result]; | ||
| 448 | if (ev == 0) | ||
| 449 | return -EOPNOTSUPP; | ||
| 450 | if (ev == -1) | ||
| 451 | return -EINVAL; | ||
| 452 | *eventp = ev; | ||
| 453 | return 0; | ||
| 454 | } | ||
| 455 | |||
/*
 * Set up a new event: translate the generic/cache/raw config into a
 * chip event code, check group schedulability and the restricted
 * counter limit, build the PMLCA control word, and reserve the PMC
 * hardware if this is the first active event.
 *
 * Returns 0 on success, -ENOENT for event types this PMU does not
 * handle, or another negative errno on failure.
 */
static int fsl_emb_pmu_event_init(struct perf_event *event)
{
	u64 ev;
	struct perf_event *events[MAX_HWEVENTS];
	int n;
	int err;
	int num_restricted;
	int i;

	switch (event->attr.type) {
	case PERF_TYPE_HARDWARE:
		ev = event->attr.config;
		if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
			return -EOPNOTSUPP;
		ev = ppmu->generic_events[ev];
		break;

	case PERF_TYPE_HW_CACHE:
		err = hw_perf_cache_event(event->attr.config, &ev);
		if (err)
			return err;
		break;

	case PERF_TYPE_RAW:
		ev = event->attr.config;
		break;

	default:
		return -ENOENT;
	}

	event->hw.config = ppmu->xlate_event(ev);
	if (!(event->hw.config & FSL_EMB_EVENT_VALID))
		return -EINVAL;

	/*
	 * If this is in a group, check if it can go on with all the
	 * other hardware events in the group.  We assume the event
	 * hasn't been linked into its leader's sibling list at this point.
	 */
	n = 0;
	if (event->group_leader != event) {
		n = collect_events(event->group_leader,
		                   ppmu->n_counter - 1, events);
		if (n < 0)
			return -EINVAL;
	}

	if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) {
		/* count group members that also need a restricted counter */
		num_restricted = 0;
		for (i = 0; i < n; i++) {
			if (events[i]->hw.config & FSL_EMB_EVENT_RESTRICTED)
				num_restricted++;
		}

		if (num_restricted >= ppmu->n_restricted)
			return -EINVAL;
	}

	event->hw.idx = -1;	/* no counter assigned yet */

	/*
	 * Counter enable plus FCM1: freeze while MSR[PMM] is set, which
	 * the interrupt handler uses to keep counters frozen until it
	 * returns.
	 */
	event->hw.config_base = PMLCA_CE | PMLCA_FCM1 |
	                        (u32)((ev << 16) & PMLCA_EVENT_MASK);

	if (event->attr.exclude_user)
		event->hw.config_base |= PMLCA_FCU;
	if (event->attr.exclude_kernel)
		event->hw.config_base |= PMLCA_FCS;
	if (event->attr.exclude_idle)
		return -ENOTSUPP;	/* no hardware support for excluding idle */

	event->hw.last_period = event->hw.sample_period;
	local64_set(&event->hw.period_left, event->hw.last_period);

	/*
	 * See if we need to reserve the PMU.
	 * If no events are currently in use, then we have to take a
	 * mutex to ensure that we don't race with another task doing
	 * reserve_pmc_hardware or release_pmc_hardware.
	 */
	err = 0;
	if (!atomic_inc_not_zero(&num_events)) {
		mutex_lock(&pmc_reserve_mutex);
		if (atomic_read(&num_events) == 0 &&
		    reserve_pmc_hardware(perf_event_interrupt))
			err = -EBUSY;
		else
			atomic_inc(&num_events);
		mutex_unlock(&pmc_reserve_mutex);

		/* start out with all counters frozen */
		mtpmr(PMRN_PMGC0, PMGC0_FAC);
		isync();
	}
	event->destroy = hw_perf_event_destroy;

	return err;
}
| 553 | |||
/* perf core callback table for the Freescale embedded PMU */
static struct pmu fsl_emb_pmu = {
	.pmu_enable	= fsl_emb_pmu_enable,
	.pmu_disable	= fsl_emb_pmu_disable,
	.event_init	= fsl_emb_pmu_event_init,
	.add		= fsl_emb_pmu_add,
	.del		= fsl_emb_pmu_del,
	.start		= fsl_emb_pmu_start,
	.stop		= fsl_emb_pmu_stop,
	.read		= fsl_emb_pmu_read,
};
| 564 | |||
/*
 * A counter has overflowed; update its count and record
 * things if requested. Note that interrupts are hard-disabled
 * here so there is no possibility of being interrupted.
 */
static void record_and_restart(struct perf_event *event, unsigned long val,
			       struct pt_regs *regs)
{
	u64 period = event->hw.sample_period;
	s64 prev, delta, left;
	int record = 0;

	if (event->hw.state & PERF_HES_STOPPED) {
		/* stopped event: just zero the counter so it stops interrupting */
		write_pmc(event->hw.idx, 0);
		return;
	}

	/* we don't have to worry about interrupts here */
	prev = local64_read(&event->hw.prev_count);
	delta = (val - prev) & 0xfffffffful;	/* counters are 32 bits wide */
	local64_add(delta, &event->count);

	/*
	 * See if the total period for this event has expired,
	 * and update for the next period.
	 */
	val = 0;
	left = local64_read(&event->hw.period_left) - delta;
	if (period) {
		if (left <= 0) {
			left += period;
			if (left <= 0)
				left = period;
			record = 1;	/* period expired: emit a sample below */
			event->hw.last_period = event->hw.sample_period;
		}
		/* reload so the counter goes negative after 'left' counts */
		if (left < 0x80000000LL)
			val = 0x80000000LL - left;
	}

	write_pmc(event->hw.idx, val);
	local64_set(&event->hw.prev_count, val);
	local64_set(&event->hw.period_left, left);
	perf_event_update_userpage(event);

	/*
	 * Finally record data if requested.
	 */
	if (record) {
		struct perf_sample_data data;

		perf_sample_data_init(&data, 0);
		data.period = event->hw.last_period;

		/* the core asks us to throttle: stop the event */
		if (perf_event_overflow(event, &data, regs))
			fsl_emb_pmu_stop(event, 0);
	}
}
| 623 | |||
/*
 * Performance monitor interrupt handler: scan all counters and fold
 * in / restart any that have overflowed (gone negative).
 */
static void perf_event_interrupt(struct pt_regs *regs)
{
	int i;
	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
	struct perf_event *event;
	unsigned long val;
	int found = 0;	/* NOTE(review): set but never consulted afterwards */
	int nmi;

	/* treat the interrupt as an NMI if interrupts were soft-disabled */
	nmi = perf_intr_is_nmi(regs);
	if (nmi)
		nmi_enter();
	else
		irq_enter();

	for (i = 0; i < ppmu->n_counter; ++i) {
		event = cpuhw->event[i];

		val = read_pmc(i);
		if ((int)val < 0) {
			if (event) {
				/* event has overflowed */
				found = 1;
				record_and_restart(event, val, regs);
			} else {
				/*
				 * Disabled counter is negative,
				 * reset it just in case.
				 */
				write_pmc(i, 0);
			}
		}
	}

	/* PMM will keep counters frozen until we return from the interrupt. */
	mtmsr(mfmsr() | MSR_PMM);
	mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE);
	isync();

	if (nmi)
		nmi_exit();
	else
		irq_exit();
}
| 668 | |||
/*
 * Per-CPU initialisation: clear this CPU's cpu_hw_events so no stale
 * event pointers or state survive.
 */
void hw_perf_event_setup(int cpu)
{
	struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);

	memset(cpuhw, 0, sizeof(*cpuhw));
}
| 675 | |||
/*
 * Register the chip-specific PMU description and hook it into the
 * perf core.  Only one backend may be registered; returns -EBUSY if
 * one already is.
 */
int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu)
{
	if (ppmu)
		return -EBUSY;		/* something's already registered */

	ppmu = pmu;
	pr_info("%s performance monitor hardware support registered\n",
		pmu->name);

	perf_pmu_register(&fsl_emb_pmu, "cpu", PERF_TYPE_RAW);

	return 0;
}
diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c new file mode 100644 index 00000000000..b4f1dda4d08 --- /dev/null +++ b/arch/powerpc/kernel/power4-pmu.c | |||
| @@ -0,0 +1,621 @@ | |||
| 1 | /* | ||
| 2 | * Performance counter support for POWER4 (GP) and POWER4+ (GQ) processors. | ||
| 3 | * | ||
| 4 | * Copyright 2009 Paul Mackerras, IBM Corporation. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU General Public License | ||
| 8 | * as published by the Free Software Foundation; either version | ||
| 9 | * 2 of the License, or (at your option) any later version. | ||
| 10 | */ | ||
| 11 | #include <linux/kernel.h> | ||
| 12 | #include <linux/perf_event.h> | ||
| 13 | #include <linux/string.h> | ||
| 14 | #include <asm/reg.h> | ||
| 15 | #include <asm/cputable.h> | ||
| 16 | |||
| 17 | /* | ||
| 18 | * Bits in event code for POWER4 | ||
| 19 | */ | ||
| 20 | #define PM_PMC_SH 12 /* PMC number (1-based) for direct events */ | ||
| 21 | #define PM_PMC_MSK 0xf | ||
| 22 | #define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */ | ||
| 23 | #define PM_UNIT_MSK 0xf | ||
| 24 | #define PM_LOWER_SH 6 | ||
| 25 | #define PM_LOWER_MSK 1 | ||
| 26 | #define PM_LOWER_MSKS 0x40 | ||
| 27 | #define PM_BYTE_SH 4 /* Byte number of event bus to use */ | ||
| 28 | #define PM_BYTE_MSK 3 | ||
| 29 | #define PM_PMCSEL_MSK 7 | ||
| 30 | |||
| 31 | /* | ||
| 32 | * Unit code values | ||
| 33 | */ | ||
| 34 | #define PM_FPU 1 | ||
| 35 | #define PM_ISU1 2 | ||
| 36 | #define PM_IFU 3 | ||
| 37 | #define PM_IDU0 4 | ||
| 38 | #define PM_ISU1_ALT 6 | ||
| 39 | #define PM_ISU2 7 | ||
| 40 | #define PM_IFU_ALT 8 | ||
| 41 | #define PM_LSU0 9 | ||
| 42 | #define PM_LSU1 0xc | ||
| 43 | #define PM_GPS 0xf | ||
| 44 | |||
| 45 | /* | ||
| 46 | * Bits in MMCR0 for POWER4 | ||
| 47 | */ | ||
| 48 | #define MMCR0_PMC1SEL_SH 8 | ||
| 49 | #define MMCR0_PMC2SEL_SH 1 | ||
| 50 | #define MMCR_PMCSEL_MSK 0x1f | ||
| 51 | |||
| 52 | /* | ||
| 53 | * Bits in MMCR1 for POWER4 | ||
| 54 | */ | ||
| 55 | #define MMCR1_TTM0SEL_SH 62 | ||
| 56 | #define MMCR1_TTC0SEL_SH 61 | ||
| 57 | #define MMCR1_TTM1SEL_SH 59 | ||
| 58 | #define MMCR1_TTC1SEL_SH 58 | ||
| 59 | #define MMCR1_TTM2SEL_SH 56 | ||
| 60 | #define MMCR1_TTC2SEL_SH 55 | ||
| 61 | #define MMCR1_TTM3SEL_SH 53 | ||
| 62 | #define MMCR1_TTC3SEL_SH 52 | ||
| 63 | #define MMCR1_TTMSEL_MSK 3 | ||
| 64 | #define MMCR1_TD_CP_DBG0SEL_SH 50 | ||
| 65 | #define MMCR1_TD_CP_DBG1SEL_SH 48 | ||
| 66 | #define MMCR1_TD_CP_DBG2SEL_SH 46 | ||
| 67 | #define MMCR1_TD_CP_DBG3SEL_SH 44 | ||
| 68 | #define MMCR1_DEBUG0SEL_SH 43 | ||
| 69 | #define MMCR1_DEBUG1SEL_SH 42 | ||
| 70 | #define MMCR1_DEBUG2SEL_SH 41 | ||
| 71 | #define MMCR1_DEBUG3SEL_SH 40 | ||
| 72 | #define MMCR1_PMC1_ADDER_SEL_SH 39 | ||
| 73 | #define MMCR1_PMC2_ADDER_SEL_SH 38 | ||
| 74 | #define MMCR1_PMC6_ADDER_SEL_SH 37 | ||
| 75 | #define MMCR1_PMC5_ADDER_SEL_SH 36 | ||
| 76 | #define MMCR1_PMC8_ADDER_SEL_SH 35 | ||
| 77 | #define MMCR1_PMC7_ADDER_SEL_SH 34 | ||
| 78 | #define MMCR1_PMC3_ADDER_SEL_SH 33 | ||
| 79 | #define MMCR1_PMC4_ADDER_SEL_SH 32 | ||
| 80 | #define MMCR1_PMC3SEL_SH 27 | ||
| 81 | #define MMCR1_PMC4SEL_SH 22 | ||
| 82 | #define MMCR1_PMC5SEL_SH 17 | ||
| 83 | #define MMCR1_PMC6SEL_SH 12 | ||
| 84 | #define MMCR1_PMC7SEL_SH 7 | ||
| 85 | #define MMCR1_PMC8SEL_SH 2 /* note bit 0 is in MMCRA for GP */ | ||
| 86 | |||
| 87 | static short mmcr1_adder_bits[8] = { | ||
| 88 | MMCR1_PMC1_ADDER_SEL_SH, | ||
| 89 | MMCR1_PMC2_ADDER_SEL_SH, | ||
| 90 | MMCR1_PMC3_ADDER_SEL_SH, | ||
| 91 | MMCR1_PMC4_ADDER_SEL_SH, | ||
| 92 | MMCR1_PMC5_ADDER_SEL_SH, | ||
| 93 | MMCR1_PMC6_ADDER_SEL_SH, | ||
| 94 | MMCR1_PMC7_ADDER_SEL_SH, | ||
| 95 | MMCR1_PMC8_ADDER_SEL_SH | ||
| 96 | }; | ||
| 97 | |||
| 98 | /* | ||
| 99 | * Bits in MMCRA | ||
| 100 | */ | ||
| 101 | #define MMCRA_PMC8SEL0_SH 17 /* PMC8SEL bit 0 for GP */ | ||
| 102 | |||
| 103 | /* | ||
| 104 | * Layout of constraint bits: | ||
| 105 | * 6666555555555544444444443333333333222222222211111111110000000000 | ||
| 106 | * 3210987654321098765432109876543210987654321098765432109876543210 | ||
| 107 | * |[ >[ >[ >|||[ >[ >< >< >< >< ><><><><><><><><> | ||
| 108 | * | UC1 UC2 UC3 ||| PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8 | ||
| 109 | * \SMPL ||\TTC3SEL | ||
| 110 | * |\TTC_IFU_SEL | ||
| 111 | * \TTM2SEL0 | ||
| 112 | * | ||
| 113 | * SMPL - SAMPLE_ENABLE constraint | ||
| 114 | * 56: SAMPLE_ENABLE value 0x0100_0000_0000_0000 | ||
| 115 | * | ||
| 116 | * UC1 - unit constraint 1: can't have all three of FPU/ISU1/IDU0|ISU2 | ||
| 117 | * 55: UC1 error 0x0080_0000_0000_0000 | ||
| 118 | * 54: FPU events needed 0x0040_0000_0000_0000 | ||
| 119 | * 53: ISU1 events needed 0x0020_0000_0000_0000 | ||
| 120 | * 52: IDU0|ISU2 events needed 0x0010_0000_0000_0000 | ||
| 121 | * | ||
| 122 | * UC2 - unit constraint 2: can't have all three of FPU/IFU/LSU0 | ||
| 123 | * 51: UC2 error 0x0008_0000_0000_0000 | ||
| 124 | * 50: FPU events needed 0x0004_0000_0000_0000 | ||
| 125 | * 49: IFU events needed 0x0002_0000_0000_0000 | ||
| 126 | * 48: LSU0 events needed 0x0001_0000_0000_0000 | ||
| 127 | * | ||
| 128 | * UC3 - unit constraint 3: can't have all four of LSU0/IFU/IDU0|ISU2/ISU1 | ||
| 129 | * 47: UC3 error 0x8000_0000_0000 | ||
| 130 | * 46: LSU0 events needed 0x4000_0000_0000 | ||
| 131 | * 45: IFU events needed 0x2000_0000_0000 | ||
| 132 | * 44: IDU0|ISU2 events needed 0x1000_0000_0000 | ||
| 133 | * 43: ISU1 events needed 0x0800_0000_0000 | ||
| 134 | * | ||
| 135 | * TTM2SEL0 | ||
| 136 | * 42: 0 = IDU0 events needed | ||
| 137 | * 1 = ISU2 events needed 0x0400_0000_0000 | ||
| 138 | * | ||
| 139 | * TTC_IFU_SEL | ||
| 140 | * 41: 0 = IFU.U events needed | ||
| 141 | * 1 = IFU.L events needed 0x0200_0000_0000 | ||
| 142 | * | ||
| 143 | * TTC3SEL | ||
| 144 | * 40: 0 = LSU1.U events needed | ||
| 145 | * 1 = LSU1.L events needed 0x0100_0000_0000 | ||
| 146 | * | ||
| 147 | * PS1 | ||
| 148 | * 39: PS1 error 0x0080_0000_0000 | ||
| 149 | * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000 | ||
| 150 | * | ||
| 151 | * PS2 | ||
| 152 | * 35: PS2 error 0x0008_0000_0000 | ||
| 153 | * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000 | ||
| 154 | * | ||
| 155 | * B0 | ||
| 156 | * 28-31: Byte 0 event source 0xf000_0000 | ||
| 157 | * 1 = FPU | ||
| 158 | * 2 = ISU1 | ||
| 159 | * 3 = IFU | ||
| 160 | * 4 = IDU0 | ||
| 161 | * 7 = ISU2 | ||
| 162 | * 9 = LSU0 | ||
| 163 | * c = LSU1 | ||
| 164 | * f = GPS | ||
| 165 | * | ||
| 166 | * B1, B2, B3 | ||
| 167 | * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources | ||
| 168 | * | ||
| 169 | * P8 | ||
| 170 | * 15: P8 error 0x8000 | ||
| 171 | * 14-15: Count of events needing PMC8 | ||
| 172 | * | ||
| 173 | * P1..P7 | ||
| 174 | * 0-13: Count of events needing PMC1..PMC7 | ||
| 175 | * | ||
| 176 | * Note: this doesn't allow events using IFU.U to be combined with events | ||
| 177 | * using IFU.L, though that is feasible (using TTM0 and TTM2). However | ||
| 178 | * there are no listed events for IFU.L (they are debug events not | ||
| 179 | * verified for performance monitoring) so this shouldn't cause a | ||
| 180 | * problem. | ||
| 181 | */ | ||
| 182 | |||
| 183 | static struct unitinfo { | ||
| 184 | unsigned long value, mask; | ||
| 185 | int unit; | ||
| 186 | int lowerbit; | ||
| 187 | } p4_unitinfo[16] = { | ||
| 188 | [PM_FPU] = { 0x44000000000000ul, 0x88000000000000ul, PM_FPU, 0 }, | ||
| 189 | [PM_ISU1] = { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 }, | ||
| 190 | [PM_ISU1_ALT] = | ||
| 191 | { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 }, | ||
| 192 | [PM_IFU] = { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 }, | ||
| 193 | [PM_IFU_ALT] = | ||
| 194 | { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 }, | ||
| 195 | [PM_IDU0] = { 0x10100000000000ul, 0x80840000000000ul, PM_IDU0, 1 }, | ||
| 196 | [PM_ISU2] = { 0x10140000000000ul, 0x80840000000000ul, PM_ISU2, 0 }, | ||
| 197 | [PM_LSU0] = { 0x01400000000000ul, 0x08800000000000ul, PM_LSU0, 0 }, | ||
| 198 | [PM_LSU1] = { 0x00000000000000ul, 0x00010000000000ul, PM_LSU1, 40 }, | ||
| 199 | [PM_GPS] = { 0x00000000000000ul, 0x00000000000000ul, PM_GPS, 0 } | ||
| 200 | }; | ||
| 201 | |||
/*
 * Bitmap of marked-instruction direct events, one byte per PMC:
 * bit <psel> is set if PMCSEL value <psel> on that PMC counts a
 * marked-instruction event.  Consulted by p4_marked_instr_event().
 */
static unsigned char direct_marked_event[8] = {
	(1<<2) | (1<<3),	/* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */
	(1<<3) | (1<<5),	/* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */
	(1<<3),			/* PMC3: PM_MRK_ST_CMPL_INT */
	(1<<4) | (1<<5),	/* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */
	(1<<4) | (1<<5),	/* PMC5: PM_MRK_GRP_TIMEO */
	(1<<3) | (1<<4) | (1<<5),
		/* PMC6: PM_MRK_ST_GPS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */
	(1<<4) | (1<<5),	/* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */
	(1<<4),			/* PMC8: PM_MRK_LSU_FIN */
};
| 213 | |||
/*
 * Returns 1 if event counts things relating to marked instructions
 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
 *
 * Direct events are looked up in direct_marked_event[]; add/decode
 * and bus events are then checked against a per-unit bitmap of the
 * event-bus byte/bit sources that carry marked-instruction signals.
 */
static int p4_marked_instr_event(u64 event)
{
	int pmc, psel, unit, byte, bit;
	unsigned int mask;

	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
	psel = event & PM_PMCSEL_MSK;
	if (pmc) {
		/* direct event on a specific PMC: table lookup */
		if (direct_marked_event[pmc - 1] & (1 << psel))
			return 1;
		if (psel == 0)		/* add events */
			bit = (pmc <= 4)? pmc - 1: 8 - pmc;
		else if (psel == 6)	/* decode events */
			bit = 4;
		else
			return 0;
	} else
		bit = psel;

	/* bus event: check bit <bit> of bus byte <byte> for this unit */
	byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
	mask = 0;
	switch (unit) {
	case PM_LSU1:
		if (event & PM_LOWER_MSKS)
			mask = 1 << 28;		/* byte 7 bit 4 */
		else
			mask = 6 << 24;		/* byte 3 bits 1 and 2 */
		break;
	case PM_LSU0:
		/* byte 3, bit 3; byte 2 bits 0,2,3,4,5; byte 1 */
		mask = 0x083dff00;
	}
	return (mask >> (byte * 8 + bit)) & 1;
}
| 253 | |||
/*
 * Compute the constraint mask/value pair for an event: which PMC it
 * claims, which unit/byte-lane resources it needs, which PMC group
 * (1/2/5/6 vs 3/4/7/8) it consumes, and whether it requires the
 * sample_enable constraint bit (bit 56) set or merely pinned.
 * Returns 0 and fills *maskp/*valp, or -1 if the event is invalid.
 */
static int p4_get_constraint(u64 event, unsigned long *maskp,
			     unsigned long *valp)
{
	int pmc, byte, unit, lower, sh;
	unsigned long mask = 0, value = 0;
	int grp = -1;

	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
	if (pmc) {
		if (pmc > 8)
			return -1;
		/* 2-bit "events needing PMC n" counter field */
		sh = (pmc - 1) * 2;
		mask |= 2 << sh;
		value |= 1 << sh;
		grp = ((pmc - 1) >> 1) & 1;
	}
	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
	byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
	if (unit) {
		lower = (event >> PM_LOWER_SH) & PM_LOWER_MSK;

		/*
		 * Bus events on bytes 0 and 2 can be counted
		 * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8.
		 */
		if (!pmc)
			grp = byte & 1;

		if (!p4_unitinfo[unit].unit)
			return -1;
		mask |= p4_unitinfo[unit].mask;
		value |= p4_unitinfo[unit].value;
		sh = p4_unitinfo[unit].lowerbit;
		if (sh > 1)
			value |= (unsigned long)lower << sh;
		else if (lower != sh)
			return -1;	/* lower must equal the literal 0/1 */
		unit = p4_unitinfo[unit].unit;

		/* Set byte lane select field */
		mask |= 0xfULL << (28 - 4 * byte);
		value |= (unsigned long)unit << (28 - 4 * byte);
	}
	if (grp == 0) {
		/* increment PMC1/2/5/6 field */
		mask |= 0x8000000000ull;
		value |= 0x1000000000ull;
	} else {
		/* increment PMC3/4/7/8 field */
		mask |= 0x800000000ull;
		value |= 0x100000000ull;
	}

	/* Marked instruction events need sample_enable set */
	if (p4_marked_instr_event(event)) {
		mask |= 1ull << 56;
		value |= 1ull << 56;
	}

	/* PMCSEL=6 decode events on byte 2 need sample_enable clear */
	if (pmc && (event & PM_PMCSEL_MSK) == 6 && byte == 2)
		mask |= 1ull << 56;

	*maskp = mask;
	*valp = value;
	return 0;
}
| 321 | |||
/* All the interchangeable event codes for PM_INST_CMPL */
static unsigned int ppc_inst_cmpl[] = {
	0x1001, 0x4001, 0x6001, 0x7001, 0x8001
};
| 325 | |||
| 326 | static int p4_get_alternatives(u64 event, unsigned int flags, u64 alt[]) | ||
| 327 | { | ||
| 328 | int i, j, na; | ||
| 329 | |||
| 330 | alt[0] = event; | ||
| 331 | na = 1; | ||
| 332 | |||
| 333 | /* 2 possibilities for PM_GRP_DISP_REJECT */ | ||
| 334 | if (event == 0x8003 || event == 0x0224) { | ||
| 335 | alt[1] = event ^ (0x8003 ^ 0x0224); | ||
| 336 | return 2; | ||
| 337 | } | ||
| 338 | |||
| 339 | /* 2 possibilities for PM_ST_MISS_L1 */ | ||
| 340 | if (event == 0x0c13 || event == 0x0c23) { | ||
| 341 | alt[1] = event ^ (0x0c13 ^ 0x0c23); | ||
| 342 | return 2; | ||
| 343 | } | ||
| 344 | |||
| 345 | /* several possibilities for PM_INST_CMPL */ | ||
| 346 | for (i = 0; i < ARRAY_SIZE(ppc_inst_cmpl); ++i) { | ||
| 347 | if (event == ppc_inst_cmpl[i]) { | ||
| 348 | for (j = 0; j < ARRAY_SIZE(ppc_inst_cmpl); ++j) | ||
| 349 | if (j != i) | ||
| 350 | alt[na++] = ppc_inst_cmpl[j]; | ||
| 351 | break; | ||
| 352 | } | ||
| 353 | } | ||
| 354 | |||
| 355 | return na; | ||
| 356 | } | ||
| 357 | |||
/*
 * Schedule the n_ev events onto the eight PMCs and compute the
 * corresponding MMCR0/MMCR1/MMCRA register images.  On success,
 * returns 0 with the chosen 0-based PMC numbers in hwc[] and the
 * register values in mmcr[0..2]; returns -1 if the events cannot
 * all be counted simultaneously.
 */
static int p4_compute_mmcr(u64 event[], int n_ev,
			   unsigned int hwc[], unsigned long mmcr[])
{
	unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0;
	unsigned int pmc, unit, byte, psel, lower;
	unsigned int ttm, grp;
	unsigned int pmc_inuse = 0;
	unsigned int pmc_grp_use[2];
	unsigned char busbyte[4];	/* unit feeding each event-bus byte */
	unsigned char unituse[16];	/* which unit codes are needed */
	unsigned int unitlower = 0;	/* per-unit lower-half selections */
	int i;

	if (n_ev > 8)
		return -1;

	/* First pass to count resource use */
	pmc_grp_use[0] = pmc_grp_use[1] = 0;
	memset(busbyte, 0, sizeof(busbyte));
	memset(unituse, 0, sizeof(unituse));
	for (i = 0; i < n_ev; ++i) {
		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
		if (pmc) {
			if (pmc_inuse & (1 << (pmc - 1)))
				return -1;	/* PMC claimed twice */
			pmc_inuse |= 1 << (pmc - 1);
			/* count 1/2/5/6 vs 3/4/7/8 use */
			++pmc_grp_use[((pmc - 1) >> 1) & 1];
		}
		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
		lower = (event[i] >> PM_LOWER_SH) & PM_LOWER_MSK;
		if (unit) {
			if (!pmc)
				++pmc_grp_use[byte & 1];
			if (unit == 6 || unit == 8)
				/* map alt ISU1/IFU codes: 6->2, 8->3 */
				unit = (unit >> 1) - 1;
			/* each bus byte can be fed by only one unit */
			if (busbyte[byte] && busbyte[byte] != unit)
				return -1;
			busbyte[byte] = unit;
			lower <<= unit;
			/* a unit's lower selection must be consistent */
			if (unituse[unit] && lower != (unitlower & lower))
				return -1;
			unituse[unit] = 1;
			unitlower |= lower;
		}
	}
	if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4)
		return -1;

	/*
	 * Assign resources and set multiplexer selects.
	 *
	 * Units 1,2,3 are on TTM0, 4,6,7 on TTM1, 8,10 on TTM2.
	 * Each TTMx can only select one unit, but since
	 * units 2 and 6 are both ISU1, and 3 and 8 are both IFU,
	 * we have some choices.
	 */
	if (unituse[2] & (unituse[1] | (unituse[3] & unituse[9]))) {
		unituse[6] = 1;		/* Move 2 to 6 */
		unituse[2] = 0;
	}
	if (unituse[3] & (unituse[1] | unituse[2])) {
		unituse[8] = 1;		/* Move 3 to 8 */
		unituse[3] = 0;
		unitlower = (unitlower & ~8) | ((unitlower & 8) << 5);
	}
	/* Check only one unit per TTMx */
	if (unituse[1] + unituse[2] + unituse[3] > 1 ||
	    unituse[4] + unituse[6] + unituse[7] > 1 ||
	    unituse[8] + unituse[9] > 1 ||
	    (unituse[5] | unituse[10] | unituse[11] |
	     unituse[13] | unituse[14]))
		return -1;

	/* Set TTMxSEL fields.  Note, units 1-3 => TTM0SEL codes 0-2 */
	mmcr1 |= (unsigned long)(unituse[3] * 2 + unituse[2])
		<< MMCR1_TTM0SEL_SH;
	mmcr1 |= (unsigned long)(unituse[7] * 3 + unituse[6] * 2)
		<< MMCR1_TTM1SEL_SH;
	mmcr1 |= (unsigned long)unituse[9] << MMCR1_TTM2SEL_SH;

	/* Set TTCxSEL fields. */
	if (unitlower & 0xe)
		mmcr1 |= 1ull << MMCR1_TTC0SEL_SH;
	if (unitlower & 0xf0)
		mmcr1 |= 1ull << MMCR1_TTC1SEL_SH;
	if (unitlower & 0xf00)
		mmcr1 |= 1ull << MMCR1_TTC2SEL_SH;
	if (unitlower & 0x7000)
		mmcr1 |= 1ull << MMCR1_TTC3SEL_SH;

	/* Set byte lane select fields. */
	for (byte = 0; byte < 4; ++byte) {
		unit = busbyte[byte];
		if (!unit)
			continue;
		if (unit == 0xf) {
			/* special case for GPS */
			mmcr1 |= 1ull << (MMCR1_DEBUG0SEL_SH - byte);
		} else {
			if (!unituse[unit])
				ttm = unit - 1;		/* 2->1, 3->2 */
			else
				ttm = unit >> 2;
			mmcr1 |= (unsigned long)ttm
				<< (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
		}
	}

	/* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
	for (i = 0; i < n_ev; ++i) {
		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
		psel = event[i] & PM_PMCSEL_MSK;
		if (!pmc) {
			/* Bus event or 00xxx direct event (off or cycles) */
			if (unit)
				psel |= 0x10 | ((byte & 2) << 2);
			/* pick a free PMC in the right group */
			for (pmc = 0; pmc < 8; ++pmc) {
				if (pmc_inuse & (1 << pmc))
					continue;
				grp = (pmc >> 1) & 1;
				if (unit) {
					if (grp == (byte & 1))
						break;
				} else if (pmc_grp_use[grp] < 4) {
					++pmc_grp_use[grp];
					break;
				}
			}
			pmc_inuse |= 1 << pmc;
		} else {
			/* Direct event */
			--pmc;
			if (psel == 0 && (byte & 2))
				/* add events on higher-numbered bus */
				mmcr1 |= 1ull << mmcr1_adder_bits[pmc];
			else if (psel == 6 && byte == 3)
				/* seem to need to set sample_enable here */
				mmcra |= MMCRA_SAMPLE_ENABLE;
			psel |= 8;
		}
		if (pmc <= 1)
			mmcr0 |= psel << (MMCR0_PMC1SEL_SH - 7 * pmc);
		else
			mmcr1 |= psel << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
		if (pmc == 7)	/* PMC8 */
			mmcra |= (psel & 1) << MMCRA_PMC8SEL0_SH;
		hwc[i] = pmc;
		if (p4_marked_instr_event(event[i]))
			mmcra |= MMCRA_SAMPLE_ENABLE;
	}

	if (pmc_inuse & 1)
		mmcr0 |= MMCR0_PMC1CE;
	if (pmc_inuse & 0xfe)
		mmcr0 |= MMCR0_PMCjCE;

	mmcra |= 0x2000;	/* mark only one IOP per PPC instruction */

	/* Return MMCRx values */
	mmcr[0] = mmcr0;
	mmcr[1] = mmcr1;
	mmcr[2] = mmcra;
	return 0;
}
| 527 | |||
| 528 | static void p4_disable_pmc(unsigned int pmc, unsigned long mmcr[]) | ||
| 529 | { | ||
| 530 | /* | ||
| 531 | * Setting the PMCxSEL field to 0 disables PMC x. | ||
| 532 | * (Note that pmc is 0-based here, not 1-based.) | ||
| 533 | */ | ||
| 534 | if (pmc <= 1) { | ||
| 535 | mmcr[0] &= ~(0x1fUL << (MMCR0_PMC1SEL_SH - 7 * pmc)); | ||
| 536 | } else { | ||
| 537 | mmcr[1] &= ~(0x1fUL << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2))); | ||
| 538 | if (pmc == 7) | ||
| 539 | mmcr[2] &= ~(1UL << MMCRA_PMC8SEL0_SH); | ||
| 540 | } | ||
| 541 | } | ||
| 542 | |||
/* Map of the generic perf hardware event types to POWER4 event codes */
static int p4_generic_events[] = {
	[PERF_COUNT_HW_CPU_CYCLES] = 7,
	[PERF_COUNT_HW_INSTRUCTIONS] = 0x1001,
	[PERF_COUNT_HW_CACHE_REFERENCES] = 0x8c10, /* PM_LD_REF_L1 */
	[PERF_COUNT_HW_CACHE_MISSES] = 0x3c10,	/* PM_LD_MISS_L1 */
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x330, /* PM_BR_ISSUED */
	[PERF_COUNT_HW_BRANCH_MISSES] = 0x331,	/* PM_BR_MPRED_CR */
};
| 551 | |||
/* Shorthand for the PERF_COUNT_HW_CACHE_* enumerators used below */
#define C(x)	PERF_COUNT_HW_CACHE_##x

/*
 * Table of generalized cache-related events, indexed
 * [cache][operation][access/miss] via the C() shorthand.
 * 0 means not supported, -1 means nonsensical, other values
 * are event codes.
 */
static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
	[C(L1D)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0x8c10,		0x3c10	},
		[C(OP_WRITE)] = {	0x7c10,		0xc13	},
		[C(OP_PREFETCH)] = {	0xc35,		0	},
	},
	[C(L1I)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0,		0	},
		[C(OP_WRITE)] = {	-1,		-1	},
		[C(OP_PREFETCH)] = {	0,		0	},
	},
	[C(LL)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0,		0	},
		[C(OP_WRITE)] = {	0,		0	},
		[C(OP_PREFETCH)] = {	0xc34,		0	},
	},
	[C(DTLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0,		0x904	},
		[C(OP_WRITE)] = {	-1,		-1	},
		[C(OP_PREFETCH)] = {	-1,		-1	},
	},
	[C(ITLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0,		0x900	},
		[C(OP_WRITE)] = {	-1,		-1	},
		[C(OP_PREFETCH)] = {	-1,		-1	},
	},
	[C(BPU)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0x330,		0x331	},
		[C(OP_WRITE)] = {	-1,		-1	},
		[C(OP_PREFETCH)] = {	-1,		-1	},
	},
	[C(NODE)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	-1,		-1	},
		[C(OP_WRITE)] = {	-1,		-1	},
		[C(OP_PREFETCH)] = {	-1,		-1	},
	},
};
| 596 | |||
/* POWER4/4+ PMU description registered with the core powerpc perf code */
static struct power_pmu power4_pmu = {
	.name			= "POWER4/4+",
	.n_counter		= 8,
	.max_alternatives	= 5,
	.add_fields		= 0x0000001100005555ul,
	.test_adder		= 0x0011083300000000ul,
	.compute_mmcr		= p4_compute_mmcr,
	.get_constraint		= p4_get_constraint,
	.get_alternatives	= p4_get_alternatives,
	.disable_pmc		= p4_disable_pmc,
	.n_generic		= ARRAY_SIZE(p4_generic_events),
	.generic_events		= p4_generic_events,
	.cache_events		= &power4_cache_events,
};
| 611 | |||
| 612 | static int __init init_power4_pmu(void) | ||
| 613 | { | ||
| 614 | if (!cur_cpu_spec->oprofile_cpu_type || | ||
| 615 | strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power4")) | ||
| 616 | return -ENODEV; | ||
| 617 | |||
| 618 | return register_power_pmu(&power4_pmu); | ||
| 619 | } | ||
| 620 | |||
| 621 | early_initcall(init_power4_pmu); | ||
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c new file mode 100644 index 00000000000..a8757baa28f --- /dev/null +++ b/arch/powerpc/kernel/power5+-pmu.c | |||
| @@ -0,0 +1,690 @@ | |||
| 1 | /* | ||
| 2 | * Performance counter support for POWER5+/++ (not POWER5) processors. | ||
| 3 | * | ||
| 4 | * Copyright 2009 Paul Mackerras, IBM Corporation. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU General Public License | ||
| 8 | * as published by the Free Software Foundation; either version | ||
| 9 | * 2 of the License, or (at your option) any later version. | ||
| 10 | */ | ||
| 11 | #include <linux/kernel.h> | ||
| 12 | #include <linux/perf_event.h> | ||
| 13 | #include <linux/string.h> | ||
| 14 | #include <asm/reg.h> | ||
| 15 | #include <asm/cputable.h> | ||
| 16 | |||
/*
 * Bits in event code for POWER5+ (POWER5 GS) and POWER5++ (POWER5 GS DD3)
 */
#define PM_PMC_SH	20	/* PMC number (1-based) for direct events */
#define PM_PMC_MSK	0xf
#define PM_PMC_MSKS	(PM_PMC_MSK << PM_PMC_SH)	/* PMC field in place */
#define PM_UNIT_SH	16	/* TTMMUX number and setting - unit select */
#define PM_UNIT_MSK	0xf
#define PM_BYTE_SH	12	/* Byte number of event bus to use */
#define PM_BYTE_MSK	7
#define PM_GRS_SH	8	/* Storage subsystem mux select */
#define PM_GRS_MSK	7
#define PM_BUSEVENT_MSK	0x80	/* Set if event uses event bus */
#define PM_PMCSEL_MSK	0x7f

/* Values in PM_UNIT field */
#define PM_FPU		0
#define PM_ISU0		1
#define PM_IFU		2
#define PM_ISU1		3
#define PM_IDU		4
#define PM_ISU0_ALT	6	/* alternate encoding of ISU0 */
#define PM_GRS		7
#define PM_LSU0		8
#define PM_LSU1		0xc
#define PM_LASTUNIT	0xc	/* highest valid PM_UNIT code */

/*
 * Bits in MMCR1 for POWER5+
 */
#define MMCR1_TTM0SEL_SH	62
#define MMCR1_TTM1SEL_SH	60
#define MMCR1_TTM2SEL_SH	58
#define MMCR1_TTM3SEL_SH	56
#define MMCR1_TTMSEL_MSK	3
#define MMCR1_TD_CP_DBG0SEL_SH	54
#define MMCR1_TD_CP_DBG1SEL_SH	52
#define MMCR1_TD_CP_DBG2SEL_SH	50
#define MMCR1_TD_CP_DBG3SEL_SH	48
#define MMCR1_GRS_L2SEL_SH	46
#define MMCR1_GRS_L2SEL_MSK	3
#define MMCR1_GRS_L3SEL_SH	44
#define MMCR1_GRS_L3SEL_MSK	3
#define MMCR1_GRS_MCSEL_SH	41
#define MMCR1_GRS_MCSEL_MSK	7
#define MMCR1_GRS_FABSEL_SH	39
#define MMCR1_GRS_FABSEL_MSK	3
#define MMCR1_PMC1_ADDER_SEL_SH	35
#define MMCR1_PMC2_ADDER_SEL_SH	34
#define MMCR1_PMC3_ADDER_SEL_SH	33
#define MMCR1_PMC4_ADDER_SEL_SH	32
#define MMCR1_PMC1SEL_SH	25
#define MMCR1_PMC2SEL_SH	17
#define MMCR1_PMC3SEL_SH	9
#define MMCR1_PMC4SEL_SH	1
#define MMCR1_PMCSEL_SH(n)	(MMCR1_PMC1SEL_SH - (n) * 8)	/* n is 0-based */
#define MMCR1_PMCSEL_MSK	0x7f
| 74 | |||
| 75 | /* | ||
| 76 | * Layout of constraint bits: | ||
| 77 | * 6666555555555544444444443333333333222222222211111111110000000000 | ||
| 78 | * 3210987654321098765432109876543210987654321098765432109876543210 | ||
| 79 | * [ ><><>< ><> <><>[ > < >< >< >< ><><><><><><> | ||
| 80 | * NC G0G1G2 G3 T0T1 UC B0 B1 B2 B3 P6P5P4P3P2P1 | ||
| 81 | * | ||
| 82 | * NC - number of counters | ||
| 83 | * 51: NC error 0x0008_0000_0000_0000 | ||
| 84 | * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000 | ||
| 85 | * | ||
| 86 | * G0..G3 - GRS mux constraints | ||
| 87 | * 46-47: GRS_L2SEL value | ||
| 88 | * 44-45: GRS_L3SEL value | ||
 * 41-43: GRS_MCSEL value
| 90 | * 39-40: GRS_FABSEL value | ||
| 91 | * Note that these match up with their bit positions in MMCR1 | ||
| 92 | * | ||
| 93 | * T0 - TTM0 constraint | ||
| 94 | * 36-37: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0x30_0000_0000 | ||
| 95 | * | ||
| 96 | * T1 - TTM1 constraint | ||
| 97 | * 34-35: TTM1SEL value (0=IDU, 3=GRS) 0x0c_0000_0000 | ||
| 98 | * | ||
| 99 | * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS | ||
| 100 | * 33: UC3 error 0x02_0000_0000 | ||
| 101 | * 32: FPU|IFU|ISU1 events needed 0x01_0000_0000 | ||
 * 31: ISU0 events needed 0x00_8000_0000
| 103 | * 30: IDU|GRS events needed 0x00_4000_0000 | ||
| 104 | * | ||
| 105 | * B0 | ||
| 106 | * 24-27: Byte 0 event source 0x0f00_0000 | ||
| 107 | * Encoding as for the event code | ||
| 108 | * | ||
| 109 | * B1, B2, B3 | ||
| 110 | * 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources | ||
| 111 | * | ||
| 112 | * P6 | ||
| 113 | * 11: P6 error 0x800 | ||
| 114 | * 10-11: Count of events needing PMC6 | ||
| 115 | * | ||
| 116 | * P1..P5 | ||
| 117 | * 0-9: Count of events needing PMC1..PMC5 | ||
| 118 | */ | ||
| 119 | |||
/*
 * MMCR1 shift for the GRS mux field selected by the low 3 bits of a
 * PM_GRS event code (see power5p_get_constraint()).
 */
static const int grsel_shift[8] = {
	MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH,
	MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH,
	MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH
};
| 125 | |||
/*
 * Masks and values for using events from the various units:
 * [unit][0] is the constraint mask, [unit][1] the constraint value.
 */
static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
	[PM_FPU] =   { 0x3200000000ul, 0x0100000000ul },
	[PM_ISU0] =  { 0x0200000000ul, 0x0080000000ul },
	[PM_ISU1] =  { 0x3200000000ul, 0x3100000000ul },
	[PM_IFU] =   { 0x3200000000ul, 0x2100000000ul },
	[PM_IDU] =   { 0x0e00000000ul, 0x0040000000ul },
	[PM_GRS] =   { 0x0e00000000ul, 0x0c40000000ul },
};
| 135 | |||
/*
 * Compute the constraint mask/value pair for an event: PMC claim,
 * unit/byte-lane use and GRS mux settings.  Returns 0 and fills
 * *maskp/*valp, or -1 if the event is invalid.
 */
static int power5p_get_constraint(u64 event, unsigned long *maskp,
				  unsigned long *valp)
{
	int pmc, byte, unit, sh;
	int bit, fmask;
	unsigned long mask = 0, value = 0;

	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
	if (pmc) {
		if (pmc > 6)
			return -1;
		/* 2-bit "events needing PMC n" counter field */
		sh = (pmc - 1) * 2;
		mask |= 2 << sh;
		value |= 1 << sh;
		/* PMC5/6 only count 0x500009 / 0x600005 (the RUN events) */
		if (pmc >= 5 && !(event == 0x500009 || event == 0x600005))
			return -1;
	}
	if (event & PM_BUSEVENT_MSK) {
		unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
		if (unit > PM_LASTUNIT)
			return -1;
		if (unit == PM_ISU0_ALT)
			unit = PM_ISU0;
		mask |= unit_cons[unit][0];
		value |= unit_cons[unit][1];
		byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
		if (byte >= 4) {
			if (unit != PM_LSU1)
				return -1;
			/* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */
			++unit;
			byte &= 3;
		}
		if (unit == PM_GRS) {
			/* encode the required GRS mux setting */
			bit = event & 7;
			fmask = (bit == 6)? 7: 3;
			sh = grsel_shift[bit];
			mask |= (unsigned long)fmask << sh;
			value |= (unsigned long)((event >> PM_GRS_SH) & fmask)
				<< sh;
		}
		/* Set byte lane select field */
		mask |= 0xfUL << (24 - 4 * byte);
		value |= (unsigned long)unit << (24 - 4 * byte);
	}
	if (pmc < 5) {
		/* need a counter from PMC1-4 set */
		mask |= 0x8000000000000ul;
		value |= 0x1000000000000ul;
	}
	*maskp = mask;
	*valp = value;
	return 0;
}
| 190 | |||
| 191 | static int power5p_limited_pmc_event(u64 event) | ||
| 192 | { | ||
| 193 | int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
| 194 | |||
| 195 | return pmc == 5 || pmc == 6; | ||
| 196 | } | ||
| 197 | |||
#define MAX_ALT 3	/* at most 3 alternatives for any event */

/*
 * Rows are sorted ascending by their first (lowest) event code;
 * find_alternative() relies on this ordering to stop early.
 * Unused trailing columns are 0.
 */
static const unsigned int event_alternatives[][MAX_ALT] = {
	{ 0x100c0, 0x40001f },			/* PM_GCT_FULL_CYC */
	{ 0x120e4, 0x400002 },			/* PM_GRP_DISP_REJECT */
	{ 0x230e2, 0x323087 },			/* PM_BR_PRED_CR */
	{ 0x230e3, 0x223087, 0x3230a0 },	/* PM_BR_PRED_TA */
	{ 0x410c7, 0x441084 },			/* PM_THRD_L2MISS_BOTH_CYC */
	{ 0x800c4, 0xc20e0 },			/* PM_DTLB_MISS */
	{ 0xc50c6, 0xc60e0 },			/* PM_MRK_DTLB_MISS */
	{ 0x100005, 0x600005 },			/* PM_RUN_CYC */
	{ 0x100009, 0x200009 },			/* PM_INST_CMPL */
	{ 0x200015, 0x300015 },			/* PM_LSU_LMQ_SRQ_EMPTY_CYC */
	{ 0x300009, 0x400009 },			/* PM_INST_DISP */
};
| 213 | |||
| 214 | /* | ||
| 215 | * Scan the alternatives table for a match and return the | ||
| 216 | * index into the alternatives table if found, else -1. | ||
| 217 | */ | ||
| 218 | static int find_alternative(unsigned int event) | ||
| 219 | { | ||
| 220 | int i, j; | ||
| 221 | |||
| 222 | for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { | ||
| 223 | if (event < event_alternatives[i][0]) | ||
| 224 | break; | ||
| 225 | for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j) | ||
| 226 | if (event == event_alternatives[i][j]) | ||
| 227 | return i; | ||
| 228 | } | ||
| 229 | return -1; | ||
| 230 | } | ||
| 231 | |||
/*
 * Byte-decode PMCSEL values for PMC1-4; column j on one PMC
 * corresponds to column j on its alternate PMC (5 - pmc), as used
 * by find_alternative_bdecode().
 */
static const unsigned char bytedecode_alternatives[4][4] = {
	/* PMC 1 */	{ 0x21, 0x23, 0x25, 0x27 },
	/* PMC 2 */	{ 0x07, 0x17, 0x0e, 0x1e },
	/* PMC 3 */	{ 0x20, 0x22, 0x24, 0x26 },
	/* PMC 4 */	{ 0x07, 0x17, 0x0e, 0x1e }
};
| 238 | |||
/*
 * Some direct events for decodes of event bus byte 3 have alternative
 * PMCSEL values on other counters.  This returns the alternative
 * event code for those that do, or -1 otherwise.  This also handles
 * alternative PMCSEL values for add events.
 */
static s64 find_alternative_bdecode(u64 event)
{
	int pmc, altpmc, pp, j;

	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
	if (pmc == 0 || pmc > 4)
		return -1;
	altpmc = 5 - pmc;	/* 1 <-> 4, 2 <-> 3 */
	pp = event & PM_PMCSEL_MSK;
	for (j = 0; j < 4; ++j) {
		if (bytedecode_alternatives[pmc - 1][j] == pp) {
			/* same column on the paired PMC */
			return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) |
				(altpmc << PM_PMC_SH) |
				bytedecode_alternatives[altpmc - 1][j];
		}
	}

	/* new decode alternatives for power5+ */
	if (pmc == 1 && (pp == 0x0d || pp == 0x0e))
		return event + (2 << PM_PMC_SH) + (0x2e - 0x0d);
	if (pmc == 3 && (pp == 0x2e || pp == 0x2f))
		return event - (2 << PM_PMC_SH) - (0x2e - 0x0d);

	/* alternative add event encodings */
	if (pp == 0x10 || pp == 0x28)
		return ((event ^ (0x10 ^ 0x28)) & ~PM_PMC_MSKS) |
			(altpmc << PM_PMC_SH);

	return -1;
}
| 275 | |||
/*
 * Fill alt[] with all alternative encodings of the given event
 * (including the event itself at alt[0]) and return the count.
 * Honours PPMU_ONLY_COUNT_RUN as well as the limited-PMC
 * (PPMU_LIMITED_PMC_OK / PPMU_LIMITED_PMC_REQD) flags.
 */
static int power5p_get_alternatives(u64 event, unsigned int flags, u64 alt[])
{
	int i, j, nalt = 1;
	int nlim;	/* how many of the alternatives use a limited PMC */
	s64 ae;

	alt[0] = event;
	nalt = 1;
	nlim = power5p_limited_pmc_event(event);
	i = find_alternative(event);
	if (i >= 0) {
		/* take every other entry of the matching table row */
		for (j = 0; j < MAX_ALT; ++j) {
			ae = event_alternatives[i][j];
			if (ae && ae != event)
				alt[nalt++] = ae;
			/*
			 * NOTE(review): this also re-counts ae == event, so
			 * nlim can over-count by one when the event appears
			 * in its own row; nlim is only tested non-zero and
			 * compared against nalt below — confirm harmless.
			 */
			nlim += power5p_limited_pmc_event(ae);
		}
	} else {
		ae = find_alternative_bdecode(event);
		if (ae > 0)
			alt[nalt++] = ae;
	}

	if (flags & PPMU_ONLY_COUNT_RUN) {
		/*
		 * We're only counting in RUN state,
		 * so PM_CYC is equivalent to PM_RUN_CYC
		 * and PM_INST_CMPL === PM_RUN_INST_CMPL.
		 * This doesn't include alternatives that don't provide
		 * any extra flexibility in assigning PMCs (e.g.
		 * 0x100005 for PM_RUN_CYC vs. 0xf for PM_CYC).
		 * Note that even with these additional alternatives
		 * we never end up with more than 3 alternatives for any event.
		 */
		j = nalt;
		for (i = 0; i < nalt; ++i) {
			switch (alt[i]) {
			case 0xf:	/* PM_CYC */
				alt[j++] = 0x600005;	/* PM_RUN_CYC */
				++nlim;
				break;
			case 0x600005:	/* PM_RUN_CYC */
				alt[j++] = 0xf;
				break;
			case 0x100009:	/* PM_INST_CMPL */
				alt[j++] = 0x500009;	/* PM_RUN_INST_CMPL */
				++nlim;
				break;
			case 0x500009:	/* PM_RUN_INST_CMPL */
				alt[j++] = 0x100009;	/* PM_INST_CMPL */
				alt[j++] = 0x200009;
				break;
			}
		}
		nalt = j;
	}

	if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) {
		/* remove the limited PMC events */
		j = 0;
		for (i = 0; i < nalt; ++i) {
			if (!power5p_limited_pmc_event(alt[i])) {
				alt[j] = alt[i];
				++j;
			}
		}
		nalt = j;
	} else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) {
		/* remove all but the limited PMC events */
		j = 0;
		for (i = 0; i < nalt; ++i) {
			if (power5p_limited_pmc_event(alt[i])) {
				alt[j] = alt[i];
				++j;
			}
		}
		nalt = j;
	}

	return nalt;
}
| 357 | |||
/*
 * Map of which direct events on which PMCs are marked instruction events.
 * Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event.
 * Bit 0 is set if it is marked for all PMCs.
 * The 0x80 bit indicates a byte decode PMCSEL value.
 * Consulted by power5p_marked_instr_event(); PMCSEL values >= 0x28
 * are handled there in code instead.
 */
static unsigned char direct_event_is_marked[0x28] = {
	0,	/* 00 */
	0x1f,	/* 01 PM_IOPS_CMPL */
	0x2,	/* 02 PM_MRK_GRP_DISP */
	0xe,	/* 03 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
	0,	/* 04 */
	0x1c,	/* 05 PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */
	0x80,	/* 06 */
	0x80,	/* 07 */
	0, 0, 0,/* 08 - 0a */
	0x18,	/* 0b PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */
	0,	/* 0c */
	0x80,	/* 0d */
	0x80,	/* 0e */
	0,	/* 0f */
	0,	/* 10 */
	0x14,	/* 11 PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */
	0,	/* 12 */
	0x10,	/* 13 PM_MRK_GRP_CMPL */
	0x1f,	/* 14 PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */
	0x2,	/* 15 PM_MRK_GRP_ISSUED */
	0x80,	/* 16 */
	0x80,	/* 17 */
	0, 0, 0, 0, 0,
	0x80,	/* 1d */
	0x80,	/* 1e */
	0,	/* 1f */
	0x80,	/* 20 */
	0x80,	/* 21 */
	0x80,	/* 22 */
	0x80,	/* 23 */
	0x80,	/* 24 */
	0x80,	/* 25 */
	0x80,	/* 26 */
	0x80,	/* 27 */
};
| 400 | |||
/*
 * Returns 1 if event counts things relating to marked instructions
 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
 *
 * Direct events are looked up in direct_event_is_marked[]; byte
 * decode and add events then fall through to a check of the marked
 * source bits on the relevant event-bus byte for LSU0/LSU1.
 */
static int power5p_marked_instr_event(u64 event)
{
	int pmc, psel;
	int bit, byte, unit;
	u32 mask;

	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
	psel = event & PM_PMCSEL_MSK;
	if (pmc >= 5)
		return 0;	/* limited PMCs never need sampling */

	/* work out which event-bus bit (if any) to test */
	bit = -1;
	if (psel < sizeof(direct_event_is_marked)) {
		if (direct_event_is_marked[psel] & (1 << pmc))
			return 1;
		if (direct_event_is_marked[psel] & 0x80)
			bit = 4;
		else if (psel == 0x08)
			bit = pmc - 1;
		else if (psel == 0x10)
			bit = 4 - pmc;
		else if (psel == 0x1b && (pmc == 1 || pmc == 3))
			bit = 4;
	} else if ((psel & 0x48) == 0x40) {
		bit = psel & 7;
	} else if (psel == 0x28) {
		bit = pmc - 1;
	} else if (pmc == 3 && (psel == 0x2e || psel == 0x2f)) {
		bit = 4;
	}

	if (!(event & PM_BUSEVENT_MSK) || bit == -1)
		return 0;

	byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
	if (unit == PM_LSU0) {
		/* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */
		mask = 0x5dff00;
	} else if (unit == PM_LSU1 && byte >= 4) {
		byte -= 4;
		/* byte 5 bits 6-7, byte 6 bits 0,4, byte 7 bits 0-4,6 */
		mask = 0x5f11c000;
	} else
		return 0;

	return (mask >> (byte * 8 + bit)) & 1;
}
| 453 | |||
/*
 * Compute MMCR register images for a set of POWER5+ events.
 * event[0..n_ev-1] are raw event codes.  On success, hwc[i] receives the
 * 0-based PMC assigned to event i, and mmcr[0..2] receive the MMCR0,
 * MMCR1 and MMCRA images.  Returns 0 on success, -1 if the events
 * cannot all be scheduled together.
 */
static int power5p_compute_mmcr(u64 event[], int n_ev,
				unsigned int hwc[], unsigned long mmcr[])
{
	unsigned long mmcr1 = 0;
	unsigned long mmcra = 0;
	unsigned int pmc, unit, byte, psel;
	unsigned int ttm;
	int i, isbus, bit, grsel;
	unsigned int pmc_inuse = 0;	/* bitmap of PMCs claimed by events */
	unsigned char busbyte[4];	/* unit feeding each event-bus byte */
	unsigned char unituse[16];	/* which TTM units are needed */
	int ttmuse;

	if (n_ev > 6)
		return -1;

	/* First pass to count resource use */
	memset(busbyte, 0, sizeof(busbyte));
	memset(unituse, 0, sizeof(unituse));
	for (i = 0; i < n_ev; ++i) {
		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
		if (pmc) {
			if (pmc > 6)
				return -1;
			/* each PMC can be claimed by at most one event */
			if (pmc_inuse & (1 << (pmc - 1)))
				return -1;
			pmc_inuse |= 1 << (pmc - 1);
		}
		if (event[i] & PM_BUSEVENT_MSK) {
			unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
			byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
			if (unit > PM_LASTUNIT)
				return -1;
			if (unit == PM_ISU0_ALT)
				unit = PM_ISU0;
			if (byte >= 4) {
				/* only LSU1 drives bytes 4-7 */
				if (unit != PM_LSU1)
					return -1;
				/* LSU1 low word is treated as unit LSU1+1 */
				++unit;
				byte &= 3;
			}
			/* two different units can't feed the same byte */
			if (busbyte[byte] && busbyte[byte] != unit)
				return -1;
			busbyte[byte] = unit;
			unituse[unit] = 1;
		}
	}

	/*
	 * Assign resources and set multiplexer selects.
	 *
	 * PM_ISU0 can go either on TTM0 or TTM1, but that's the only
	 * choice we have to deal with.
	 */
	if (unituse[PM_ISU0] &
	    (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) {
		unituse[PM_ISU0_ALT] = 1;	/* move ISU to TTM1 */
		unituse[PM_ISU0] = 0;
	}
	/* Set TTM[01]SEL fields. */
	ttmuse = 0;
	for (i = PM_FPU; i <= PM_ISU1; ++i) {
		if (!unituse[i])
			continue;
		/* only one unit may be routed through TTM0 */
		if (ttmuse++)
			return -1;
		mmcr1 |= (unsigned long)i << MMCR1_TTM0SEL_SH;
	}
	ttmuse = 0;
	for (; i <= PM_GRS; ++i) {
		if (!unituse[i])
			continue;
		if (ttmuse++)
			return -1;
		mmcr1 |= (unsigned long)(i & 3) << MMCR1_TTM1SEL_SH;
	}
	if (ttmuse > 1)
		return -1;

	/* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */
	for (byte = 0; byte < 4; ++byte) {
		unit = busbyte[byte];
		if (!unit)
			continue;
		if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) {
			/* get ISU0 through TTM1 rather than TTM0 */
			unit = PM_ISU0_ALT;
		} else if (unit == PM_LSU1 + 1) {
			/* select lower word of LSU1 for this byte */
			mmcr1 |= 1ul << (MMCR1_TTM3SEL_SH + 3 - byte);
		}
		ttm = unit >> 2;
		mmcr1 |= (unsigned long)ttm
			<< (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
	}

	/* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
	for (i = 0; i < n_ev; ++i) {
		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
		psel = event[i] & PM_PMCSEL_MSK;
		isbus = event[i] & PM_BUSEVENT_MSK;
		if (!pmc) {
			/* Bus event or any-PMC direct event */
			for (pmc = 0; pmc < 4; ++pmc) {
				if (!(pmc_inuse & (1 << pmc)))
					break;
			}
			if (pmc >= 4)
				return -1;
			pmc_inuse |= 1 << pmc;
		} else if (pmc <= 4) {
			/* Direct event */
			--pmc;	/* make pmc 0-based from here on */
			if (isbus && (byte & 2) &&
			    (psel == 8 || psel == 0x10 || psel == 0x28))
				/* add events on higher-numbered bus */
				mmcr1 |= 1ul << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
		} else {
			/* Instructions or run cycles on PMC5/6 */
			--pmc;
		}
		if (isbus && unit == PM_GRS) {
			/* program the GRS mux select for this event */
			bit = psel & 7;
			grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
			mmcr1 |= (unsigned long)grsel << grsel_shift[bit];
		}
		if (power5p_marked_instr_event(event[i]))
			mmcra |= MMCRA_SAMPLE_ENABLE;
		if ((psel & 0x58) == 0x40 && (byte & 1) != ((pmc >> 1) & 1))
			/* select alternate byte lane */
			psel |= 0x10;
		if (pmc <= 3)
			mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
		hwc[i] = pmc;
	}

	/* Return MMCRx values */
	mmcr[0] = 0;
	/* pmc_inuse bit 0 => PMC1, bits 1-5 => PMC2-6 */
	if (pmc_inuse & 1)
		mmcr[0] = MMCR0_PMC1CE;
	if (pmc_inuse & 0x3e)
		mmcr[0] |= MMCR0_PMCjCE;
	mmcr[1] = mmcr1;
	mmcr[2] = mmcra;
	return 0;
}
| 602 | |||
/*
 * Stop (0-based) counter @pmc counting by clearing its PMCSEL field
 * in the saved MMCR1 image.
 */
static void power5p_disable_pmc(unsigned int pmc, unsigned long mmcr[])
{
	unsigned long sel_mask;

	if (pmc > 3)
		return;		/* PMC5/6 have no PMCSEL field */
	sel_mask = 0x7fUL << MMCR1_PMCSEL_SH(pmc);
	mmcr[1] &= ~sel_mask;
}
| 608 | |||
/* Map of generic perf hardware event ids to raw POWER5+ event codes. */
static int power5p_generic_events[] = {
	[PERF_COUNT_HW_CPU_CYCLES] = 0xf,
	[PERF_COUNT_HW_INSTRUCTIONS] = 0x100009,
	[PERF_COUNT_HW_CACHE_REFERENCES] = 0x1c10a8, /* LD_REF_L1 */
	[PERF_COUNT_HW_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */
	[PERF_COUNT_HW_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */
};
| 617 | |||
| 618 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
| 619 | |||
/*
 * Table of generalized cache-related events, indexed by
 * [cache][operation][access/miss].  0 means not supported, -1 means
 * nonsensical, other values are raw POWER5+ event codes.
 */
static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
	[C(L1D)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0x1c10a8,	0x3c1088	},
		[C(OP_WRITE)] = {	0x2c10a8,	0xc10c3		},
		[C(OP_PREFETCH)] = {	0xc70e7,	-1		},
	},
	[C(L1I)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0,		0		},
		[C(OP_WRITE)] = {	-1,		-1		},
		[C(OP_PREFETCH)] = {	0,		0		},
	},
	[C(LL)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0,		0		},
		[C(OP_WRITE)] = {	0,		0		},
		[C(OP_PREFETCH)] = {	0xc50c3,	0		},
	},
	[C(DTLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0xc20e4,	0x800c4		},
		[C(OP_WRITE)] = {	-1,		-1		},
		[C(OP_PREFETCH)] = {	-1,		-1		},
	},
	[C(ITLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0,		0x800c0		},
		[C(OP_WRITE)] = {	-1,		-1		},
		[C(OP_PREFETCH)] = {	-1,		-1		},
	},
	[C(BPU)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0x230e4,	0x230e5		},
		[C(OP_WRITE)] = {	-1,		-1		},
		[C(OP_PREFETCH)] = {	-1,		-1		},
	},
	[C(NODE)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	-1,		-1		},
		[C(OP_WRITE)] = {	-1,		-1		},
		[C(OP_PREFETCH)] = {	-1,		-1		},
	},
};
| 662 | |||
/*
 * POWER5+/++ PMU description, handed to the core powerpc perf code
 * via register_power_pmu() in init_power5p_pmu().
 */
static struct power_pmu power5p_pmu = {
	.name			= "POWER5+/++",
	.n_counter		= 6,	/* PMC1-6 */
	.max_alternatives	= MAX_ALT,
	.add_fields		= 0x7000000000055ul,
	.test_adder		= 0x3000040000000ul,
	.compute_mmcr		= power5p_compute_mmcr,
	.get_constraint		= power5p_get_constraint,
	.get_alternatives	= power5p_get_alternatives,
	.disable_pmc		= power5p_disable_pmc,
	.limited_pmc_event	= power5p_limited_pmc_event,
	/* NOTE(review): flag presumably marks PMC5/6 as limited counters */
	.flags			= PPMU_LIMITED_PMC5_6,
	.n_generic		= ARRAY_SIZE(power5p_generic_events),
	.generic_events		= power5p_generic_events,
	.cache_events		= &power5p_cache_events,
};
| 679 | |||
| 680 | static int __init init_power5p_pmu(void) | ||
| 681 | { | ||
| 682 | if (!cur_cpu_spec->oprofile_cpu_type || | ||
| 683 | (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5+") | ||
| 684 | && strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5++"))) | ||
| 685 | return -ENODEV; | ||
| 686 | |||
| 687 | return register_power_pmu(&power5p_pmu); | ||
| 688 | } | ||
| 689 | |||
| 690 | early_initcall(init_power5p_pmu); | ||
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c new file mode 100644 index 00000000000..e7f06eb7a86 --- /dev/null +++ b/arch/powerpc/kernel/power5-pmu.c | |||
| @@ -0,0 +1,629 @@ | |||
| 1 | /* | ||
| 2 | * Performance counter support for POWER5 (not POWER5++) processors. | ||
| 3 | * | ||
| 4 | * Copyright 2009 Paul Mackerras, IBM Corporation. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU General Public License | ||
| 8 | * as published by the Free Software Foundation; either version | ||
| 9 | * 2 of the License, or (at your option) any later version. | ||
| 10 | */ | ||
| 11 | #include <linux/kernel.h> | ||
| 12 | #include <linux/perf_event.h> | ||
| 13 | #include <linux/string.h> | ||
| 14 | #include <asm/reg.h> | ||
| 15 | #include <asm/cputable.h> | ||
| 16 | |||
| 17 | /* | ||
| 18 | * Bits in event code for POWER5 (not POWER5++) | ||
| 19 | */ | ||
| 20 | #define PM_PMC_SH 20 /* PMC number (1-based) for direct events */ | ||
| 21 | #define PM_PMC_MSK 0xf | ||
| 22 | #define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH) | ||
| 23 | #define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */ | ||
| 24 | #define PM_UNIT_MSK 0xf | ||
| 25 | #define PM_BYTE_SH 12 /* Byte number of event bus to use */ | ||
| 26 | #define PM_BYTE_MSK 7 | ||
| 27 | #define PM_GRS_SH 8 /* Storage subsystem mux select */ | ||
| 28 | #define PM_GRS_MSK 7 | ||
| 29 | #define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */ | ||
| 30 | #define PM_PMCSEL_MSK 0x7f | ||
| 31 | |||
| 32 | /* Values in PM_UNIT field */ | ||
| 33 | #define PM_FPU 0 | ||
| 34 | #define PM_ISU0 1 | ||
| 35 | #define PM_IFU 2 | ||
| 36 | #define PM_ISU1 3 | ||
| 37 | #define PM_IDU 4 | ||
| 38 | #define PM_ISU0_ALT 6 | ||
| 39 | #define PM_GRS 7 | ||
| 40 | #define PM_LSU0 8 | ||
| 41 | #define PM_LSU1 0xc | ||
| 42 | #define PM_LASTUNIT 0xc | ||
| 43 | |||
| 44 | /* | ||
| 45 | * Bits in MMCR1 for POWER5 | ||
| 46 | */ | ||
| 47 | #define MMCR1_TTM0SEL_SH 62 | ||
| 48 | #define MMCR1_TTM1SEL_SH 60 | ||
| 49 | #define MMCR1_TTM2SEL_SH 58 | ||
| 50 | #define MMCR1_TTM3SEL_SH 56 | ||
| 51 | #define MMCR1_TTMSEL_MSK 3 | ||
| 52 | #define MMCR1_TD_CP_DBG0SEL_SH 54 | ||
| 53 | #define MMCR1_TD_CP_DBG1SEL_SH 52 | ||
| 54 | #define MMCR1_TD_CP_DBG2SEL_SH 50 | ||
| 55 | #define MMCR1_TD_CP_DBG3SEL_SH 48 | ||
| 56 | #define MMCR1_GRS_L2SEL_SH 46 | ||
| 57 | #define MMCR1_GRS_L2SEL_MSK 3 | ||
| 58 | #define MMCR1_GRS_L3SEL_SH 44 | ||
| 59 | #define MMCR1_GRS_L3SEL_MSK 3 | ||
| 60 | #define MMCR1_GRS_MCSEL_SH 41 | ||
| 61 | #define MMCR1_GRS_MCSEL_MSK 7 | ||
| 62 | #define MMCR1_GRS_FABSEL_SH 39 | ||
| 63 | #define MMCR1_GRS_FABSEL_MSK 3 | ||
| 64 | #define MMCR1_PMC1_ADDER_SEL_SH 35 | ||
| 65 | #define MMCR1_PMC2_ADDER_SEL_SH 34 | ||
| 66 | #define MMCR1_PMC3_ADDER_SEL_SH 33 | ||
| 67 | #define MMCR1_PMC4_ADDER_SEL_SH 32 | ||
| 68 | #define MMCR1_PMC1SEL_SH 25 | ||
| 69 | #define MMCR1_PMC2SEL_SH 17 | ||
| 70 | #define MMCR1_PMC3SEL_SH 9 | ||
| 71 | #define MMCR1_PMC4SEL_SH 1 | ||
| 72 | #define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) | ||
| 73 | #define MMCR1_PMCSEL_MSK 0x7f | ||
| 74 | |||
| 75 | /* | ||
| 76 | * Layout of constraint bits: | ||
| 77 | * 6666555555555544444444443333333333222222222211111111110000000000 | ||
| 78 | * 3210987654321098765432109876543210987654321098765432109876543210 | ||
| 79 | * <><>[ ><><>< ><> [ >[ >[ >< >< >< >< ><><><><><><> | ||
| 80 | * T0T1 NC G0G1G2 G3 UC PS1PS2 B0 B1 B2 B3 P6P5P4P3P2P1 | ||
| 81 | * | ||
| 82 | * T0 - TTM0 constraint | ||
| 83 | * 54-55: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0xc0_0000_0000_0000 | ||
| 84 | * | ||
| 85 | * T1 - TTM1 constraint | ||
| 86 | * 52-53: TTM1SEL value (0=IDU, 3=GRS) 0x30_0000_0000_0000 | ||
| 87 | * | ||
| 88 | * NC - number of counters | ||
| 89 | * 51: NC error 0x0008_0000_0000_0000 | ||
| 90 | * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000 | ||
| 91 | * | ||
| 92 | * G0..G3 - GRS mux constraints | ||
| 93 | * 46-47: GRS_L2SEL value | ||
| 94 | * 44-45: GRS_L3SEL value | ||
| 95 | * 41-43: GRS_MCSEL value | ||
| 96 | * 39-40: GRS_FABSEL value | ||
| 97 | * Note that these match up with their bit positions in MMCR1 | ||
| 98 | * | ||
| 99 | * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS | ||
| 100 | * 37: UC3 error 0x20_0000_0000 | ||
| 101 | * 36: FPU|IFU|ISU1 events needed 0x10_0000_0000 | ||
| 102 | * 35: ISU0 events needed 0x08_0000_0000 | ||
| 103 | * 34: IDU|GRS events needed 0x04_0000_0000 | ||
| 104 | * | ||
| 105 | * PS1 | ||
| 106 | * 33: PS1 error 0x2_0000_0000 | ||
| 107 | * 31-32: count of events needing PMC1/2 0x1_8000_0000 | ||
| 108 | * | ||
| 109 | * PS2 | ||
| 110 | * 30: PS2 error 0x4000_0000 | ||
| 111 | * 28-29: count of events needing PMC3/4 0x3000_0000 | ||
| 112 | * | ||
| 113 | * B0 | ||
| 114 | * 24-27: Byte 0 event source 0x0f00_0000 | ||
| 115 | * Encoding as for the event code | ||
| 116 | * | ||
| 117 | * B1, B2, B3 | ||
| 118 | * 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources | ||
| 119 | * | ||
| 120 | * P1..P6 | ||
| 121 | * 0-11: Count of events needing PMC1..PMC6 | ||
| 122 | */ | ||
| 123 | |||
/*
 * MMCR1 shift for each GRS mux select field, indexed by the low
 * 3 bits of the event's PMCSEL value (see uses in get_constraint
 * and compute_mmcr).
 */
static const int grsel_shift[8] = {
	MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH,
	MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH,
	MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH
};
| 129 | |||
| 130 | /* Masks and values for using events from the various units */ | ||
/*
 * Masks and values for using events from the various units:
 * [unit][0] is the constraint mask, [unit][1] the constraint value
 * OR'ed in by power5_get_constraint().
 */
static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
	[PM_FPU] =   { 0xc0002000000000ul, 0x00001000000000ul },
	[PM_ISU0] =  { 0x00002000000000ul, 0x00000800000000ul },
	[PM_ISU1] =  { 0xc0002000000000ul, 0xc0001000000000ul },
	[PM_IFU] =   { 0xc0002000000000ul, 0x80001000000000ul },
	[PM_IDU] =   { 0x30002000000000ul, 0x00000400000000ul },
	[PM_GRS] =   { 0x30002000000000ul, 0x30000400000000ul },
};
| 139 | |||
/*
 * Compute the scheduling-constraint (mask, value) pair for @event,
 * using the bit layout described in the comment above.  Returns 0 on
 * success with *maskp/*valp filled in, or -1 if the event code is
 * invalid.
 */
static int power5_get_constraint(u64 event, unsigned long *maskp,
				 unsigned long *valp)
{
	int pmc, byte, unit, sh;
	int bit, fmask;
	unsigned long mask = 0, value = 0;
	int grp = -1;

	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
	if (pmc) {
		if (pmc > 6)
			return -1;
		/* two bits per PMC in the P1..P6 count fields */
		sh = (pmc - 1) * 2;
		mask |= 2 << sh;
		value |= 1 << sh;
		if (pmc <= 4)
			grp = (pmc - 1) >> 1;
		/* only PM_INST_CMPL / PM_RUN_CYC may use PMC5/6 */
		else if (event != 0x500009 && event != 0x600005)
			return -1;
	}
	if (event & PM_BUSEVENT_MSK) {
		unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
		if (unit > PM_LASTUNIT)
			return -1;
		if (unit == PM_ISU0_ALT)
			unit = PM_ISU0;
		mask |= unit_cons[unit][0];
		value |= unit_cons[unit][1];
		byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
		if (byte >= 4) {
			if (unit != PM_LSU1)
				return -1;
			/* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */
			++unit;
			byte &= 3;
		}
		if (unit == PM_GRS) {
			/* constrain the relevant GRS_*SEL field too */
			bit = event & 7;
			fmask = (bit == 6)? 7: 3;
			sh = grsel_shift[bit];
			mask |= (unsigned long)fmask << sh;
			value |= (unsigned long)((event >> PM_GRS_SH) & fmask)
				<< sh;
		}
		/*
		 * Bus events on bytes 0 and 2 can be counted
		 * on PMC1/2; bytes 1 and 3 on PMC3/4.
		 */
		if (!pmc)
			grp = byte & 1;
		/* Set byte lane select field */
		mask |= 0xfUL << (24 - 4 * byte);
		value |= (unsigned long)unit << (24 - 4 * byte);
	}
	if (grp == 0) {
		/* increment PMC1/2 field */
		mask |= 0x200000000ul;
		value |= 0x080000000ul;
	} else if (grp == 1) {
		/* increment PMC3/4 field */
		mask |= 0x40000000ul;
		value |= 0x10000000ul;
	}
	if (pmc < 5) {
		/* need a counter from PMC1-4 set */
		mask |= 0x8000000000000ul;
		value |= 0x1000000000000ul;
	}
	*maskp = mask;
	*valp = value;
	return 0;
}
| 212 | |||
| 213 | #define MAX_ALT 3 /* at most 3 alternatives for any event */ | ||
| 214 | |||
/*
 * Groups of equivalent event codes.  Rows must be sorted ascending by
 * their first code (find_alternative() relies on this for early exit);
 * unused trailing entries in a row are zero.
 */
static const unsigned int event_alternatives[][MAX_ALT] = {
	{ 0x120e4,  0x400002 },			/* PM_GRP_DISP_REJECT */
	{ 0x410c7,  0x441084 },			/* PM_THRD_L2MISS_BOTH_CYC */
	{ 0x100005, 0x600005 },			/* PM_RUN_CYC */
	{ 0x100009, 0x200009, 0x500009 },	/* PM_INST_CMPL */
	{ 0x300009, 0x400009 },			/* PM_INST_DISP */
};
| 222 | |||
| 223 | /* | ||
| 224 | * Scan the alternatives table for a match and return the | ||
| 225 | * index into the alternatives table if found, else -1. | ||
| 226 | */ | ||
| 227 | static int find_alternative(u64 event) | ||
| 228 | { | ||
| 229 | int i, j; | ||
| 230 | |||
| 231 | for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { | ||
| 232 | if (event < event_alternatives[i][0]) | ||
| 233 | break; | ||
| 234 | for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j) | ||
| 235 | if (event == event_alternatives[i][j]) | ||
| 236 | return i; | ||
| 237 | } | ||
| 238 | return -1; | ||
| 239 | } | ||
| 240 | |||
/*
 * PMCSEL codes for event-bus byte-3 decodes on each PMC.  Column j on
 * one PMC corresponds to column j on its partner PMC (see
 * find_alternative_bdecode(), which swaps PMC 1<->4 and 2<->3).
 */
static const unsigned char bytedecode_alternatives[4][4] = {
	/* PMC 1 */	{ 0x21, 0x23, 0x25, 0x27 },
	/* PMC 2 */	{ 0x07, 0x17, 0x0e, 0x1e },
	/* PMC 3 */	{ 0x20, 0x22, 0x24, 0x26 },
	/* PMC 4 */	{ 0x07, 0x17, 0x0e, 0x1e }
};
| 247 | |||
| 248 | /* | ||
| 249 | * Some direct events for decodes of event bus byte 3 have alternative | ||
| 250 | * PMCSEL values on other counters. This returns the alternative | ||
| 251 | * event code for those that do, or -1 otherwise. | ||
| 252 | */ | ||
| 253 | static s64 find_alternative_bdecode(u64 event) | ||
| 254 | { | ||
| 255 | int pmc, altpmc, pp, j; | ||
| 256 | |||
| 257 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
| 258 | if (pmc == 0 || pmc > 4) | ||
| 259 | return -1; | ||
| 260 | altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */ | ||
| 261 | pp = event & PM_PMCSEL_MSK; | ||
| 262 | for (j = 0; j < 4; ++j) { | ||
| 263 | if (bytedecode_alternatives[pmc - 1][j] == pp) { | ||
| 264 | return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) | | ||
| 265 | (altpmc << PM_PMC_SH) | | ||
| 266 | bytedecode_alternatives[altpmc - 1][j]; | ||
| 267 | } | ||
| 268 | } | ||
| 269 | return -1; | ||
| 270 | } | ||
| 271 | |||
| 272 | static int power5_get_alternatives(u64 event, unsigned int flags, u64 alt[]) | ||
| 273 | { | ||
| 274 | int i, j, nalt = 1; | ||
| 275 | s64 ae; | ||
| 276 | |||
| 277 | alt[0] = event; | ||
| 278 | nalt = 1; | ||
| 279 | i = find_alternative(event); | ||
| 280 | if (i >= 0) { | ||
| 281 | for (j = 0; j < MAX_ALT; ++j) { | ||
| 282 | ae = event_alternatives[i][j]; | ||
| 283 | if (ae && ae != event) | ||
| 284 | alt[nalt++] = ae; | ||
| 285 | } | ||
| 286 | } else { | ||
| 287 | ae = find_alternative_bdecode(event); | ||
| 288 | if (ae > 0) | ||
| 289 | alt[nalt++] = ae; | ||
| 290 | } | ||
| 291 | return nalt; | ||
| 292 | } | ||
| 293 | |||
/*
 * Map of which direct events on which PMCs are marked instruction events.
 * Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event.
 * Bit 0 is set if it is marked for all PMCs.
 * The 0x80 bit indicates a byte decode PMCSEL value.
 */
static unsigned char direct_event_is_marked[0x28] = {
	0,	/* 00 */
	0x1f,	/* 01 PM_IOPS_CMPL */
	0x2,	/* 02 PM_MRK_GRP_DISP */
	0xe,	/* 03 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
	0,	/* 04 */
	0x1c,	/* 05 PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */
	0x80,	/* 06 */
	0x80,	/* 07 */
	0, 0, 0,/* 08 - 0a */
	0x18,	/* 0b PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */
	0,	/* 0c */
	0x80,	/* 0d */
	0x80,	/* 0e */
	0,	/* 0f */
	0,	/* 10 */
	0x14,	/* 11 PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */
	0,	/* 12 */
	0x10,	/* 13 PM_MRK_GRP_CMPL */
	0x1f,	/* 14 PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */
	0x2,	/* 15 PM_MRK_GRP_ISSUED */
	0x80,	/* 16 */
	0x80,	/* 17 */
	0, 0, 0, 0, 0,	/* 18 - 1c */
	0x80,	/* 1d */
	0x80,	/* 1e */
	0,	/* 1f */
	0x80,	/* 20 */
	0x80,	/* 21 */
	0x80,	/* 22 */
	0x80,	/* 23 */
	0x80,	/* 24 */
	0x80,	/* 25 */
	0x80,	/* 26 */
	0x80,	/* 27 */
};
| 336 | |||
| 337 | /* | ||
| 338 | * Returns 1 if event counts things relating to marked instructions | ||
| 339 | * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. | ||
| 340 | */ | ||
| 341 | static int power5_marked_instr_event(u64 event) | ||
| 342 | { | ||
| 343 | int pmc, psel; | ||
| 344 | int bit, byte, unit; | ||
| 345 | u32 mask; | ||
| 346 | |||
| 347 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
| 348 | psel = event & PM_PMCSEL_MSK; | ||
| 349 | if (pmc >= 5) | ||
| 350 | return 0; | ||
| 351 | |||
| 352 | bit = -1; | ||
| 353 | if (psel < sizeof(direct_event_is_marked)) { | ||
| 354 | if (direct_event_is_marked[psel] & (1 << pmc)) | ||
| 355 | return 1; | ||
| 356 | if (direct_event_is_marked[psel] & 0x80) | ||
| 357 | bit = 4; | ||
| 358 | else if (psel == 0x08) | ||
| 359 | bit = pmc - 1; | ||
| 360 | else if (psel == 0x10) | ||
| 361 | bit = 4 - pmc; | ||
| 362 | else if (psel == 0x1b && (pmc == 1 || pmc == 3)) | ||
| 363 | bit = 4; | ||
| 364 | } else if ((psel & 0x58) == 0x40) | ||
| 365 | bit = psel & 7; | ||
| 366 | |||
| 367 | if (!(event & PM_BUSEVENT_MSK)) | ||
| 368 | return 0; | ||
| 369 | |||
| 370 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
| 371 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
| 372 | if (unit == PM_LSU0) { | ||
| 373 | /* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */ | ||
| 374 | mask = 0x5dff00; | ||
| 375 | } else if (unit == PM_LSU1 && byte >= 4) { | ||
| 376 | byte -= 4; | ||
| 377 | /* byte 4 bits 1,3,5,7, byte 5 bits 6-7, byte 7 bits 0-4,6 */ | ||
| 378 | mask = 0x5f00c0aa; | ||
| 379 | } else | ||
| 380 | return 0; | ||
| 381 | |||
| 382 | return (mask >> (byte * 8 + bit)) & 1; | ||
| 383 | } | ||
| 384 | |||
/*
 * Compute MMCR register images for a set of POWER5 events.
 * event[0..n_ev-1] are raw event codes.  On success, hwc[i] receives the
 * 0-based PMC assigned to event i, and mmcr[0..2] receive the MMCR0,
 * MMCR1 and MMCRA images.  Returns 0 on success, -1 if the events
 * cannot all be scheduled together.
 */
static int power5_compute_mmcr(u64 event[], int n_ev,
			       unsigned int hwc[], unsigned long mmcr[])
{
	unsigned long mmcr1 = 0;
	unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
	unsigned int pmc, unit, byte, psel;
	unsigned int ttm, grp;
	int i, isbus, bit, grsel;
	unsigned int pmc_inuse = 0;	/* bitmap of PMCs claimed by events */
	unsigned int pmc_grp_use[2];	/* events needing PMC1/2 vs PMC3/4 */
	unsigned char busbyte[4];	/* unit feeding each event-bus byte */
	unsigned char unituse[16];	/* which TTM units are needed */
	int ttmuse;

	if (n_ev > 6)
		return -1;

	/* First pass to count resource use */
	pmc_grp_use[0] = pmc_grp_use[1] = 0;
	memset(busbyte, 0, sizeof(busbyte));
	memset(unituse, 0, sizeof(unituse));
	for (i = 0; i < n_ev; ++i) {
		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
		if (pmc) {
			if (pmc > 6)
				return -1;
			/* each PMC can be claimed by at most one event */
			if (pmc_inuse & (1 << (pmc - 1)))
				return -1;
			pmc_inuse |= 1 << (pmc - 1);
			/* count 1/2 vs 3/4 use */
			if (pmc <= 4)
				++pmc_grp_use[(pmc - 1) >> 1];
		}
		if (event[i] & PM_BUSEVENT_MSK) {
			unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
			byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
			if (unit > PM_LASTUNIT)
				return -1;
			if (unit == PM_ISU0_ALT)
				unit = PM_ISU0;
			if (byte >= 4) {
				/* only LSU1 drives bytes 4-7 */
				if (unit != PM_LSU1)
					return -1;
				/* LSU1 low word is treated as unit LSU1+1 */
				++unit;
				byte &= 3;
			}
			/* bytes 0/2 need PMC1/2, bytes 1/3 need PMC3/4 */
			if (!pmc)
				++pmc_grp_use[byte & 1];
			/* two different units can't feed the same byte */
			if (busbyte[byte] && busbyte[byte] != unit)
				return -1;
			busbyte[byte] = unit;
			unituse[unit] = 1;
		}
	}
	if (pmc_grp_use[0] > 2 || pmc_grp_use[1] > 2)
		return -1;

	/*
	 * Assign resources and set multiplexer selects.
	 *
	 * PM_ISU0 can go either on TTM0 or TTM1, but that's the only
	 * choice we have to deal with.
	 */
	if (unituse[PM_ISU0] &
	    (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) {
		unituse[PM_ISU0_ALT] = 1;	/* move ISU to TTM1 */
		unituse[PM_ISU0] = 0;
	}
	/* Set TTM[01]SEL fields. */
	ttmuse = 0;
	for (i = PM_FPU; i <= PM_ISU1; ++i) {
		if (!unituse[i])
			continue;
		/* only one unit may be routed through TTM0 */
		if (ttmuse++)
			return -1;
		mmcr1 |= (unsigned long)i << MMCR1_TTM0SEL_SH;
	}
	ttmuse = 0;
	for (; i <= PM_GRS; ++i) {
		if (!unituse[i])
			continue;
		if (ttmuse++)
			return -1;
		mmcr1 |= (unsigned long)(i & 3) << MMCR1_TTM1SEL_SH;
	}
	if (ttmuse > 1)
		return -1;

	/* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */
	for (byte = 0; byte < 4; ++byte) {
		unit = busbyte[byte];
		if (!unit)
			continue;
		if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) {
			/* get ISU0 through TTM1 rather than TTM0 */
			unit = PM_ISU0_ALT;
		} else if (unit == PM_LSU1 + 1) {
			/* select lower word of LSU1 for this byte */
			mmcr1 |= 1ul << (MMCR1_TTM3SEL_SH + 3 - byte);
		}
		ttm = unit >> 2;
		mmcr1 |= (unsigned long)ttm
			<< (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
	}

	/* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
	for (i = 0; i < n_ev; ++i) {
		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
		psel = event[i] & PM_PMCSEL_MSK;
		isbus = event[i] & PM_BUSEVENT_MSK;
		if (!pmc) {
			/* Bus event or any-PMC direct event */
			for (pmc = 0; pmc < 4; ++pmc) {
				if (pmc_inuse & (1 << pmc))
					continue;
				grp = (pmc >> 1) & 1;
				if (isbus) {
					/* bus event: byte parity fixes group */
					if (grp == (byte & 1))
						break;
				} else if (pmc_grp_use[grp] < 2) {
					++pmc_grp_use[grp];
					break;
				}
			}
			pmc_inuse |= 1 << pmc;
		} else if (pmc <= 4) {
			/* Direct event */
			--pmc;	/* make pmc 0-based from here on */
			if ((psel == 8 || psel == 0x10) && isbus && (byte & 2))
				/* add events on higher-numbered bus */
				mmcr1 |= 1ul << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
		} else {
			/* Instructions or run cycles on PMC5/6 */
			--pmc;
		}
		if (isbus && unit == PM_GRS) {
			/* program the GRS mux select for this event */
			bit = psel & 7;
			grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
			mmcr1 |= (unsigned long)grsel << grsel_shift[bit];
		}
		if (power5_marked_instr_event(event[i]))
			mmcra |= MMCRA_SAMPLE_ENABLE;
		if (pmc <= 3)
			mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
		hwc[i] = pmc;
	}

	/* Return MMCRx values */
	mmcr[0] = 0;
	/* pmc_inuse bit 0 => PMC1, bits 1-5 => PMC2-6 */
	if (pmc_inuse & 1)
		mmcr[0] = MMCR0_PMC1CE;
	if (pmc_inuse & 0x3e)
		mmcr[0] |= MMCR0_PMCjCE;
	mmcr[1] = mmcr1;
	mmcr[2] = mmcra;
	return 0;
}
| 544 | |||
/*
 * Turn off counting on (0-based) counter @pmc by zeroing its PMCSEL
 * field in the saved MMCR1 image.
 */
static void power5_disable_pmc(unsigned int pmc, unsigned long mmcr[])
{
	if (pmc > 3)
		return;		/* PMC5/6 have no PMCSEL field */
	mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
}
| 550 | |||
/* Map of generic perf hardware event ids to raw POWER5 event codes. */
static int power5_generic_events[] = {
	[PERF_COUNT_HW_CPU_CYCLES] = 0xf,
	[PERF_COUNT_HW_INSTRUCTIONS] = 0x100009,
	[PERF_COUNT_HW_CACHE_REFERENCES] = 0x4c1090, /* LD_REF_L1 */
	[PERF_COUNT_HW_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */
	[PERF_COUNT_HW_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */
};
| 559 | |||
| 560 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
| 561 | |||
/*
 * Table of generalized cache-related events, indexed by
 * [cache][operation][access/miss].  0 means not supported, -1 means
 * nonsensical, other values are raw POWER5 event codes.
 */
static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
	[C(L1D)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0x4c1090,	0x3c1088	},
		[C(OP_WRITE)] = {	0x3c1090,	0xc10c3		},
		[C(OP_PREFETCH)] = {	0xc70e7,	0		},
	},
	[C(L1I)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0,		0		},
		[C(OP_WRITE)] = {	-1,		-1		},
		[C(OP_PREFETCH)] = {	0,		0		},
	},
	[C(LL)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0,		0x3c309b	},
		[C(OP_WRITE)] = {	0,		0		},
		[C(OP_PREFETCH)] = {	0xc50c3,	0		},
	},
	[C(DTLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0x2c4090,	0x800c4		},
		[C(OP_WRITE)] = {	-1,		-1		},
		[C(OP_PREFETCH)] = {	-1,		-1		},
	},
	[C(ITLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0,		0x800c0		},
		[C(OP_WRITE)] = {	-1,		-1		},
		[C(OP_PREFETCH)] = {	-1,		-1		},
	},
	[C(BPU)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0x230e4,	0x230e5		},
		[C(OP_WRITE)] = {	-1,		-1		},
		[C(OP_PREFETCH)] = {	-1,		-1		},
	},
	[C(NODE)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	-1,		-1		},
		[C(OP_WRITE)] = {	-1,		-1		},
		[C(OP_PREFETCH)] = {	-1,		-1		},
	},
};
| 604 | |||
| 605 | static struct power_pmu power5_pmu = { | ||
| 606 | .name = "POWER5", | ||
| 607 | .n_counter = 6, | ||
| 608 | .max_alternatives = MAX_ALT, | ||
| 609 | .add_fields = 0x7000090000555ul, | ||
| 610 | .test_adder = 0x3000490000000ul, | ||
| 611 | .compute_mmcr = power5_compute_mmcr, | ||
| 612 | .get_constraint = power5_get_constraint, | ||
| 613 | .get_alternatives = power5_get_alternatives, | ||
| 614 | .disable_pmc = power5_disable_pmc, | ||
| 615 | .n_generic = ARRAY_SIZE(power5_generic_events), | ||
| 616 | .generic_events = power5_generic_events, | ||
| 617 | .cache_events = &power5_cache_events, | ||
| 618 | }; | ||
| 619 | |||
| 620 | static int __init init_power5_pmu(void) | ||
| 621 | { | ||
| 622 | if (!cur_cpu_spec->oprofile_cpu_type || | ||
| 623 | strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5")) | ||
| 624 | return -ENODEV; | ||
| 625 | |||
| 626 | return register_power_pmu(&power5_pmu); | ||
| 627 | } | ||
| 628 | |||
| 629 | early_initcall(init_power5_pmu); | ||
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c new file mode 100644 index 00000000000..03b95e2c6d6 --- /dev/null +++ b/arch/powerpc/kernel/power6-pmu.c | |||
| @@ -0,0 +1,552 @@ | |||
| 1 | /* | ||
| 2 | * Performance counter support for POWER6 processors. | ||
| 3 | * | ||
| 4 | * Copyright 2008-2009 Paul Mackerras, IBM Corporation. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU General Public License | ||
| 8 | * as published by the Free Software Foundation; either version | ||
| 9 | * 2 of the License, or (at your option) any later version. | ||
| 10 | */ | ||
| 11 | #include <linux/kernel.h> | ||
| 12 | #include <linux/perf_event.h> | ||
| 13 | #include <linux/string.h> | ||
| 14 | #include <asm/reg.h> | ||
| 15 | #include <asm/cputable.h> | ||
| 16 | |||
| 17 | /* | ||
| 18 | * Bits in event code for POWER6 | ||
| 19 | */ | ||
| 20 | #define PM_PMC_SH 20 /* PMC number (1-based) for direct events */ | ||
| 21 | #define PM_PMC_MSK 0x7 | ||
| 22 | #define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH) | ||
| 23 | #define PM_UNIT_SH 16 /* Unit event comes (TTMxSEL encoding) */ | ||
| 24 | #define PM_UNIT_MSK 0xf | ||
| 25 | #define PM_UNIT_MSKS (PM_UNIT_MSK << PM_UNIT_SH) | ||
| 26 | #define PM_LLAV 0x8000 /* Load lookahead match value */ | ||
| 27 | #define PM_LLA 0x4000 /* Load lookahead match enable */ | ||
| 28 | #define PM_BYTE_SH 12 /* Byte of event bus to use */ | ||
| 29 | #define PM_BYTE_MSK 3 | ||
| 30 | #define PM_SUBUNIT_SH 8 /* Subunit event comes from (NEST_SEL enc.) */ | ||
| 31 | #define PM_SUBUNIT_MSK 7 | ||
| 32 | #define PM_SUBUNIT_MSKS (PM_SUBUNIT_MSK << PM_SUBUNIT_SH) | ||
| 33 | #define PM_PMCSEL_MSK 0xff /* PMCxSEL value */ | ||
| 34 | #define PM_BUSEVENT_MSK 0xf3700 | ||
| 35 | |||
| 36 | /* | ||
| 37 | * Bits in MMCR1 for POWER6 | ||
| 38 | */ | ||
| 39 | #define MMCR1_TTM0SEL_SH 60 | ||
| 40 | #define MMCR1_TTMSEL_SH(n) (MMCR1_TTM0SEL_SH - (n) * 4) | ||
| 41 | #define MMCR1_TTMSEL_MSK 0xf | ||
| 42 | #define MMCR1_TTMSEL(m, n) (((m) >> MMCR1_TTMSEL_SH(n)) & MMCR1_TTMSEL_MSK) | ||
| 43 | #define MMCR1_NESTSEL_SH 45 | ||
| 44 | #define MMCR1_NESTSEL_MSK 0x7 | ||
| 45 | #define MMCR1_NESTSEL(m) (((m) >> MMCR1_NESTSEL_SH) & MMCR1_NESTSEL_MSK) | ||
| 46 | #define MMCR1_PMC1_LLA (1ul << 44) | ||
| 47 | #define MMCR1_PMC1_LLA_VALUE (1ul << 39) | ||
| 48 | #define MMCR1_PMC1_ADDR_SEL (1ul << 35) | ||
| 49 | #define MMCR1_PMC1SEL_SH 24 | ||
| 50 | #define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) | ||
| 51 | #define MMCR1_PMCSEL_MSK 0xff | ||
| 52 | |||
| 53 | /* | ||
| 54 | * Map of which direct events on which PMCs are marked instruction events. | ||
| 55 | * Indexed by PMCSEL value >> 1. | ||
| 56 | * Bottom 4 bits are a map of which PMCs are interesting, | ||
| 57 | * top 4 bits say what sort of event: | ||
| 58 | * 0 = direct marked event, | ||
| 59 | * 1 = byte decode event, | ||
| 60 | * 4 = add/and event (PMC1 -> bits 0 & 4), | ||
| 61 | * 5 = add/and event (PMC1 -> bits 1 & 5), | ||
| 62 | * 6 = add/and event (PMC1 -> bits 2 & 6), | ||
| 63 | * 7 = add/and event (PMC1 -> bits 3 & 7). | ||
| 64 | */ | ||
| 65 | static unsigned char direct_event_is_marked[0x60 >> 1] = { | ||
| 66 | 0, /* 00 */ | ||
| 67 | 0, /* 02 */ | ||
| 68 | 0, /* 04 */ | ||
| 69 | 0x07, /* 06 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */ | ||
| 70 | 0x04, /* 08 PM_MRK_DFU_FIN */ | ||
| 71 | 0x06, /* 0a PM_MRK_IFU_FIN, PM_MRK_INST_FIN */ | ||
| 72 | 0, /* 0c */ | ||
| 73 | 0, /* 0e */ | ||
| 74 | 0x02, /* 10 PM_MRK_INST_DISP */ | ||
| 75 | 0x08, /* 12 PM_MRK_LSU_DERAT_MISS */ | ||
| 76 | 0, /* 14 */ | ||
| 77 | 0, /* 16 */ | ||
| 78 | 0x0c, /* 18 PM_THRESH_TIMEO, PM_MRK_INST_FIN */ | ||
| 79 | 0x0f, /* 1a PM_MRK_INST_DISP, PM_MRK_{FXU,FPU,LSU}_FIN */ | ||
| 80 | 0x01, /* 1c PM_MRK_INST_ISSUED */ | ||
| 81 | 0, /* 1e */ | ||
| 82 | 0, /* 20 */ | ||
| 83 | 0, /* 22 */ | ||
| 84 | 0, /* 24 */ | ||
| 85 | 0, /* 26 */ | ||
| 86 | 0x15, /* 28 PM_MRK_DATA_FROM_L2MISS, PM_MRK_DATA_FROM_L3MISS */ | ||
| 87 | 0, /* 2a */ | ||
| 88 | 0, /* 2c */ | ||
| 89 | 0, /* 2e */ | ||
| 90 | 0x4f, /* 30 */ | ||
| 91 | 0x7f, /* 32 */ | ||
| 92 | 0x4f, /* 34 */ | ||
| 93 | 0x5f, /* 36 */ | ||
| 94 | 0x6f, /* 38 */ | ||
| 95 | 0x4f, /* 3a */ | ||
| 96 | 0, /* 3c */ | ||
| 97 | 0x08, /* 3e PM_MRK_INST_TIMEO */ | ||
| 98 | 0x1f, /* 40 */ | ||
| 99 | 0x1f, /* 42 */ | ||
| 100 | 0x1f, /* 44 */ | ||
| 101 | 0x1f, /* 46 */ | ||
| 102 | 0x1f, /* 48 */ | ||
| 103 | 0x1f, /* 4a */ | ||
| 104 | 0x1f, /* 4c */ | ||
| 105 | 0x1f, /* 4e */ | ||
| 106 | 0, /* 50 */ | ||
| 107 | 0x05, /* 52 PM_MRK_BR_TAKEN, PM_MRK_BR_MPRED */ | ||
| 108 | 0x1c, /* 54 PM_MRK_PTEG_FROM_L3MISS, PM_MRK_PTEG_FROM_L2MISS */ | ||
| 109 | 0x02, /* 56 PM_MRK_LD_MISS_L1 */ | ||
| 110 | 0, /* 58 */ | ||
| 111 | 0, /* 5a */ | ||
| 112 | 0, /* 5c */ | ||
| 113 | 0, /* 5e */ | ||
| 114 | }; | ||
| 115 | |||
| 116 | /* | ||
| 117 | * Masks showing for each unit which bits are marked events. | ||
| 118 | * These masks are in LE order, i.e. 0x00000001 is byte 0, bit 0. | ||
| 119 | */ | ||
| 120 | static u32 marked_bus_events[16] = { | ||
| 121 | 0x01000000, /* direct events set 1: byte 3 bit 0 */ | ||
| 122 | 0x00010000, /* direct events set 2: byte 2 bit 0 */ | ||
| 123 | 0, 0, 0, 0, /* IDU, IFU, nest: nothing */ | ||
| 124 | 0x00000088, /* VMX set 1: byte 0 bits 3, 7 */ | ||
| 125 | 0x000000c0, /* VMX set 2: byte 0 bits 4-7 */ | ||
| 126 | 0x04010000, /* LSU set 1: byte 2 bit 0, byte 3 bit 2 */ | ||
| 127 | 0xff010000u, /* LSU set 2: byte 2 bit 0, all of byte 3 */ | ||
| 128 | 0, /* LSU set 3 */ | ||
| 129 | 0x00000010, /* VMX set 3: byte 0 bit 4 */ | ||
| 130 | 0, /* BFP set 1 */ | ||
| 131 | 0x00000022, /* BFP set 2: byte 0 bits 1, 5 */ | ||
| 132 | 0, 0 | ||
| 133 | }; | ||
| 134 | |||
| 135 | /* | ||
| 136 | * Returns 1 if event counts things relating to marked instructions | ||
| 137 | * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. | ||
| 138 | */ | ||
| 139 | static int power6_marked_instr_event(u64 event) | ||
| 140 | { | ||
| 141 | int pmc, psel, ptype; | ||
| 142 | int bit, byte, unit; | ||
| 143 | u32 mask; | ||
| 144 | |||
| 145 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
| 146 | psel = (event & PM_PMCSEL_MSK) >> 1; /* drop edge/level bit */ | ||
| 147 | if (pmc >= 5) | ||
| 148 | return 0; | ||
| 149 | |||
| 150 | bit = -1; | ||
| 151 | if (psel < sizeof(direct_event_is_marked)) { | ||
| 152 | ptype = direct_event_is_marked[psel]; | ||
| 153 | if (pmc == 0 || !(ptype & (1 << (pmc - 1)))) | ||
| 154 | return 0; | ||
| 155 | ptype >>= 4; | ||
| 156 | if (ptype == 0) | ||
| 157 | return 1; | ||
| 158 | if (ptype == 1) | ||
| 159 | bit = 0; | ||
| 160 | else | ||
| 161 | bit = ptype ^ (pmc - 1); | ||
| 162 | } else if ((psel & 0x48) == 0x40) | ||
| 163 | bit = psel & 7; | ||
| 164 | |||
| 165 | if (!(event & PM_BUSEVENT_MSK) || bit == -1) | ||
| 166 | return 0; | ||
| 167 | |||
| 168 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
| 169 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
| 170 | mask = marked_bus_events[unit]; | ||
| 171 | return (mask >> (byte * 8 + bit)) & 1; | ||
| 172 | } | ||
| 173 | |||
| 174 | /* | ||
| 175 | * Assign PMC numbers and compute MMCR1 value for a set of events | ||
| 176 | */ | ||
| 177 | static int p6_compute_mmcr(u64 event[], int n_ev, | ||
| 178 | unsigned int hwc[], unsigned long mmcr[]) | ||
| 179 | { | ||
| 180 | unsigned long mmcr1 = 0; | ||
| 181 | unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; | ||
| 182 | int i; | ||
| 183 | unsigned int pmc, ev, b, u, s, psel; | ||
| 184 | unsigned int ttmset = 0; | ||
| 185 | unsigned int pmc_inuse = 0; | ||
| 186 | |||
| 187 | if (n_ev > 6) | ||
| 188 | return -1; | ||
| 189 | for (i = 0; i < n_ev; ++i) { | ||
| 190 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
| 191 | if (pmc) { | ||
| 192 | if (pmc_inuse & (1 << (pmc - 1))) | ||
| 193 | return -1; /* collision! */ | ||
| 194 | pmc_inuse |= 1 << (pmc - 1); | ||
| 195 | } | ||
| 196 | } | ||
| 197 | for (i = 0; i < n_ev; ++i) { | ||
| 198 | ev = event[i]; | ||
| 199 | pmc = (ev >> PM_PMC_SH) & PM_PMC_MSK; | ||
| 200 | if (pmc) { | ||
| 201 | --pmc; | ||
| 202 | } else { | ||
| 203 | /* can go on any PMC; find a free one */ | ||
| 204 | for (pmc = 0; pmc < 4; ++pmc) | ||
| 205 | if (!(pmc_inuse & (1 << pmc))) | ||
| 206 | break; | ||
| 207 | if (pmc >= 4) | ||
| 208 | return -1; | ||
| 209 | pmc_inuse |= 1 << pmc; | ||
| 210 | } | ||
| 211 | hwc[i] = pmc; | ||
| 212 | psel = ev & PM_PMCSEL_MSK; | ||
| 213 | if (ev & PM_BUSEVENT_MSK) { | ||
| 214 | /* this event uses the event bus */ | ||
| 215 | b = (ev >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
| 216 | u = (ev >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
| 217 | /* check for conflict on this byte of event bus */ | ||
| 218 | if ((ttmset & (1 << b)) && MMCR1_TTMSEL(mmcr1, b) != u) | ||
| 219 | return -1; | ||
| 220 | mmcr1 |= (unsigned long)u << MMCR1_TTMSEL_SH(b); | ||
| 221 | ttmset |= 1 << b; | ||
| 222 | if (u == 5) { | ||
| 223 | /* Nest events have a further mux */ | ||
| 224 | s = (ev >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK; | ||
| 225 | if ((ttmset & 0x10) && | ||
| 226 | MMCR1_NESTSEL(mmcr1) != s) | ||
| 227 | return -1; | ||
| 228 | ttmset |= 0x10; | ||
| 229 | mmcr1 |= (unsigned long)s << MMCR1_NESTSEL_SH; | ||
| 230 | } | ||
| 231 | if (0x30 <= psel && psel <= 0x3d) { | ||
| 232 | /* these need the PMCx_ADDR_SEL bits */ | ||
| 233 | if (b >= 2) | ||
| 234 | mmcr1 |= MMCR1_PMC1_ADDR_SEL >> pmc; | ||
| 235 | } | ||
| 236 | /* bus select values are different for PMC3/4 */ | ||
| 237 | if (pmc >= 2 && (psel & 0x90) == 0x80) | ||
| 238 | psel ^= 0x20; | ||
| 239 | } | ||
| 240 | if (ev & PM_LLA) { | ||
| 241 | mmcr1 |= MMCR1_PMC1_LLA >> pmc; | ||
| 242 | if (ev & PM_LLAV) | ||
| 243 | mmcr1 |= MMCR1_PMC1_LLA_VALUE >> pmc; | ||
| 244 | } | ||
| 245 | if (power6_marked_instr_event(event[i])) | ||
| 246 | mmcra |= MMCRA_SAMPLE_ENABLE; | ||
| 247 | if (pmc < 4) | ||
| 248 | mmcr1 |= (unsigned long)psel << MMCR1_PMCSEL_SH(pmc); | ||
| 249 | } | ||
| 250 | mmcr[0] = 0; | ||
| 251 | if (pmc_inuse & 1) | ||
| 252 | mmcr[0] = MMCR0_PMC1CE; | ||
| 253 | if (pmc_inuse & 0xe) | ||
| 254 | mmcr[0] |= MMCR0_PMCjCE; | ||
| 255 | mmcr[1] = mmcr1; | ||
| 256 | mmcr[2] = mmcra; | ||
| 257 | return 0; | ||
| 258 | } | ||
| 259 | |||
| 260 | /* | ||
| 261 | * Layout of constraint bits: | ||
| 262 | * | ||
| 263 | * 0-1 add field: number of uses of PMC1 (max 1) | ||
| 264 | * 2-3, 4-5, 6-7, 8-9, 10-11: ditto for PMC2, 3, 4, 5, 6 | ||
| 265 | * 12-15 add field: number of uses of PMC1-4 (max 4) | ||
| 266 | * 16-19 select field: unit on byte 0 of event bus | ||
| 267 | * 20-23, 24-27, 28-31 ditto for bytes 1, 2, 3 | ||
| 268 | * 32-34 select field: nest (subunit) event selector | ||
| 269 | */ | ||
| 270 | static int p6_get_constraint(u64 event, unsigned long *maskp, | ||
| 271 | unsigned long *valp) | ||
| 272 | { | ||
| 273 | int pmc, byte, sh, subunit; | ||
| 274 | unsigned long mask = 0, value = 0; | ||
| 275 | |||
| 276 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
| 277 | if (pmc) { | ||
| 278 | if (pmc > 4 && !(event == 0x500009 || event == 0x600005)) | ||
| 279 | return -1; | ||
| 280 | sh = (pmc - 1) * 2; | ||
| 281 | mask |= 2 << sh; | ||
| 282 | value |= 1 << sh; | ||
| 283 | } | ||
| 284 | if (event & PM_BUSEVENT_MSK) { | ||
| 285 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
| 286 | sh = byte * 4 + (16 - PM_UNIT_SH); | ||
| 287 | mask |= PM_UNIT_MSKS << sh; | ||
| 288 | value |= (unsigned long)(event & PM_UNIT_MSKS) << sh; | ||
| 289 | if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) { | ||
| 290 | subunit = (event >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK; | ||
| 291 | mask |= (unsigned long)PM_SUBUNIT_MSK << 32; | ||
| 292 | value |= (unsigned long)subunit << 32; | ||
| 293 | } | ||
| 294 | } | ||
| 295 | if (pmc <= 4) { | ||
| 296 | mask |= 0x8000; /* add field for count of PMC1-4 uses */ | ||
| 297 | value |= 0x1000; | ||
| 298 | } | ||
| 299 | *maskp = mask; | ||
| 300 | *valp = value; | ||
| 301 | return 0; | ||
| 302 | } | ||
| 303 | |||
| 304 | static int p6_limited_pmc_event(u64 event) | ||
| 305 | { | ||
| 306 | int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
| 307 | |||
| 308 | return pmc == 5 || pmc == 6; | ||
| 309 | } | ||
| 310 | |||
| 311 | #define MAX_ALT 4 /* at most 4 alternatives for any event */ | ||
| 312 | |||
| 313 | static const unsigned int event_alternatives[][MAX_ALT] = { | ||
| 314 | { 0x0130e8, 0x2000f6, 0x3000fc }, /* PM_PTEG_RELOAD_VALID */ | ||
| 315 | { 0x080080, 0x10000d, 0x30000c, 0x4000f0 }, /* PM_LD_MISS_L1 */ | ||
| 316 | { 0x080088, 0x200054, 0x3000f0 }, /* PM_ST_MISS_L1 */ | ||
| 317 | { 0x10000a, 0x2000f4, 0x600005 }, /* PM_RUN_CYC */ | ||
| 318 | { 0x10000b, 0x2000f5 }, /* PM_RUN_COUNT */ | ||
| 319 | { 0x10000e, 0x400010 }, /* PM_PURR */ | ||
| 320 | { 0x100010, 0x4000f8 }, /* PM_FLUSH */ | ||
| 321 | { 0x10001a, 0x200010 }, /* PM_MRK_INST_DISP */ | ||
| 322 | { 0x100026, 0x3000f8 }, /* PM_TB_BIT_TRANS */ | ||
| 323 | { 0x100054, 0x2000f0 }, /* PM_ST_FIN */ | ||
| 324 | { 0x100056, 0x2000fc }, /* PM_L1_ICACHE_MISS */ | ||
| 325 | { 0x1000f0, 0x40000a }, /* PM_INST_IMC_MATCH_CMPL */ | ||
| 326 | { 0x1000f8, 0x200008 }, /* PM_GCT_EMPTY_CYC */ | ||
| 327 | { 0x1000fc, 0x400006 }, /* PM_LSU_DERAT_MISS_CYC */ | ||
| 328 | { 0x20000e, 0x400007 }, /* PM_LSU_DERAT_MISS */ | ||
| 329 | { 0x200012, 0x300012 }, /* PM_INST_DISP */ | ||
| 330 | { 0x2000f2, 0x3000f2 }, /* PM_INST_DISP */ | ||
| 331 | { 0x2000f8, 0x300010 }, /* PM_EXT_INT */ | ||
| 332 | { 0x2000fe, 0x300056 }, /* PM_DATA_FROM_L2MISS */ | ||
| 333 | { 0x2d0030, 0x30001a }, /* PM_MRK_FPU_FIN */ | ||
| 334 | { 0x30000a, 0x400018 }, /* PM_MRK_INST_FIN */ | ||
| 335 | { 0x3000f6, 0x40000e }, /* PM_L1_DCACHE_RELOAD_VALID */ | ||
| 336 | { 0x3000fe, 0x400056 }, /* PM_DATA_FROM_L3MISS */ | ||
| 337 | }; | ||
| 338 | |||
| 339 | /* | ||
| 340 | * This could be made more efficient with a binary search on | ||
| 341 | * a presorted list, if necessary | ||
| 342 | */ | ||
| 343 | static int find_alternatives_list(u64 event) | ||
| 344 | { | ||
| 345 | int i, j; | ||
| 346 | unsigned int alt; | ||
| 347 | |||
| 348 | for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { | ||
| 349 | if (event < event_alternatives[i][0]) | ||
| 350 | return -1; | ||
| 351 | for (j = 0; j < MAX_ALT; ++j) { | ||
| 352 | alt = event_alternatives[i][j]; | ||
| 353 | if (!alt || event < alt) | ||
| 354 | break; | ||
| 355 | if (event == alt) | ||
| 356 | return i; | ||
| 357 | } | ||
| 358 | } | ||
| 359 | return -1; | ||
| 360 | } | ||
| 361 | |||
| 362 | static int p6_get_alternatives(u64 event, unsigned int flags, u64 alt[]) | ||
| 363 | { | ||
| 364 | int i, j, nlim; | ||
| 365 | unsigned int psel, pmc; | ||
| 366 | unsigned int nalt = 1; | ||
| 367 | u64 aevent; | ||
| 368 | |||
| 369 | alt[0] = event; | ||
| 370 | nlim = p6_limited_pmc_event(event); | ||
| 371 | |||
| 372 | /* check the alternatives table */ | ||
| 373 | i = find_alternatives_list(event); | ||
| 374 | if (i >= 0) { | ||
| 375 | /* copy out alternatives from list */ | ||
| 376 | for (j = 0; j < MAX_ALT; ++j) { | ||
| 377 | aevent = event_alternatives[i][j]; | ||
| 378 | if (!aevent) | ||
| 379 | break; | ||
| 380 | if (aevent != event) | ||
| 381 | alt[nalt++] = aevent; | ||
| 382 | nlim += p6_limited_pmc_event(aevent); | ||
| 383 | } | ||
| 384 | |||
| 385 | } else { | ||
| 386 | /* Check for alternative ways of computing sum events */ | ||
| 387 | /* PMCSEL 0x32 counter N == PMCSEL 0x34 counter 5-N */ | ||
| 388 | psel = event & (PM_PMCSEL_MSK & ~1); /* ignore edge bit */ | ||
| 389 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
| 390 | if (pmc && (psel == 0x32 || psel == 0x34)) | ||
| 391 | alt[nalt++] = ((event ^ 0x6) & ~PM_PMC_MSKS) | | ||
| 392 | ((5 - pmc) << PM_PMC_SH); | ||
| 393 | |||
| 394 | /* PMCSEL 0x38 counter N == PMCSEL 0x3a counter N+/-2 */ | ||
| 395 | if (pmc && (psel == 0x38 || psel == 0x3a)) | ||
| 396 | alt[nalt++] = ((event ^ 0x2) & ~PM_PMC_MSKS) | | ||
| 397 | ((pmc > 2? pmc - 2: pmc + 2) << PM_PMC_SH); | ||
| 398 | } | ||
| 399 | |||
| 400 | if (flags & PPMU_ONLY_COUNT_RUN) { | ||
| 401 | /* | ||
| 402 | * We're only counting in RUN state, | ||
| 403 | * so PM_CYC is equivalent to PM_RUN_CYC, | ||
| 404 | * PM_INST_CMPL === PM_RUN_INST_CMPL, PM_PURR === PM_RUN_PURR. | ||
| 405 | * This doesn't include alternatives that don't provide | ||
| 406 | * any extra flexibility in assigning PMCs (e.g. | ||
| 407 | * 0x10000a for PM_RUN_CYC vs. 0x1e for PM_CYC). | ||
| 408 | * Note that even with these additional alternatives | ||
| 409 | * we never end up with more than 4 alternatives for any event. | ||
| 410 | */ | ||
| 411 | j = nalt; | ||
| 412 | for (i = 0; i < nalt; ++i) { | ||
| 413 | switch (alt[i]) { | ||
| 414 | case 0x1e: /* PM_CYC */ | ||
| 415 | alt[j++] = 0x600005; /* PM_RUN_CYC */ | ||
| 416 | ++nlim; | ||
| 417 | break; | ||
| 418 | case 0x10000a: /* PM_RUN_CYC */ | ||
| 419 | alt[j++] = 0x1e; /* PM_CYC */ | ||
| 420 | break; | ||
| 421 | case 2: /* PM_INST_CMPL */ | ||
| 422 | alt[j++] = 0x500009; /* PM_RUN_INST_CMPL */ | ||
| 423 | ++nlim; | ||
| 424 | break; | ||
| 425 | case 0x500009: /* PM_RUN_INST_CMPL */ | ||
| 426 | alt[j++] = 2; /* PM_INST_CMPL */ | ||
| 427 | break; | ||
| 428 | case 0x10000e: /* PM_PURR */ | ||
| 429 | alt[j++] = 0x4000f4; /* PM_RUN_PURR */ | ||
| 430 | break; | ||
| 431 | case 0x4000f4: /* PM_RUN_PURR */ | ||
| 432 | alt[j++] = 0x10000e; /* PM_PURR */ | ||
| 433 | break; | ||
| 434 | } | ||
| 435 | } | ||
| 436 | nalt = j; | ||
| 437 | } | ||
| 438 | |||
| 439 | if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) { | ||
| 440 | /* remove the limited PMC events */ | ||
| 441 | j = 0; | ||
| 442 | for (i = 0; i < nalt; ++i) { | ||
| 443 | if (!p6_limited_pmc_event(alt[i])) { | ||
| 444 | alt[j] = alt[i]; | ||
| 445 | ++j; | ||
| 446 | } | ||
| 447 | } | ||
| 448 | nalt = j; | ||
| 449 | } else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) { | ||
| 450 | /* remove all but the limited PMC events */ | ||
| 451 | j = 0; | ||
| 452 | for (i = 0; i < nalt; ++i) { | ||
| 453 | if (p6_limited_pmc_event(alt[i])) { | ||
| 454 | alt[j] = alt[i]; | ||
| 455 | ++j; | ||
| 456 | } | ||
| 457 | } | ||
| 458 | nalt = j; | ||
| 459 | } | ||
| 460 | |||
| 461 | return nalt; | ||
| 462 | } | ||
| 463 | |||
| 464 | static void p6_disable_pmc(unsigned int pmc, unsigned long mmcr[]) | ||
| 465 | { | ||
| 466 | /* Set PMCxSEL to 0 to disable PMCx */ | ||
| 467 | if (pmc <= 3) | ||
| 468 | mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); | ||
| 469 | } | ||
| 470 | |||
| 471 | static int power6_generic_events[] = { | ||
| 472 | [PERF_COUNT_HW_CPU_CYCLES] = 0x1e, | ||
| 473 | [PERF_COUNT_HW_INSTRUCTIONS] = 2, | ||
| 474 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x280030, /* LD_REF_L1 */ | ||
| 475 | [PERF_COUNT_HW_CACHE_MISSES] = 0x30000c, /* LD_MISS_L1 */ | ||
| 476 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x410a0, /* BR_PRED */ | ||
| 477 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x400052, /* BR_MPRED */ | ||
| 478 | }; | ||
| 479 | |||
| 480 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
| 481 | |||
| 482 | /* | ||
| 483 | * Table of generalized cache-related events. | ||
| 484 | * 0 means not supported, -1 means nonsensical, other values | ||
| 485 | * are event codes. | ||
| 486 | * The "DTLB" and "ITLB" events relate to the DERAT and IERAT. | ||
| 487 | */ | ||
| 488 | static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { | ||
| 489 | [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
| 490 | [C(OP_READ)] = { 0x80082, 0x80080 }, | ||
| 491 | [C(OP_WRITE)] = { 0x80086, 0x80088 }, | ||
| 492 | [C(OP_PREFETCH)] = { 0x810a4, 0 }, | ||
| 493 | }, | ||
| 494 | [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
| 495 | [C(OP_READ)] = { 0, 0x100056 }, | ||
| 496 | [C(OP_WRITE)] = { -1, -1 }, | ||
| 497 | [C(OP_PREFETCH)] = { 0x4008c, 0 }, | ||
| 498 | }, | ||
| 499 | [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
| 500 | [C(OP_READ)] = { 0x150730, 0x250532 }, | ||
| 501 | [C(OP_WRITE)] = { 0x250432, 0x150432 }, | ||
| 502 | [C(OP_PREFETCH)] = { 0x810a6, 0 }, | ||
| 503 | }, | ||
| 504 | [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
| 505 | [C(OP_READ)] = { 0, 0x20000e }, | ||
| 506 | [C(OP_WRITE)] = { -1, -1 }, | ||
| 507 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
| 508 | }, | ||
| 509 | [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
| 510 | [C(OP_READ)] = { 0, 0x420ce }, | ||
| 511 | [C(OP_WRITE)] = { -1, -1 }, | ||
| 512 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
| 513 | }, | ||
| 514 | [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
| 515 | [C(OP_READ)] = { 0x430e6, 0x400052 }, | ||
| 516 | [C(OP_WRITE)] = { -1, -1 }, | ||
| 517 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
| 518 | }, | ||
| 519 | [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
| 520 | [C(OP_READ)] = { -1, -1 }, | ||
| 521 | [C(OP_WRITE)] = { -1, -1 }, | ||
| 522 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
| 523 | }, | ||
| 524 | }; | ||
| 525 | |||
| 526 | static struct power_pmu power6_pmu = { | ||
| 527 | .name = "POWER6", | ||
| 528 | .n_counter = 6, | ||
| 529 | .max_alternatives = MAX_ALT, | ||
| 530 | .add_fields = 0x1555, | ||
| 531 | .test_adder = 0x3000, | ||
| 532 | .compute_mmcr = p6_compute_mmcr, | ||
| 533 | .get_constraint = p6_get_constraint, | ||
| 534 | .get_alternatives = p6_get_alternatives, | ||
| 535 | .disable_pmc = p6_disable_pmc, | ||
| 536 | .limited_pmc_event = p6_limited_pmc_event, | ||
| 537 | .flags = PPMU_LIMITED_PMC5_6 | PPMU_ALT_SIPR, | ||
| 538 | .n_generic = ARRAY_SIZE(power6_generic_events), | ||
| 539 | .generic_events = power6_generic_events, | ||
| 540 | .cache_events = &power6_cache_events, | ||
| 541 | }; | ||
| 542 | |||
| 543 | static int __init init_power6_pmu(void) | ||
| 544 | { | ||
| 545 | if (!cur_cpu_spec->oprofile_cpu_type || | ||
| 546 | strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power6")) | ||
| 547 | return -ENODEV; | ||
| 548 | |||
| 549 | return register_power_pmu(&power6_pmu); | ||
| 550 | } | ||
| 551 | |||
| 552 | early_initcall(init_power6_pmu); | ||
diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c new file mode 100644 index 00000000000..de83d6060dd --- /dev/null +++ b/arch/powerpc/kernel/power7-pmu.c | |||
| @@ -0,0 +1,377 @@ | |||
| 1 | /* | ||
| 2 | * Performance counter support for POWER7 processors. | ||
| 3 | * | ||
| 4 | * Copyright 2009 Paul Mackerras, IBM Corporation. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU General Public License | ||
| 8 | * as published by the Free Software Foundation; either version | ||
| 9 | * 2 of the License, or (at your option) any later version. | ||
| 10 | */ | ||
| 11 | #include <linux/kernel.h> | ||
| 12 | #include <linux/perf_event.h> | ||
| 13 | #include <linux/string.h> | ||
| 14 | #include <asm/reg.h> | ||
| 15 | #include <asm/cputable.h> | ||
| 16 | |||
| 17 | /* | ||
| 18 | * Bits in event code for POWER7 | ||
| 19 | */ | ||
| 20 | #define PM_PMC_SH 16 /* PMC number (1-based) for direct events */ | ||
| 21 | #define PM_PMC_MSK 0xf | ||
| 22 | #define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH) | ||
| 23 | #define PM_UNIT_SH 12 /* TTMMUX number and setting - unit select */ | ||
| 24 | #define PM_UNIT_MSK 0xf | ||
| 25 | #define PM_COMBINE_SH 11 /* Combined event bit */ | ||
| 26 | #define PM_COMBINE_MSK 1 | ||
| 27 | #define PM_COMBINE_MSKS 0x800 | ||
| 28 | #define PM_L2SEL_SH 8 /* L2 event select */ | ||
| 29 | #define PM_L2SEL_MSK 7 | ||
| 30 | #define PM_PMCSEL_MSK 0xff | ||
| 31 | |||
| 32 | /* | ||
| 33 | * Bits in MMCR1 for POWER7 | ||
| 34 | */ | ||
| 35 | #define MMCR1_TTM0SEL_SH 60 | ||
| 36 | #define MMCR1_TTM1SEL_SH 56 | ||
| 37 | #define MMCR1_TTM2SEL_SH 52 | ||
| 38 | #define MMCR1_TTM3SEL_SH 48 | ||
| 39 | #define MMCR1_TTMSEL_MSK 0xf | ||
| 40 | #define MMCR1_L2SEL_SH 45 | ||
| 41 | #define MMCR1_L2SEL_MSK 7 | ||
| 42 | #define MMCR1_PMC1_COMBINE_SH 35 | ||
| 43 | #define MMCR1_PMC2_COMBINE_SH 34 | ||
| 44 | #define MMCR1_PMC3_COMBINE_SH 33 | ||
| 45 | #define MMCR1_PMC4_COMBINE_SH 32 | ||
| 46 | #define MMCR1_PMC1SEL_SH 24 | ||
| 47 | #define MMCR1_PMC2SEL_SH 16 | ||
| 48 | #define MMCR1_PMC3SEL_SH 8 | ||
| 49 | #define MMCR1_PMC4SEL_SH 0 | ||
| 50 | #define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) | ||
| 51 | #define MMCR1_PMCSEL_MSK 0xff | ||
| 52 | |||
| 53 | /* | ||
| 54 | * Layout of constraint bits: | ||
| 55 | * 6666555555555544444444443333333333222222222211111111110000000000 | ||
| 56 | * 3210987654321098765432109876543210987654321098765432109876543210 | ||
| 57 | * [ ><><><><><><> | ||
| 58 | * NC P6P5P4P3P2P1 | ||
| 59 | * | ||
| 60 | * NC - number of counters | ||
| 61 | * 15: NC error 0x8000 | ||
| 62 | * 12-14: number of events needing PMC1-4 0x7000 | ||
| 63 | * | ||
| 64 | * P6 | ||
| 65 | * 11: P6 error 0x800 | ||
| 66 | * 10-11: Count of events needing PMC6 | ||
| 67 | * | ||
| 68 | * P1..P5 | ||
| 69 | * 0-9: Count of events needing PMC1..PMC5 | ||
| 70 | */ | ||
| 71 | |||
| 72 | static int power7_get_constraint(u64 event, unsigned long *maskp, | ||
| 73 | unsigned long *valp) | ||
| 74 | { | ||
| 75 | int pmc, sh; | ||
| 76 | unsigned long mask = 0, value = 0; | ||
| 77 | |||
| 78 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
| 79 | if (pmc) { | ||
| 80 | if (pmc > 6) | ||
| 81 | return -1; | ||
| 82 | sh = (pmc - 1) * 2; | ||
| 83 | mask |= 2 << sh; | ||
| 84 | value |= 1 << sh; | ||
| 85 | if (pmc >= 5 && !(event == 0x500fa || event == 0x600f4)) | ||
| 86 | return -1; | ||
| 87 | } | ||
| 88 | if (pmc < 5) { | ||
| 89 | /* need a counter from PMC1-4 set */ | ||
| 90 | mask |= 0x8000; | ||
| 91 | value |= 0x1000; | ||
| 92 | } | ||
| 93 | *maskp = mask; | ||
| 94 | *valp = value; | ||
| 95 | return 0; | ||
| 96 | } | ||
| 97 | |||
| 98 | #define MAX_ALT 2 /* at most 2 alternatives for any event */ | ||
| 99 | |||
| 100 | static const unsigned int event_alternatives[][MAX_ALT] = { | ||
| 101 | { 0x200f2, 0x300f2 }, /* PM_INST_DISP */ | ||
| 102 | { 0x200f4, 0x600f4 }, /* PM_RUN_CYC */ | ||
| 103 | { 0x400fa, 0x500fa }, /* PM_RUN_INST_CMPL */ | ||
| 104 | }; | ||
| 105 | |||
| 106 | /* | ||
| 107 | * Scan the alternatives table for a match and return the | ||
| 108 | * index into the alternatives table if found, else -1. | ||
| 109 | */ | ||
| 110 | static int find_alternative(u64 event) | ||
| 111 | { | ||
| 112 | int i, j; | ||
| 113 | |||
| 114 | for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { | ||
| 115 | if (event < event_alternatives[i][0]) | ||
| 116 | break; | ||
| 117 | for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j) | ||
| 118 | if (event == event_alternatives[i][j]) | ||
| 119 | return i; | ||
| 120 | } | ||
| 121 | return -1; | ||
| 122 | } | ||
| 123 | |||
| 124 | static s64 find_alternative_decode(u64 event) | ||
| 125 | { | ||
| 126 | int pmc, psel; | ||
| 127 | |||
| 128 | /* this only handles the 4x decode events */ | ||
| 129 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
| 130 | psel = event & PM_PMCSEL_MSK; | ||
| 131 | if ((pmc == 2 || pmc == 4) && (psel & ~7) == 0x40) | ||
| 132 | return event - (1 << PM_PMC_SH) + 8; | ||
| 133 | if ((pmc == 1 || pmc == 3) && (psel & ~7) == 0x48) | ||
| 134 | return event + (1 << PM_PMC_SH) - 8; | ||
| 135 | return -1; | ||
| 136 | } | ||
| 137 | |||
| 138 | static int power7_get_alternatives(u64 event, unsigned int flags, u64 alt[]) | ||
| 139 | { | ||
| 140 | int i, j, nalt = 1; | ||
| 141 | s64 ae; | ||
| 142 | |||
| 143 | alt[0] = event; | ||
| 144 | nalt = 1; | ||
| 145 | i = find_alternative(event); | ||
| 146 | if (i >= 0) { | ||
| 147 | for (j = 0; j < MAX_ALT; ++j) { | ||
| 148 | ae = event_alternatives[i][j]; | ||
| 149 | if (ae && ae != event) | ||
| 150 | alt[nalt++] = ae; | ||
| 151 | } | ||
| 152 | } else { | ||
| 153 | ae = find_alternative_decode(event); | ||
| 154 | if (ae > 0) | ||
| 155 | alt[nalt++] = ae; | ||
| 156 | } | ||
| 157 | |||
| 158 | if (flags & PPMU_ONLY_COUNT_RUN) { | ||
| 159 | /* | ||
| 160 | * We're only counting in RUN state, | ||
| 161 | * so PM_CYC is equivalent to PM_RUN_CYC | ||
| 162 | * and PM_INST_CMPL === PM_RUN_INST_CMPL. | ||
| 163 | * This doesn't include alternatives that don't provide | ||
| 164 | * any extra flexibility in assigning PMCs. | ||
| 165 | */ | ||
| 166 | j = nalt; | ||
| 167 | for (i = 0; i < nalt; ++i) { | ||
| 168 | switch (alt[i]) { | ||
| 169 | case 0x1e: /* PM_CYC */ | ||
| 170 | alt[j++] = 0x600f4; /* PM_RUN_CYC */ | ||
| 171 | break; | ||
| 172 | case 0x600f4: /* PM_RUN_CYC */ | ||
| 173 | alt[j++] = 0x1e; | ||
| 174 | break; | ||
| 175 | case 0x2: /* PM_PPC_CMPL */ | ||
| 176 | alt[j++] = 0x500fa; /* PM_RUN_INST_CMPL */ | ||
| 177 | break; | ||
| 178 | case 0x500fa: /* PM_RUN_INST_CMPL */ | ||
| 179 | alt[j++] = 0x2; /* PM_PPC_CMPL */ | ||
| 180 | break; | ||
| 181 | } | ||
| 182 | } | ||
| 183 | nalt = j; | ||
| 184 | } | ||
| 185 | |||
| 186 | return nalt; | ||
| 187 | } | ||
| 188 | |||
/*
 * Returns 1 if event counts things relating to marked instructions
 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
 */
static int power7_marked_instr_event(u64 event)
{
	int pmc, psel;
	int unit;

	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
	psel = event & PM_PMCSEL_MSK & ~1;	/* trim off edge/level bit */
	if (pmc >= 5)
		return 0;

	/* Classify by the upper nibble of the (trimmed) PMCSEL value */
	switch (psel >> 4) {
	case 2:
		return pmc == 2 || pmc == 4;
	case 3:
		if (psel == 0x3c)
			return pmc == 1;
		if (psel == 0x3e)
			return pmc != 2;
		return 1;
	case 4:
	case 5:
		return unit == 0xd;
	case 6:
		if (psel == 0x64)
			return pmc >= 3;
		/* fall through */
	case 8:
		return unit == 0xd;
	}
	return 0;
}
| 224 | |||
/*
 * Compute MMCR register settings for a set of events.
 *
 * event[] / n_ev: the raw event codes to schedule.
 * hwc[]: output - the 0-based PMC assigned to each event.
 * mmcr[]: output - mmcr[0] = MMCR0, mmcr[1] = MMCR1, mmcr[2] = MMCRA.
 *
 * Returns 0 on success, or -1 if the events cannot all be scheduled
 * (PMC out of range, or the same PMC requested twice).
 */
static int power7_compute_mmcr(u64 event[], int n_ev,
			       unsigned int hwc[], unsigned long mmcr[])
{
	unsigned long mmcr1 = 0;
	/* Always record SDAR on D-cache and ERAT misses */
	unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
	unsigned int pmc, unit, combine, l2sel, psel;
	unsigned int pmc_inuse = 0;
	int i;

	/* First pass to count resource use */
	for (i = 0; i < n_ev; ++i) {
		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
		if (pmc) {
			if (pmc > 6)
				return -1;
			if (pmc_inuse & (1 << (pmc - 1)))
				return -1;
			pmc_inuse |= 1 << (pmc - 1);
		}
	}

	/* Second pass: assign PMCs, set all MMCR1 fields */
	for (i = 0; i < n_ev; ++i) {
		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
		combine = (event[i] >> PM_COMBINE_SH) & PM_COMBINE_MSK;
		l2sel = (event[i] >> PM_L2SEL_SH) & PM_L2SEL_MSK;
		psel = event[i] & PM_PMCSEL_MSK;
		if (!pmc) {
			/* Bus event or any-PMC direct event */
			/* Only PMC1-4 can take an unspecified-PMC event */
			for (pmc = 0; pmc < 4; ++pmc) {
				if (!(pmc_inuse & (1 << pmc)))
					break;
			}
			if (pmc >= 4)
				return -1;
			pmc_inuse |= 1 << pmc;
		} else {
			/* Direct or decoded event */
			--pmc;
		}
		/* Unit/combine/psel fields exist only for PMC1-4 */
		if (pmc <= 3) {
			mmcr1 |= (unsigned long) unit
				<< (MMCR1_TTM0SEL_SH - 4 * pmc);
			mmcr1 |= (unsigned long) combine
				<< (MMCR1_PMC1_COMBINE_SH - pmc);
			mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
			if (unit == 6)	/* L2 events */
				mmcr1 |= (unsigned long) l2sel
					<< MMCR1_L2SEL_SH;
		}
		if (power7_marked_instr_event(event[i]))
			mmcra |= MMCRA_SAMPLE_ENABLE;
		hwc[i] = pmc;
	}

	/* Return MMCRx values; set PMC1CE/PMCjCE for the PMCs in use */
	mmcr[0] = 0;
	if (pmc_inuse & 1)
		mmcr[0] = MMCR0_PMC1CE;
	if (pmc_inuse & 0x3e)
		mmcr[0] |= MMCR0_PMCjCE;
	mmcr[1] = mmcr1;
	mmcr[2] = mmcra;
	return 0;
}
| 291 | |||
/*
 * Stop PMC `pmc' (0-based) counting by clearing its PMCSEL field
 * in the MMCR1 image.  Only PMC1-4 have PMCSEL fields in MMCR1.
 */
static void power7_disable_pmc(unsigned int pmc, unsigned long mmcr[])
{
	if (pmc > 3)
		return;

	mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
}
| 297 | |||
/* Map of the generic perf hardware events to POWER7 event codes */
static int power7_generic_events[] = {
	[PERF_COUNT_HW_CPU_CYCLES] = 0x1e,		/* PM_CYC */
	[PERF_COUNT_HW_INSTRUCTIONS] = 2,		/* PM_PPC_CMPL */
	[PERF_COUNT_HW_CACHE_REFERENCES] = 0xc880,	/* LD_REF_L1_LSU*/
	[PERF_COUNT_HW_CACHE_MISSES] = 0x400f0,		/* LD_MISS_L1 */
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x10068,	/* BRU_FIN */
	[PERF_COUNT_HW_BRANCH_MISSES] = 0x400f6,	/* BR_MPRED */
};
| 306 | |||
/* Shorthand for indexing the generalized cache-event table below */
#define C(x)	PERF_COUNT_HW_CACHE_##x

/*
 * Table of generalized cache-related events.
 * 0 means not supported, -1 means nonsensical, other values
 * are event codes.
 */
static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
	[C(L1D)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0xc880,		0x400f0	},
		[C(OP_WRITE)] = {	0,		0x300f0	},
		[C(OP_PREFETCH)] = {	0xd8b8,		0	},
	},
	[C(L1I)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0,		0x200fc	},
		[C(OP_WRITE)] = {	-1,		-1	},
		[C(OP_PREFETCH)] = {	0x408a,		0	},
	},
	[C(LL)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0x16080,	0x26080	},
		[C(OP_WRITE)] = {	0x16082,	0x26082	},
		[C(OP_PREFETCH)] = {	0,		0	},
	},
	[C(DTLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0,		0x300fc	},
		[C(OP_WRITE)] = {	-1,		-1	},
		[C(OP_PREFETCH)] = {	-1,		-1	},
	},
	[C(ITLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0,		0x400fc	},
		[C(OP_WRITE)] = {	-1,		-1	},
		[C(OP_PREFETCH)] = {	-1,		-1	},
	},
	[C(BPU)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0x10068,	0x400f6	},
		[C(OP_WRITE)] = {	-1,		-1	},
		[C(OP_PREFETCH)] = {	-1,		-1	},
	},
	[C(NODE)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	-1,		-1	},
		[C(OP_WRITE)] = {	-1,		-1	},
		[C(OP_PREFETCH)] = {	-1,		-1	},
	},
};
| 351 | |||
/* POWER7 PMU description registered with the powerpc perf_event core */
static struct power_pmu power7_pmu = {
	.name			= "POWER7",
	.n_counter		= 6,	/* PMC1-6; compute_mmcr rejects pmc > 6 */
	.max_alternatives	= MAX_ALT + 1,
	/*
	 * NOTE(review): add_fields/test_adder feed the core constraint
	 * solver's PMC-count fields - confirm against the core's
	 * constraint encoding before changing.
	 */
	.add_fields		= 0x1555ul,
	.test_adder		= 0x3000ul,
	.compute_mmcr		= power7_compute_mmcr,
	.get_constraint		= power7_get_constraint,
	.get_alternatives	= power7_get_alternatives,
	.disable_pmc		= power7_disable_pmc,
	.flags			= PPMU_ALT_SIPR,
	.n_generic		= ARRAY_SIZE(power7_generic_events),
	.generic_events		= power7_generic_events,
	.cache_events		= &power7_cache_events,
};
| 367 | |||
| 368 | static int __init init_power7_pmu(void) | ||
| 369 | { | ||
| 370 | if (!cur_cpu_spec->oprofile_cpu_type || | ||
| 371 | strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power7")) | ||
| 372 | return -ENODEV; | ||
| 373 | |||
| 374 | return register_power_pmu(&power7_pmu); | ||
| 375 | } | ||
| 376 | |||
| 377 | early_initcall(init_power7_pmu); | ||
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c new file mode 100644 index 00000000000..8c219020696 --- /dev/null +++ b/arch/powerpc/kernel/ppc970-pmu.c | |||
| @@ -0,0 +1,502 @@ | |||
| 1 | /* | ||
| 2 | * Performance counter support for PPC970-family processors. | ||
| 3 | * | ||
| 4 | * Copyright 2008-2009 Paul Mackerras, IBM Corporation. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU General Public License | ||
| 8 | * as published by the Free Software Foundation; either version | ||
| 9 | * 2 of the License, or (at your option) any later version. | ||
| 10 | */ | ||
| 11 | #include <linux/string.h> | ||
| 12 | #include <linux/perf_event.h> | ||
| 13 | #include <asm/reg.h> | ||
| 14 | #include <asm/cputable.h> | ||
| 15 | |||
/*
 * Bits in event code for PPC970
 */
#define PM_PMC_SH	12	/* PMC number (1-based) for direct events */
#define PM_PMC_MSK	0xf
#define PM_UNIT_SH	8	/* TTMMUX number and setting - unit select */
#define PM_UNIT_MSK	0xf
#define PM_SPCSEL_SH	6
#define PM_SPCSEL_MSK	3
#define PM_BYTE_SH	4	/* Byte number of event bus to use */
#define PM_BYTE_MSK	3
#define PM_PMCSEL_MSK	0xf

/* Values in PM_UNIT field */
#define PM_NONE		0
#define PM_FPU		1
#define PM_VPU		2
#define PM_ISU		3
#define PM_IFU		4
#define PM_IDU		5
#define PM_STS		6
#define PM_LSU0		7
#define PM_LSU1U	8
#define PM_LSU1L	9
#define PM_LASTUNIT	9

/*
 * Bits in MMCR0 for PPC970
 */
#define MMCR0_PMC1SEL_SH	8
#define MMCR0_PMC2SEL_SH	1
#define MMCR_PMCSEL_MSK		0x1f

/*
 * Bits in MMCR1 for PPC970
 */
#define MMCR1_TTM0SEL_SH	62
#define MMCR1_TTM1SEL_SH	59
#define MMCR1_TTM3SEL_SH	53
#define MMCR1_TTMSEL_MSK	3
#define MMCR1_TD_CP_DBG0SEL_SH	50
#define MMCR1_TD_CP_DBG1SEL_SH	48
#define MMCR1_TD_CP_DBG2SEL_SH	46
#define MMCR1_TD_CP_DBG3SEL_SH	44
#define MMCR1_PMC1_ADDER_SEL_SH	39
#define MMCR1_PMC2_ADDER_SEL_SH	38
#define MMCR1_PMC6_ADDER_SEL_SH	37
#define MMCR1_PMC5_ADDER_SEL_SH	36
#define MMCR1_PMC8_ADDER_SEL_SH	35
#define MMCR1_PMC7_ADDER_SEL_SH	34
#define MMCR1_PMC3_ADDER_SEL_SH	33
#define MMCR1_PMC4_ADDER_SEL_SH	32
#define MMCR1_PMC3SEL_SH	27
#define MMCR1_PMC4SEL_SH	22
#define MMCR1_PMC5SEL_SH	17
#define MMCR1_PMC6SEL_SH	12
#define MMCR1_PMC7SEL_SH	7
#define MMCR1_PMC8SEL_SH	2

/*
 * MMCR1 bit positions of the PMCx "adder" select bits, indexed by
 * PMC - 1.  The hardware bit order is interleaved for PMC5-8
 * (6, 5, 8, 7 - see the _SH defines above), hence this lookup table.
 */
static short mmcr1_adder_bits[8] = {
	MMCR1_PMC1_ADDER_SEL_SH,
	MMCR1_PMC2_ADDER_SEL_SH,
	MMCR1_PMC3_ADDER_SEL_SH,
	MMCR1_PMC4_ADDER_SEL_SH,
	MMCR1_PMC5_ADDER_SEL_SH,
	MMCR1_PMC6_ADDER_SEL_SH,
	MMCR1_PMC7_ADDER_SEL_SH,
	MMCR1_PMC8_ADDER_SEL_SH
};
| 85 | |||
| 86 | /* | ||
| 87 | * Layout of constraint bits: | ||
| 88 | * 6666555555555544444444443333333333222222222211111111110000000000 | ||
| 89 | * 3210987654321098765432109876543210987654321098765432109876543210 | ||
| 90 | * <><><>[ >[ >[ >< >< >< >< ><><><><><><><><> | ||
| 91 | * SPT0T1 UC PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8 | ||
| 92 | * | ||
| 93 | * SP - SPCSEL constraint | ||
| 94 | * 48-49: SPCSEL value 0x3_0000_0000_0000 | ||
| 95 | * | ||
| 96 | * T0 - TTM0 constraint | ||
| 97 | * 46-47: TTM0SEL value (0=FPU, 2=IFU, 3=VPU) 0xC000_0000_0000 | ||
| 98 | * | ||
| 99 | * T1 - TTM1 constraint | ||
| 100 | * 44-45: TTM1SEL value (0=IDU, 3=STS) 0x3000_0000_0000 | ||
| 101 | * | ||
| 102 | * UC - unit constraint: can't have all three of FPU|IFU|VPU, ISU, IDU|STS | ||
| 103 | * 43: UC3 error 0x0800_0000_0000 | ||
| 104 | * 42: FPU|IFU|VPU events needed 0x0400_0000_0000 | ||
| 105 | * 41: ISU events needed 0x0200_0000_0000 | ||
| 106 | * 40: IDU|STS events needed 0x0100_0000_0000 | ||
| 107 | * | ||
| 108 | * PS1 | ||
| 109 | * 39: PS1 error 0x0080_0000_0000 | ||
| 110 | * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000 | ||
| 111 | * | ||
| 112 | * PS2 | ||
| 113 | * 35: PS2 error 0x0008_0000_0000 | ||
| 114 | * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000 | ||
| 115 | * | ||
| 116 | * B0 | ||
| 117 | * 28-31: Byte 0 event source 0xf000_0000 | ||
| 118 | * Encoding as for the event code | ||
| 119 | * | ||
| 120 | * B1, B2, B3 | ||
| 121 | * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources | ||
| 122 | * | ||
| 123 | * P1 | ||
| 124 | * 15: P1 error 0x8000 | ||
| 125 | * 14-15: Count of events needing PMC1 | ||
| 126 | * | ||
| 127 | * P2..P8 | ||
| 128 | * 0-13: Count of events needing PMC2..PMC8 | ||
| 129 | */ | ||
| 130 | |||
/*
 * Per-PMC bitmaps of direct-event PMCSEL values that count marked
 * instructions: bit `psel' of direct_marked_event[pmc - 1] is set
 * if PMCSEL value `psel' on PMC `pmc' is a marked-instruction event.
 */
static unsigned char direct_marked_event[8] = {
	(1<<2) | (1<<3),	/* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */
	(1<<3) | (1<<5),	/* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */
	(1<<3) | (1<<5),	/* PMC3: PM_MRK_ST_CMPL_INT, PM_MRK_VMX_FIN */
	(1<<4) | (1<<5),	/* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */
	(1<<4) | (1<<5),	/* PMC5: PM_GRP_MRK, PM_MRK_GRP_TIMEO */
	(1<<3) | (1<<4) | (1<<5),
		/* PMC6: PM_MRK_ST_STS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */
	(1<<4) | (1<<5),	/* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */
	(1<<4)			/* PMC8: PM_MRK_LSU_FIN */
};
| 142 | |||
/*
 * Returns 1 if event counts things relating to marked instructions
 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
 */
static int p970_marked_instr_event(u64 event)
{
	int pmc, psel, unit, byte, bit;
	unsigned int mask;

	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
	psel = event & PM_PMCSEL_MSK;
	if (pmc) {
		/* Direct events: consult the per-PMC bitmap first */
		if (direct_marked_event[pmc - 1] & (1 << psel))
			return 1;
		if (psel == 0)		/* add events */
			bit = (pmc <= 4)? pmc - 1: 8 - pmc;
		else if (psel == 7 || psel == 13)	/* decode events */
			bit = 4;
		else
			return 0;
	} else
		bit = psel;

	/*
	 * Bus events: test bit `bit' of byte `byte' against the
	 * per-unit mask of marked-event bus lines.
	 */
	byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
	mask = 0;
	switch (unit) {
	case PM_VPU:
		mask = 0x4c;	/* byte 0 bits 2,3,6 */
		break;
	case PM_LSU0:
		/* byte 2 bits 0,2,3,4,6; all of byte 1 */
		mask = 0x085dff00;
		break;
	case PM_LSU1L:
		mask = 0x50 << 24;	/* byte 3 bits 4,6 */
		break;
	}
	return (mask >> (byte * 8 + bit)) & 1;
}
| 183 | |||
/*
 * Masks and values for using events from the various units.
 * Each entry is a { mask, value } pair contributing to the TTM-select
 * and unit-conflict fields of the constraint word (bits 40-47 in the
 * layout comment above).
 */
static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
	[PM_FPU] =   { 0xc80000000000ull, 0x040000000000ull },
	[PM_VPU] =   { 0xc80000000000ull, 0xc40000000000ull },
	[PM_ISU] =   { 0x080000000000ull, 0x020000000000ull },
	[PM_IFU] =   { 0xc80000000000ull, 0x840000000000ull },
	[PM_IDU] =   { 0x380000000000ull, 0x010000000000ull },
	[PM_STS] =   { 0x380000000000ull, 0x310000000000ull },
};
| 193 | |||
/*
 * Work out the constraint mask/value pair for an event, per the
 * bit layout documented above.  Fills in *maskp and *valp and
 * returns 0, or returns -1 if the event code is invalid.
 */
static int p970_get_constraint(u64 event, unsigned long *maskp,
			       unsigned long *valp)
{
	int pmc, byte, unit, sh, spcsel;
	unsigned long mask = 0, value = 0;
	int grp = -1;	/* -1 = no PMC group constraint */

	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
	if (pmc) {
		if (pmc > 8)
			return -1;
		/* Per-PMC 2-bit count field: P1..P8 in the layout above */
		sh = (pmc - 1) * 2;
		mask |= 2 << sh;
		value |= 1 << sh;
		grp = ((pmc - 1) >> 1) & 1;
	}
	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
	if (unit) {
		if (unit > PM_LASTUNIT)
			return -1;
		mask |= unit_cons[unit][0];
		value |= unit_cons[unit][1];
		byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
		/*
		 * Bus events on bytes 0 and 2 can be counted
		 * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8.
		 */
		if (!pmc)
			grp = byte & 1;
		/* Set byte lane select field */
		mask |= 0xfULL << (28 - 4 * byte);
		value |= (unsigned long)unit << (28 - 4 * byte);
	}
	if (grp == 0) {
		/* increment PMC1/2/5/6 field */
		mask |= 0x8000000000ull;
		value |= 0x1000000000ull;
	} else if (grp == 1) {
		/* increment PMC3/4/7/8 field */
		mask |= 0x800000000ull;
		value |= 0x100000000ull;
	}
	spcsel = (event >> PM_SPCSEL_SH) & PM_SPCSEL_MSK;
	if (spcsel) {
		/* SPCSEL field, bits 48-49 of the constraint word */
		mask |= 3ull << 48;
		value |= (unsigned long)spcsel << 48;
	}
	*maskp = mask;
	*valp = value;
	return 0;
}
| 245 | |||
| 246 | static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[]) | ||
| 247 | { | ||
| 248 | alt[0] = event; | ||
| 249 | |||
| 250 | /* 2 alternatives for LSU empty */ | ||
| 251 | if (event == 0x2002 || event == 0x3002) { | ||
| 252 | alt[1] = event ^ 0x1000; | ||
| 253 | return 2; | ||
| 254 | } | ||
| 255 | |||
| 256 | return 1; | ||
| 257 | } | ||
| 258 | |||
/*
 * Compute MMCR register settings for a set of events.
 *
 * event[] / n_ev: the raw event codes to schedule (at most 8).
 * hwc[]: output - the 0-based PMC assigned to each event.
 * mmcr[]: output - mmcr[0] = MMCR0, mmcr[1] = MMCR1, mmcr[2] = MMCRA.
 *
 * Returns 0 on success, -1 if the events cannot all be scheduled.
 */
static int p970_compute_mmcr(u64 event[], int n_ev,
			     unsigned int hwc[], unsigned long mmcr[])
{
	unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0;
	unsigned int pmc, unit, byte, psel;
	unsigned int ttm, grp;
	unsigned int pmc_inuse = 0;
	unsigned int pmc_grp_use[2];
	unsigned char busbyte[4];	/* which unit drives each bus byte */
	unsigned char unituse[16];	/* which units are in use */
	/* TTM mux encoding per unit; bit 2 distinguishes TTM1 from TTM0 */
	unsigned char unitmap[] = { 0, 0<<3, 3<<3, 1<<3, 2<<3, 0|4, 3|4 };
	unsigned char ttmuse[2];
	unsigned char pmcsel[8];
	int i;
	int spcsel;

	if (n_ev > 8)
		return -1;

	/* First pass to count resource use */
	pmc_grp_use[0] = pmc_grp_use[1] = 0;
	memset(busbyte, 0, sizeof(busbyte));
	memset(unituse, 0, sizeof(unituse));
	for (i = 0; i < n_ev; ++i) {
		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
		if (pmc) {
			if (pmc_inuse & (1 << (pmc - 1)))
				return -1;
			pmc_inuse |= 1 << (pmc - 1);
			/* count 1/2/5/6 vs 3/4/7/8 use */
			++pmc_grp_use[((pmc - 1) >> 1) & 1];
		}
		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
		if (unit) {
			if (unit > PM_LASTUNIT)
				return -1;
			if (!pmc)
				++pmc_grp_use[byte & 1];
			/* Each bus byte can carry only one unit's events */
			if (busbyte[byte] && busbyte[byte] != unit)
				return -1;
			busbyte[byte] = unit;
			unituse[unit] = 1;
		}
	}
	if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4)
		return -1;

	/*
	 * Assign resources and set multiplexer selects.
	 *
	 * PM_ISU can go either on TTM0 or TTM1, but that's the only
	 * choice we have to deal with.
	 */
	if (unituse[PM_ISU] &
	    (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_VPU]))
		unitmap[PM_ISU] = 2 | 4;		/* move ISU to TTM1 */
	/* Set TTM[01]SEL fields. */
	ttmuse[0] = ttmuse[1] = 0;
	for (i = PM_FPU; i <= PM_STS; ++i) {
		if (!unituse[i])
			continue;
		ttm = unitmap[i];
		++ttmuse[(ttm >> 2) & 1];
		mmcr1 |= (unsigned long)(ttm & ~4) << MMCR1_TTM1SEL_SH;
	}
	/* Check only one unit per TTMx */
	if (ttmuse[0] > 1 || ttmuse[1] > 1)
		return -1;

	/* Set byte lane select fields and TTM3SEL. */
	for (byte = 0; byte < 4; ++byte) {
		unit = busbyte[byte];
		if (!unit)
			continue;
		if (unit <= PM_STS)
			ttm = (unitmap[unit] >> 2) & 1;
		else if (unit == PM_LSU0)
			ttm = 2;
		else {
			ttm = 3;
			if (unit == PM_LSU1L && byte >= 2)
				mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
		}
		mmcr1 |= (unsigned long)ttm
			<< (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
	}

	/* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
	memset(pmcsel, 0x8, sizeof(pmcsel));	/* 8 means don't count */
	for (i = 0; i < n_ev; ++i) {
		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
		psel = event[i] & PM_PMCSEL_MSK;
		if (!pmc) {
			/* Bus event or any-PMC direct event */
			if (unit)
				psel |= 0x10 | ((byte & 2) << 2);
			else
				psel |= 8;
			/* Find a free PMC in the right group */
			for (pmc = 0; pmc < 8; ++pmc) {
				if (pmc_inuse & (1 << pmc))
					continue;
				grp = (pmc >> 1) & 1;
				if (unit) {
					if (grp == (byte & 1))
						break;
				} else if (pmc_grp_use[grp] < 4) {
					++pmc_grp_use[grp];
					break;
				}
			}
			pmc_inuse |= 1 << pmc;
		} else {
			/* Direct event */
			--pmc;
			if (psel == 0 && (byte & 2))
				/* add events on higher-numbered bus */
				mmcr1 |= 1ull << mmcr1_adder_bits[pmc];
		}
		pmcsel[pmc] = psel;
		hwc[i] = pmc;
		spcsel = (event[i] >> PM_SPCSEL_SH) & PM_SPCSEL_MSK;
		mmcr1 |= spcsel;
		if (p970_marked_instr_event(event[i]))
			mmcra |= MMCRA_SAMPLE_ENABLE;
	}
	/* PMC1/2 select fields live in MMCR0, PMC3-8 in MMCR1 */
	for (pmc = 0; pmc < 2; ++pmc)
		mmcr0 |= pmcsel[pmc] << (MMCR0_PMC1SEL_SH - 7 * pmc);
	for (; pmc < 8; ++pmc)
		mmcr1 |= (unsigned long)pmcsel[pmc]
			<< (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
	/* Set PMC1CE/PMCjCE for the counters actually in use */
	if (pmc_inuse & 1)
		mmcr0 |= MMCR0_PMC1CE;
	if (pmc_inuse & 0xfe)
		mmcr0 |= MMCR0_PMCjCE;

	mmcra |= 0x2000;	/* mark only one IOP per PPC instruction */

	/* Return MMCRx values */
	mmcr[0] = mmcr0;
	mmcr[1] = mmcr1;
	mmcr[2] = mmcra;
	return 0;
}
| 405 | |||
| 406 | static void p970_disable_pmc(unsigned int pmc, unsigned long mmcr[]) | ||
| 407 | { | ||
| 408 | int shift, i; | ||
| 409 | |||
| 410 | if (pmc <= 1) { | ||
| 411 | shift = MMCR0_PMC1SEL_SH - 7 * pmc; | ||
| 412 | i = 0; | ||
| 413 | } else { | ||
| 414 | shift = MMCR1_PMC3SEL_SH - 5 * (pmc - 2); | ||
| 415 | i = 1; | ||
| 416 | } | ||
| 417 | /* | ||
| 418 | * Setting the PMCxSEL field to 0x08 disables PMC x. | ||
| 419 | */ | ||
| 420 | mmcr[i] = (mmcr[i] & ~(0x1fUL << shift)) | (0x08UL << shift); | ||
| 421 | } | ||
| 422 | |||
/* Map of the generic perf hardware events to PPC970 event codes */
static int ppc970_generic_events[] = {
	[PERF_COUNT_HW_CPU_CYCLES] = 7,
	[PERF_COUNT_HW_INSTRUCTIONS] = 1,
	[PERF_COUNT_HW_CACHE_REFERENCES] = 0x8810, /* PM_LD_REF_L1 */
	[PERF_COUNT_HW_CACHE_MISSES] = 0x3810,	/* PM_LD_MISS_L1 */
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x431, /* PM_BR_ISSUED */
	[PERF_COUNT_HW_BRANCH_MISSES] = 0x327,	/* PM_GRP_BR_MPRED */
};
| 431 | |||
/* Shorthand for indexing the generalized cache-event table below */
#define C(x)	PERF_COUNT_HW_CACHE_##x

/*
 * Table of generalized cache-related events.
 * 0 means not supported, -1 means nonsensical, other values
 * are event codes.
 */
static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
	[C(L1D)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0x8810,		0x3810	},
		[C(OP_WRITE)] = {	0x7810,		0x813	},
		[C(OP_PREFETCH)] = {	0x731,		0	},
	},
	[C(L1I)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0,		0	},
		[C(OP_WRITE)] = {	-1,		-1	},
		[C(OP_PREFETCH)] = {	0,		0	},
	},
	[C(LL)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0,		0	},
		[C(OP_WRITE)] = {	0,		0	},
		[C(OP_PREFETCH)] = {	0x733,		0	},
	},
	[C(DTLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0,		0x704	},
		[C(OP_WRITE)] = {	-1,		-1	},
		[C(OP_PREFETCH)] = {	-1,		-1	},
	},
	[C(ITLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0,		0x700	},
		[C(OP_WRITE)] = {	-1,		-1	},
		[C(OP_PREFETCH)] = {	-1,		-1	},
	},
	[C(BPU)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0x431,		0x327	},
		[C(OP_WRITE)] = {	-1,		-1	},
		[C(OP_PREFETCH)] = {	-1,		-1	},
	},
	[C(NODE)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	-1,		-1	},
		[C(OP_WRITE)] = {	-1,		-1	},
		[C(OP_PREFETCH)] = {	-1,		-1	},
	},
};
| 476 | |||
/* PPC970 PMU description registered with the powerpc perf_event core */
static struct power_pmu ppc970_pmu = {
	.name			= "PPC970/FX/MP",
	.n_counter		= 8,	/* PMC1-8; compute_mmcr handles up to 8 */
	.max_alternatives	= 2,	/* at most 2 from p970_get_alternatives */
	/*
	 * NOTE(review): add_fields/test_adder feed the core constraint
	 * solver (PS1/PS2 and per-PMC count fields in the layout comment
	 * above) - confirm against the core's encoding before changing.
	 */
	.add_fields		= 0x001100005555ull,
	.test_adder		= 0x013300000000ull,
	.compute_mmcr		= p970_compute_mmcr,
	.get_constraint		= p970_get_constraint,
	.get_alternatives	= p970_get_alternatives,
	.disable_pmc		= p970_disable_pmc,
	.n_generic		= ARRAY_SIZE(ppc970_generic_events),
	.generic_events		= ppc970_generic_events,
	.cache_events		= &ppc970_cache_events,
};
| 491 | |||
| 492 | static int __init init_ppc970_pmu(void) | ||
| 493 | { | ||
| 494 | if (!cur_cpu_spec->oprofile_cpu_type || | ||
| 495 | (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970") | ||
| 496 | && strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970MP"))) | ||
| 497 | return -ENODEV; | ||
| 498 | |||
| 499 | return register_power_pmu(&ppc970_pmu); | ||
| 500 | } | ||
| 501 | |||
| 502 | early_initcall(init_ppc970_pmu); | ||
