diff options
author | Lin Ming <ming.m.lin@intel.com> | 2010-03-18 06:33:07 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2010-03-18 12:03:51 -0400 |
commit | f34edbc1cdb0f8f83d94e1d668dd6e41abf0defb (patch) | |
tree | d57803bcc891a748551429d2feafec2df8007367 /arch/x86 | |
parent | 55632770d7298835645489828af87f854c47749c (diff) |
perf, x86: Add a key to simplify template lookup in Pentium-4 PMU
Currently, we use the opcode (Event and Event-Selector) + emask to
look up the template in p4_templates.
But cache events (L1-dcache-load-misses, LLC-load-misses, etc)
use the same event (P4_REPLAY_EVENT) to do the counting, i.e., they
have the same opcode and emask. So we cannot use the current lookup
mechanism to find the template for cache events.
This patch introduces a "key", which is the index into
p4_templates. The low 12 bits of CCCR are reserved, so we can
hide the "key" in the low 12 bits of hwc->config.
We extract the key from hwc->config and then quickly find the
template.
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Reviewed-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <1268908387.13901.127.camel@minggr.sh.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/include/asm/perf_event_p4.h | 5 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_p4.c | 86 |
2 files changed, 38 insertions, 53 deletions
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index b842b3238e46..7d3406a2773c 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h | |||
@@ -65,6 +65,7 @@ | |||
65 | #define P4_CCCR_THREAD_SINGLE 0x00010000U | 65 | #define P4_CCCR_THREAD_SINGLE 0x00010000U |
66 | #define P4_CCCR_THREAD_BOTH 0x00020000U | 66 | #define P4_CCCR_THREAD_BOTH 0x00020000U |
67 | #define P4_CCCR_THREAD_ANY 0x00030000U | 67 | #define P4_CCCR_THREAD_ANY 0x00030000U |
68 | #define P4_CCCR_RESERVED 0x00000fffU | ||
68 | 69 | ||
69 | /* Non HT mask */ | 70 | /* Non HT mask */ |
70 | #define P4_CCCR_MASK \ | 71 | #define P4_CCCR_MASK \ |
@@ -116,7 +117,7 @@ | |||
116 | #define p4_config_pack_escr(v) (((u64)(v)) << 32) | 117 | #define p4_config_pack_escr(v) (((u64)(v)) << 32) |
117 | #define p4_config_pack_cccr(v) (((u64)(v)) & 0xffffffffULL) | 118 | #define p4_config_pack_cccr(v) (((u64)(v)) & 0xffffffffULL) |
118 | #define p4_config_unpack_escr(v) (((u64)(v)) >> 32) | 119 | #define p4_config_unpack_escr(v) (((u64)(v)) >> 32) |
119 | #define p4_config_unpack_cccr(v) (((u64)(v)) & 0xffffffffULL) | 120 | #define p4_config_unpack_cccr(v) (((u64)(v)) & 0xfffff000ULL) |
120 | 121 | ||
121 | #define p4_config_unpack_emask(v) \ | 122 | #define p4_config_unpack_emask(v) \ |
122 | ({ \ | 123 | ({ \ |
@@ -126,6 +127,8 @@ | |||
126 | t; \ | 127 | t; \ |
127 | }) | 128 | }) |
128 | 129 | ||
130 | #define p4_config_unpack_key(v) (((u64)(v)) & P4_CCCR_RESERVED) | ||
131 | |||
129 | #define P4_CONFIG_HT_SHIFT 63 | 132 | #define P4_CONFIG_HT_SHIFT 63 |
130 | #define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) | 133 | #define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) |
131 | 134 | ||
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 0367889b4ae0..3e97ed3904cc 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c | |||
@@ -18,6 +18,7 @@ struct p4_event_template { | |||
18 | u32 opcode; /* ESCR event + CCCR selector */ | 18 | u32 opcode; /* ESCR event + CCCR selector */ |
19 | u64 config; /* packed predefined bits */ | 19 | u64 config; /* packed predefined bits */ |
20 | int dep; /* upstream dependency event index */ | 20 | int dep; /* upstream dependency event index */ |
21 | int key; /* index into p4_templates */ | ||
21 | unsigned int emask; /* ESCR EventMask */ | 22 | unsigned int emask; /* ESCR EventMask */ |
22 | unsigned int escr_msr[2]; /* ESCR MSR for this event */ | 23 | unsigned int escr_msr[2]; /* ESCR MSR for this event */ |
23 | unsigned int cntr[2]; /* counter index (offset) */ | 24 | unsigned int cntr[2]; /* counter index (offset) */ |
@@ -39,38 +40,31 @@ static DEFINE_PER_CPU(struct p4_pmu_res, p4_pmu_config); | |||
39 | */ | 40 | */ |
40 | struct p4_event_template p4_templates[] = { | 41 | struct p4_event_template p4_templates[] = { |
41 | [0] = { | 42 | [0] = { |
42 | .opcode = P4_UOP_TYPE, | ||
43 | .config = 0, | ||
44 | .dep = -1, | ||
45 | .emask = | ||
46 | P4_EVENT_ATTR(P4_UOP_TYPE, TAGLOADS) | | ||
47 | P4_EVENT_ATTR(P4_UOP_TYPE, TAGSTORES), | ||
48 | .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 }, | ||
49 | .cntr = { 16, 17 }, | ||
50 | }, | ||
51 | [1] = { | ||
52 | .opcode = P4_GLOBAL_POWER_EVENTS, | 43 | .opcode = P4_GLOBAL_POWER_EVENTS, |
53 | .config = 0, | 44 | .config = 0, |
54 | .dep = -1, | 45 | .dep = -1, |
46 | .key = 0, | ||
55 | .emask = | 47 | .emask = |
56 | P4_EVENT_ATTR(P4_GLOBAL_POWER_EVENTS, RUNNING), | 48 | P4_EVENT_ATTR(P4_GLOBAL_POWER_EVENTS, RUNNING), |
57 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | 49 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, |
58 | .cntr = { 0, 2 }, | 50 | .cntr = { 0, 2 }, |
59 | }, | 51 | }, |
60 | [2] = { | 52 | [1] = { |
61 | .opcode = P4_INSTR_RETIRED, | 53 | .opcode = P4_INSTR_RETIRED, |
62 | .config = 0, | 54 | .config = 0, |
63 | .dep = -1, /* needs front-end tagging */ | 55 | .dep = -1, /* needs front-end tagging */ |
56 | .key = 1, | ||
64 | .emask = | 57 | .emask = |
65 | P4_EVENT_ATTR(P4_INSTR_RETIRED, NBOGUSNTAG) | | 58 | P4_EVENT_ATTR(P4_INSTR_RETIRED, NBOGUSNTAG) | |
66 | P4_EVENT_ATTR(P4_INSTR_RETIRED, BOGUSNTAG), | 59 | P4_EVENT_ATTR(P4_INSTR_RETIRED, BOGUSNTAG), |
67 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | 60 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, |
68 | .cntr = { 12, 14 }, | 61 | .cntr = { 12, 14 }, |
69 | }, | 62 | }, |
70 | [3] = { | 63 | [2] = { |
71 | .opcode = P4_BSQ_CACHE_REFERENCE, | 64 | .opcode = P4_BSQ_CACHE_REFERENCE, |
72 | .config = 0, | 65 | .config = 0, |
73 | .dep = -1, | 66 | .dep = -1, |
67 | .key = 2, | ||
74 | .emask = | 68 | .emask = |
75 | P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) | | 69 | P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) | |
76 | P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) | | 70 | P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) | |
@@ -81,10 +75,11 @@ struct p4_event_template p4_templates[] = { | |||
81 | .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, | 75 | .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, |
82 | .cntr = { 0, 2 }, | 76 | .cntr = { 0, 2 }, |
83 | }, | 77 | }, |
84 | [4] = { | 78 | [3] = { |
85 | .opcode = P4_BSQ_CACHE_REFERENCE, | 79 | .opcode = P4_BSQ_CACHE_REFERENCE, |
86 | .config = 0, | 80 | .config = 0, |
87 | .dep = -1, | 81 | .dep = -1, |
82 | .key = 3, | ||
88 | .emask = | 83 | .emask = |
89 | P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) | | 84 | P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) | |
90 | P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) | | 85 | P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) | |
@@ -92,10 +87,11 @@ struct p4_event_template p4_templates[] = { | |||
92 | .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, | 87 | .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, |
93 | .cntr = { 0, 3 }, | 88 | .cntr = { 0, 3 }, |
94 | }, | 89 | }, |
95 | [5] = { | 90 | [4] = { |
96 | .opcode = P4_RETIRED_BRANCH_TYPE, | 91 | .opcode = P4_RETIRED_BRANCH_TYPE, |
97 | .config = 0, | 92 | .config = 0, |
98 | .dep = -1, | 93 | .dep = -1, |
94 | .key = 4, | ||
99 | .emask = | 95 | .emask = |
100 | P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CONDITIONAL) | | 96 | P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CONDITIONAL) | |
101 | P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CALL) | | 97 | P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CALL) | |
@@ -104,48 +100,38 @@ struct p4_event_template p4_templates[] = { | |||
104 | .escr_msr = { MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR1 }, | 100 | .escr_msr = { MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR1 }, |
105 | .cntr = { 4, 6 }, | 101 | .cntr = { 4, 6 }, |
106 | }, | 102 | }, |
107 | [6] = { | 103 | [5] = { |
108 | .opcode = P4_MISPRED_BRANCH_RETIRED, | 104 | .opcode = P4_MISPRED_BRANCH_RETIRED, |
109 | .config = 0, | 105 | .config = 0, |
110 | .dep = -1, | 106 | .dep = -1, |
107 | .key = 5, | ||
111 | .emask = | 108 | .emask = |
112 | P4_EVENT_ATTR(P4_MISPRED_BRANCH_RETIRED, NBOGUS), | 109 | P4_EVENT_ATTR(P4_MISPRED_BRANCH_RETIRED, NBOGUS), |
113 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | 110 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, |
114 | .cntr = { 12, 14 }, | 111 | .cntr = { 12, 14 }, |
115 | }, | 112 | }, |
116 | [7] = { | 113 | [6] = { |
117 | .opcode = P4_FSB_DATA_ACTIVITY, | 114 | .opcode = P4_FSB_DATA_ACTIVITY, |
118 | .config = p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE), | 115 | .config = p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE), |
119 | .dep = -1, | 116 | .dep = -1, |
117 | .key = 6, | ||
120 | .emask = | 118 | .emask = |
121 | P4_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_DRV) | | 119 | P4_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_DRV) | |
122 | P4_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_OWN), | 120 | P4_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_OWN), |
123 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | 121 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, |
124 | .cntr = { 0, 2 }, | 122 | .cntr = { 0, 2 }, |
125 | }, | 123 | }, |
126 | }; | 124 | [7] = { |
127 | 125 | .opcode = P4_UOP_TYPE, | |
128 | static struct p4_event_template *p4_event_map[PERF_COUNT_HW_MAX] = { | 126 | .config = 0, |
129 | /* non-halted CPU clocks */ | 127 | .dep = -1, |
130 | [PERF_COUNT_HW_CPU_CYCLES] = &p4_templates[1], | 128 | .key = 7, |
131 | 129 | .emask = | |
132 | /* retired instructions: dep on tagging the FSB */ | 130 | P4_EVENT_ATTR(P4_UOP_TYPE, TAGLOADS) | |
133 | [PERF_COUNT_HW_INSTRUCTIONS] = &p4_templates[2], | 131 | P4_EVENT_ATTR(P4_UOP_TYPE, TAGSTORES), |
134 | 132 | .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 }, | |
135 | /* cache hits */ | 133 | .cntr = { 16, 17 }, |
136 | [PERF_COUNT_HW_CACHE_REFERENCES] = &p4_templates[3], | 134 | }, |
137 | |||
138 | /* cache misses */ | ||
139 | [PERF_COUNT_HW_CACHE_MISSES] = &p4_templates[4], | ||
140 | |||
141 | /* branch instructions retired */ | ||
142 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = &p4_templates[5], | ||
143 | |||
144 | /* mispredicted branches retired */ | ||
145 | [PERF_COUNT_HW_BRANCH_MISSES] = &p4_templates[6], | ||
146 | |||
147 | /* bus ready clocks (cpu is driving #DRDY_DRV\#DRDY_OWN): */ | ||
148 | [PERF_COUNT_HW_BUS_CYCLES] = &p4_templates[7], | ||
149 | }; | 135 | }; |
150 | 136 | ||
151 | static u64 p4_pmu_event_map(int hw_event) | 137 | static u64 p4_pmu_event_map(int hw_event) |
@@ -153,11 +139,11 @@ static u64 p4_pmu_event_map(int hw_event) | |||
153 | struct p4_event_template *tpl; | 139 | struct p4_event_template *tpl; |
154 | u64 config; | 140 | u64 config; |
155 | 141 | ||
156 | if (hw_event > ARRAY_SIZE(p4_event_map)) { | 142 | if (hw_event > ARRAY_SIZE(p4_templates)) { |
157 | printk_once(KERN_ERR "PMU: Incorrect event index\n"); | 143 | printk_once(KERN_ERR "PMU: Incorrect event index\n"); |
158 | return 0; | 144 | return 0; |
159 | } | 145 | } |
160 | tpl = p4_event_map[hw_event]; | 146 | tpl = &p4_templates[hw_event]; |
161 | 147 | ||
162 | /* | 148 | /* |
163 | * fill config up according to | 149 | * fill config up according to |
@@ -167,6 +153,7 @@ static u64 p4_pmu_event_map(int hw_event) | |||
167 | config |= p4_config_pack_escr(P4_EVENT_UNPACK_EVENT(tpl->opcode) << P4_EVNTSEL_EVENT_SHIFT); | 153 | config |= p4_config_pack_escr(P4_EVENT_UNPACK_EVENT(tpl->opcode) << P4_EVNTSEL_EVENT_SHIFT); |
168 | config |= p4_config_pack_escr(tpl->emask << P4_EVNTSEL_EVENTMASK_SHIFT); | 154 | config |= p4_config_pack_escr(tpl->emask << P4_EVNTSEL_EVENTMASK_SHIFT); |
169 | config |= p4_config_pack_cccr(P4_EVENT_UNPACK_SELECTOR(tpl->opcode) << P4_CCCR_ESCR_SELECT_SHIFT); | 155 | config |= p4_config_pack_cccr(P4_EVENT_UNPACK_SELECTOR(tpl->opcode) << P4_CCCR_ESCR_SELECT_SHIFT); |
156 | config |= p4_config_pack_cccr(hw_event & P4_CCCR_RESERVED); | ||
170 | 157 | ||
171 | /* on HT machine we need a special bit */ | 158 | /* on HT machine we need a special bit */ |
172 | if (p4_ht_active() && p4_ht_thread(raw_smp_processor_id())) | 159 | if (p4_ht_active() && p4_ht_thread(raw_smp_processor_id())) |
@@ -187,17 +174,12 @@ static inline int p4_pmu_emask_match(unsigned int dst, unsigned int src) | |||
187 | 174 | ||
188 | static struct p4_event_template *p4_pmu_template_lookup(u64 config) | 175 | static struct p4_event_template *p4_pmu_template_lookup(u64 config) |
189 | { | 176 | { |
190 | u32 opcode = p4_config_unpack_opcode(config); | 177 | int key = p4_config_unpack_key(config); |
191 | unsigned int emask = p4_config_unpack_emask(config); | ||
192 | unsigned int i; | ||
193 | |||
194 | for (i = 0; i < ARRAY_SIZE(p4_templates); i++) { | ||
195 | if (opcode == p4_templates[i].opcode && | ||
196 | p4_pmu_emask_match(emask, p4_templates[i].emask)) | ||
197 | return &p4_templates[i]; | ||
198 | } | ||
199 | 178 | ||
200 | return NULL; | 179 | if (key < ARRAY_SIZE(p4_templates)) |
180 | return &p4_templates[key]; | ||
181 | else | ||
182 | return NULL; | ||
201 | } | 183 | } |
202 | 184 | ||
203 | /* | 185 | /* |
@@ -564,7 +546,7 @@ static __initconst struct x86_pmu p4_pmu = { | |||
564 | .perfctr = MSR_P4_BPU_PERFCTR0, | 546 | .perfctr = MSR_P4_BPU_PERFCTR0, |
565 | .event_map = p4_pmu_event_map, | 547 | .event_map = p4_pmu_event_map, |
566 | .raw_event = p4_pmu_raw_event, | 548 | .raw_event = p4_pmu_raw_event, |
567 | .max_events = ARRAY_SIZE(p4_event_map), | 549 | .max_events = ARRAY_SIZE(p4_templates), |
568 | .get_event_constraints = x86_get_event_constraints, | 550 | .get_event_constraints = x86_get_event_constraints, |
569 | /* | 551 | /* |
570 | * IF HT disabled we may need to use all | 552 | * IF HT disabled we may need to use all |