about summary refs log tree commit diff stats
path: root/arch/x86
diff options
context:
space:
mode:
authorLin Ming <ming.m.lin@intel.com>2010-03-18 06:33:07 -0400
committerIngo Molnar <mingo@elte.hu>2010-03-18 12:03:51 -0400
commitf34edbc1cdb0f8f83d94e1d668dd6e41abf0defb (patch)
treed57803bcc891a748551429d2feafec2df8007367 /arch/x86
parent55632770d7298835645489828af87f854c47749c (diff)
perf, x86: Add a key to simplify template lookup in Pentium-4 PMU
Currently, we use opcode(Event and Event-Selector) + emask to look up template in p4_templates. But cache events (L1-dcache-load-misses, LLC-load-misses, etc) use the same event(P4_REPLAY_EVENT) to do the counting, i.e., they have the same opcode and emask. So we cannot use the current lookup mechanism to find the template for cache events. This patch introduces a "key", which is the index into p4_templates. The low 12 bits of CCCR are reserved, so we can hide the "key" in the low 12 bits of hwc->config. We extract the key from hwc->config and then quickly find the template. Signed-off-by: Lin Ming <ming.m.lin@intel.com> Reviewed-by: Cyrill Gorcunov <gorcunov@openvz.org> Cc: Peter Zijlstra <peterz@infradead.org> LKML-Reference: <1268908387.13901.127.camel@minggr.sh.intel.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/include/asm/perf_event_p4.h5
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c86
2 files changed, 38 insertions, 53 deletions
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index b842b3238e46..7d3406a2773c 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -65,6 +65,7 @@
65#define P4_CCCR_THREAD_SINGLE 0x00010000U 65#define P4_CCCR_THREAD_SINGLE 0x00010000U
66#define P4_CCCR_THREAD_BOTH 0x00020000U 66#define P4_CCCR_THREAD_BOTH 0x00020000U
67#define P4_CCCR_THREAD_ANY 0x00030000U 67#define P4_CCCR_THREAD_ANY 0x00030000U
68#define P4_CCCR_RESERVED 0x00000fffU
68 69
69/* Non HT mask */ 70/* Non HT mask */
70#define P4_CCCR_MASK \ 71#define P4_CCCR_MASK \
@@ -116,7 +117,7 @@
116#define p4_config_pack_escr(v) (((u64)(v)) << 32) 117#define p4_config_pack_escr(v) (((u64)(v)) << 32)
117#define p4_config_pack_cccr(v) (((u64)(v)) & 0xffffffffULL) 118#define p4_config_pack_cccr(v) (((u64)(v)) & 0xffffffffULL)
118#define p4_config_unpack_escr(v) (((u64)(v)) >> 32) 119#define p4_config_unpack_escr(v) (((u64)(v)) >> 32)
119#define p4_config_unpack_cccr(v) (((u64)(v)) & 0xffffffffULL) 120#define p4_config_unpack_cccr(v) (((u64)(v)) & 0xfffff000ULL)
120 121
121#define p4_config_unpack_emask(v) \ 122#define p4_config_unpack_emask(v) \
122 ({ \ 123 ({ \
@@ -126,6 +127,8 @@
126 t; \ 127 t; \
127 }) 128 })
128 129
130#define p4_config_unpack_key(v) (((u64)(v)) & P4_CCCR_RESERVED)
131
129#define P4_CONFIG_HT_SHIFT 63 132#define P4_CONFIG_HT_SHIFT 63
130#define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) 133#define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT)
131 134
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 0367889b4ae0..3e97ed3904cc 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -18,6 +18,7 @@ struct p4_event_template {
18 u32 opcode; /* ESCR event + CCCR selector */ 18 u32 opcode; /* ESCR event + CCCR selector */
19 u64 config; /* packed predefined bits */ 19 u64 config; /* packed predefined bits */
20 int dep; /* upstream dependency event index */ 20 int dep; /* upstream dependency event index */
21 int key; /* index into p4_templates */
21 unsigned int emask; /* ESCR EventMask */ 22 unsigned int emask; /* ESCR EventMask */
22 unsigned int escr_msr[2]; /* ESCR MSR for this event */ 23 unsigned int escr_msr[2]; /* ESCR MSR for this event */
23 unsigned int cntr[2]; /* counter index (offset) */ 24 unsigned int cntr[2]; /* counter index (offset) */
@@ -39,38 +40,31 @@ static DEFINE_PER_CPU(struct p4_pmu_res, p4_pmu_config);
39 */ 40 */
40struct p4_event_template p4_templates[] = { 41struct p4_event_template p4_templates[] = {
41 [0] = { 42 [0] = {
42 .opcode = P4_UOP_TYPE,
43 .config = 0,
44 .dep = -1,
45 .emask =
46 P4_EVENT_ATTR(P4_UOP_TYPE, TAGLOADS) |
47 P4_EVENT_ATTR(P4_UOP_TYPE, TAGSTORES),
48 .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
49 .cntr = { 16, 17 },
50 },
51 [1] = {
52 .opcode = P4_GLOBAL_POWER_EVENTS, 43 .opcode = P4_GLOBAL_POWER_EVENTS,
53 .config = 0, 44 .config = 0,
54 .dep = -1, 45 .dep = -1,
46 .key = 0,
55 .emask = 47 .emask =
56 P4_EVENT_ATTR(P4_GLOBAL_POWER_EVENTS, RUNNING), 48 P4_EVENT_ATTR(P4_GLOBAL_POWER_EVENTS, RUNNING),
57 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 49 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
58 .cntr = { 0, 2 }, 50 .cntr = { 0, 2 },
59 }, 51 },
60 [2] = { 52 [1] = {
61 .opcode = P4_INSTR_RETIRED, 53 .opcode = P4_INSTR_RETIRED,
62 .config = 0, 54 .config = 0,
63 .dep = -1, /* needs front-end tagging */ 55 .dep = -1, /* needs front-end tagging */
56 .key = 1,
64 .emask = 57 .emask =
65 P4_EVENT_ATTR(P4_INSTR_RETIRED, NBOGUSNTAG) | 58 P4_EVENT_ATTR(P4_INSTR_RETIRED, NBOGUSNTAG) |
66 P4_EVENT_ATTR(P4_INSTR_RETIRED, BOGUSNTAG), 59 P4_EVENT_ATTR(P4_INSTR_RETIRED, BOGUSNTAG),
67 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, 60 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
68 .cntr = { 12, 14 }, 61 .cntr = { 12, 14 },
69 }, 62 },
70 [3] = { 63 [2] = {
71 .opcode = P4_BSQ_CACHE_REFERENCE, 64 .opcode = P4_BSQ_CACHE_REFERENCE,
72 .config = 0, 65 .config = 0,
73 .dep = -1, 66 .dep = -1,
67 .key = 2,
74 .emask = 68 .emask =
75 P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) | 69 P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) |
76 P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) | 70 P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) |
@@ -81,10 +75,11 @@ struct p4_event_template p4_templates[] = {
81 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, 75 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
82 .cntr = { 0, 2 }, 76 .cntr = { 0, 2 },
83 }, 77 },
84 [4] = { 78 [3] = {
85 .opcode = P4_BSQ_CACHE_REFERENCE, 79 .opcode = P4_BSQ_CACHE_REFERENCE,
86 .config = 0, 80 .config = 0,
87 .dep = -1, 81 .dep = -1,
82 .key = 3,
88 .emask = 83 .emask =
89 P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) | 84 P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) |
90 P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) | 85 P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) |
@@ -92,10 +87,11 @@ struct p4_event_template p4_templates[] = {
92 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, 87 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
93 .cntr = { 0, 3 }, 88 .cntr = { 0, 3 },
94 }, 89 },
95 [5] = { 90 [4] = {
96 .opcode = P4_RETIRED_BRANCH_TYPE, 91 .opcode = P4_RETIRED_BRANCH_TYPE,
97 .config = 0, 92 .config = 0,
98 .dep = -1, 93 .dep = -1,
94 .key = 4,
99 .emask = 95 .emask =
100 P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CONDITIONAL) | 96 P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CONDITIONAL) |
101 P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CALL) | 97 P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CALL) |
@@ -104,48 +100,38 @@ struct p4_event_template p4_templates[] = {
104 .escr_msr = { MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR1 }, 100 .escr_msr = { MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR1 },
105 .cntr = { 4, 6 }, 101 .cntr = { 4, 6 },
106 }, 102 },
107 [6] = { 103 [5] = {
108 .opcode = P4_MISPRED_BRANCH_RETIRED, 104 .opcode = P4_MISPRED_BRANCH_RETIRED,
109 .config = 0, 105 .config = 0,
110 .dep = -1, 106 .dep = -1,
107 .key = 5,
111 .emask = 108 .emask =
112 P4_EVENT_ATTR(P4_MISPRED_BRANCH_RETIRED, NBOGUS), 109 P4_EVENT_ATTR(P4_MISPRED_BRANCH_RETIRED, NBOGUS),
113 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, 110 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
114 .cntr = { 12, 14 }, 111 .cntr = { 12, 14 },
115 }, 112 },
116 [7] = { 113 [6] = {
117 .opcode = P4_FSB_DATA_ACTIVITY, 114 .opcode = P4_FSB_DATA_ACTIVITY,
118 .config = p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE), 115 .config = p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE),
119 .dep = -1, 116 .dep = -1,
117 .key = 6,
120 .emask = 118 .emask =
121 P4_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_DRV) | 119 P4_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_DRV) |
122 P4_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_OWN), 120 P4_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_OWN),
123 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 121 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
124 .cntr = { 0, 2 }, 122 .cntr = { 0, 2 },
125 }, 123 },
126}; 124 [7] = {
127 125 .opcode = P4_UOP_TYPE,
128static struct p4_event_template *p4_event_map[PERF_COUNT_HW_MAX] = { 126 .config = 0,
129 /* non-halted CPU clocks */ 127 .dep = -1,
130 [PERF_COUNT_HW_CPU_CYCLES] = &p4_templates[1], 128 .key = 7,
131 129 .emask =
132 /* retired instructions: dep on tagging the FSB */ 130 P4_EVENT_ATTR(P4_UOP_TYPE, TAGLOADS) |
133 [PERF_COUNT_HW_INSTRUCTIONS] = &p4_templates[2], 131 P4_EVENT_ATTR(P4_UOP_TYPE, TAGSTORES),
134 132 .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
135 /* cache hits */ 133 .cntr = { 16, 17 },
136 [PERF_COUNT_HW_CACHE_REFERENCES] = &p4_templates[3], 134 },
137
138 /* cache misses */
139 [PERF_COUNT_HW_CACHE_MISSES] = &p4_templates[4],
140
141 /* branch instructions retired */
142 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = &p4_templates[5],
143
144 /* mispredicted branches retired */
145 [PERF_COUNT_HW_BRANCH_MISSES] = &p4_templates[6],
146
147 /* bus ready clocks (cpu is driving #DRDY_DRV\#DRDY_OWN): */
148 [PERF_COUNT_HW_BUS_CYCLES] = &p4_templates[7],
149}; 135};
150 136
151static u64 p4_pmu_event_map(int hw_event) 137static u64 p4_pmu_event_map(int hw_event)
@@ -153,11 +139,11 @@ static u64 p4_pmu_event_map(int hw_event)
153 struct p4_event_template *tpl; 139 struct p4_event_template *tpl;
154 u64 config; 140 u64 config;
155 141
156 if (hw_event > ARRAY_SIZE(p4_event_map)) { 142 if (hw_event > ARRAY_SIZE(p4_templates)) {
157 printk_once(KERN_ERR "PMU: Incorrect event index\n"); 143 printk_once(KERN_ERR "PMU: Incorrect event index\n");
158 return 0; 144 return 0;
159 } 145 }
160 tpl = p4_event_map[hw_event]; 146 tpl = &p4_templates[hw_event];
161 147
162 /* 148 /*
163 * fill config up according to 149 * fill config up according to
@@ -167,6 +153,7 @@ static u64 p4_pmu_event_map(int hw_event)
167 config |= p4_config_pack_escr(P4_EVENT_UNPACK_EVENT(tpl->opcode) << P4_EVNTSEL_EVENT_SHIFT); 153 config |= p4_config_pack_escr(P4_EVENT_UNPACK_EVENT(tpl->opcode) << P4_EVNTSEL_EVENT_SHIFT);
168 config |= p4_config_pack_escr(tpl->emask << P4_EVNTSEL_EVENTMASK_SHIFT); 154 config |= p4_config_pack_escr(tpl->emask << P4_EVNTSEL_EVENTMASK_SHIFT);
169 config |= p4_config_pack_cccr(P4_EVENT_UNPACK_SELECTOR(tpl->opcode) << P4_CCCR_ESCR_SELECT_SHIFT); 155 config |= p4_config_pack_cccr(P4_EVENT_UNPACK_SELECTOR(tpl->opcode) << P4_CCCR_ESCR_SELECT_SHIFT);
156 config |= p4_config_pack_cccr(hw_event & P4_CCCR_RESERVED);
170 157
171 /* on HT machine we need a special bit */ 158 /* on HT machine we need a special bit */
172 if (p4_ht_active() && p4_ht_thread(raw_smp_processor_id())) 159 if (p4_ht_active() && p4_ht_thread(raw_smp_processor_id()))
@@ -187,17 +174,12 @@ static inline int p4_pmu_emask_match(unsigned int dst, unsigned int src)
187 174
188static struct p4_event_template *p4_pmu_template_lookup(u64 config) 175static struct p4_event_template *p4_pmu_template_lookup(u64 config)
189{ 176{
190 u32 opcode = p4_config_unpack_opcode(config); 177 int key = p4_config_unpack_key(config);
191 unsigned int emask = p4_config_unpack_emask(config);
192 unsigned int i;
193
194 for (i = 0; i < ARRAY_SIZE(p4_templates); i++) {
195 if (opcode == p4_templates[i].opcode &&
196 p4_pmu_emask_match(emask, p4_templates[i].emask))
197 return &p4_templates[i];
198 }
199 178
200 return NULL; 179 if (key < ARRAY_SIZE(p4_templates))
180 return &p4_templates[key];
181 else
182 return NULL;
201} 183}
202 184
203/* 185/*
@@ -564,7 +546,7 @@ static __initconst struct x86_pmu p4_pmu = {
564 .perfctr = MSR_P4_BPU_PERFCTR0, 546 .perfctr = MSR_P4_BPU_PERFCTR0,
565 .event_map = p4_pmu_event_map, 547 .event_map = p4_pmu_event_map,
566 .raw_event = p4_pmu_raw_event, 548 .raw_event = p4_pmu_raw_event,
567 .max_events = ARRAY_SIZE(p4_event_map), 549 .max_events = ARRAY_SIZE(p4_templates),
568 .get_event_constraints = x86_get_event_constraints, 550 .get_event_constraints = x86_get_event_constraints,
569 /* 551 /*
570 * IF HT disabled we may need to use all 552 * IF HT disabled we may need to use all