author    Stephane Eranian <eranian@googlemail.com>  2009-10-06 10:42:09 -0400
committer Ingo Molnar <mingo@elte.hu>                2009-10-09 09:56:12 -0400
commit    b690081d4d3f6a23541493f1682835c3cd5c54a1 (patch)
tree      43b7859a8e75ef2b95a8de8eaa1feba6d64fa85f /arch
parent    04a705df47d1ea27ca2b066f24b1951c51792d0d (diff)
perf_events: Add event constraints support for Intel processors
On some Intel processors, not all events can be measured in all counters. Some events can only be measured in one particular counter, for instance. Assigning an event to the wrong counter does not crash the machine, but it yields bogus counts, i.e., a silent error.

This patch changes the event-to-counter assignment logic to take into account event constraints for Intel P6, Core and Nehalem processors. There are no constraints on Intel Atom. There are constraints on Intel Yonah (Core Duo), but they are not provided in this patch given that this processor is not yet supported by perf_events.

As a result of the constraints, it is possible for some event groups to never actually be loaded onto the PMU if they contain two events which can only be measured on a single counter. That situation can be detected with the scaling information extracted with read().

Signed-off-by: Stephane Eranian <eranian@gmail.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1254840129-6198-3-git-send-email-eranian@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
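For illustration, a minimal user-space sketch (not part of this patch) of how that detection might look: it opens one raw event with the standard perf_event_open() interface, requests the scaling times in the read format, and compares time_running against time_enabled. The raw event code 0x12 (MUL, one of the constrained Core events listed below) and the placeholder workload are purely illustrative assumptions.

/*
 * Sketch only, not part of the patch: use the scaling information
 * returned by read() to tell whether the event ever got a counter.
 */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        struct perf_event_attr attr;
        struct {
                uint64_t value;         /* event count */
                uint64_t time_enabled;  /* ns the event was enabled */
                uint64_t time_running;  /* ns it actually ran on a counter */
        } rf;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size   = sizeof(attr);
        attr.type   = PERF_TYPE_RAW;
        attr.config = 0x12;     /* e.g. MUL, a constrained Core event (illustrative) */
        attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
                           PERF_FORMAT_TOTAL_TIME_RUNNING;

        fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
        if (fd < 0)
                return 1;

        /* ... workload under measurement ... */

        if (read(fd, &rf, sizeof(rf)) == sizeof(rf)) {
                if (rf.time_running == 0)
                        printf("event never got a counter (constraint conflict)\n");
                else if (rf.time_running < rf.time_enabled)
                        printf("scaled count: %.0f\n",
                               rf.value * (double)rf.time_enabled / rf.time_running);
        }
        close(fd);
        return 0;
}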
Diffstat (limited to 'arch')
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 109
1 file changed, 105 insertions, 4 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 1d16bd69551e..9c758548a0e6 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -77,6 +77,18 @@ struct cpu_hw_events {
         struct debug_store      *ds;
 };
 
+struct event_constraint {
+        unsigned long   idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+        int             code;
+};
+
+#define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) }
+#define EVENT_CONSTRAINT_END { .code = 0, .idxmsk[0] = 0 }
+
+#define for_each_event_constraint(e, c) \
+        for ((e) = (c); (e)->idxmsk[0]; (e)++)
+
+
 /*
  * struct x86_pmu - generic x86 pmu
  */
@@ -102,6 +114,7 @@ struct x86_pmu {
         u64             intel_ctrl;
         void            (*enable_bts)(u64 config);
         void            (*disable_bts)(void);
+        int             (*get_event_idx)(struct hw_perf_event *hwc);
 };
 
 static struct x86_pmu x86_pmu __read_mostly;
@@ -110,6 +123,8 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
         .enabled = 1,
 };
 
+static const struct event_constraint *event_constraint;
+
 /*
  * Not sure about some of these
  */
@@ -155,6 +170,16 @@ static u64 p6_pmu_raw_event(u64 hw_event)
         return hw_event & P6_EVNTSEL_MASK;
 }
 
+static const struct event_constraint intel_p6_event_constraints[] =
+{
+        EVENT_CONSTRAINT(0xc1, 0x1),    /* FLOPS */
+        EVENT_CONSTRAINT(0x10, 0x1),    /* FP_COMP_OPS_EXE */
+        EVENT_CONSTRAINT(0x11, 0x1),    /* FP_ASSIST */
+        EVENT_CONSTRAINT(0x12, 0x2),    /* MUL */
+        EVENT_CONSTRAINT(0x13, 0x2),    /* DIV */
+        EVENT_CONSTRAINT(0x14, 0x1),    /* CYCLES_DIV_BUSY */
+        EVENT_CONSTRAINT_END
+};
 
 /*
  * Intel PerfMon v3. Used on Core2 and later.
@@ -170,6 +195,35 @@ static const u64 intel_perfmon_event_map[] =
         [PERF_COUNT_HW_BUS_CYCLES]              = 0x013c,
 };
 
+static const struct event_constraint intel_core_event_constraints[] =
+{
+        EVENT_CONSTRAINT(0x10, 0x1),    /* FP_COMP_OPS_EXE */
+        EVENT_CONSTRAINT(0x11, 0x2),    /* FP_ASSIST */
+        EVENT_CONSTRAINT(0x12, 0x2),    /* MUL */
+        EVENT_CONSTRAINT(0x13, 0x2),    /* DIV */
+        EVENT_CONSTRAINT(0x14, 0x1),    /* CYCLES_DIV_BUSY */
+        EVENT_CONSTRAINT(0x18, 0x1),    /* IDLE_DURING_DIV */
+        EVENT_CONSTRAINT(0x19, 0x2),    /* DELAYED_BYPASS */
+        EVENT_CONSTRAINT(0xa1, 0x1),    /* RS_UOPS_DISPATCH_CYCLES */
+        EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED */
+        EVENT_CONSTRAINT_END
+};
+
+static const struct event_constraint intel_nehalem_event_constraints[] =
+{
+        EVENT_CONSTRAINT(0x40, 0x3),    /* L1D_CACHE_LD */
+        EVENT_CONSTRAINT(0x41, 0x3),    /* L1D_CACHE_ST */
+        EVENT_CONSTRAINT(0x42, 0x3),    /* L1D_CACHE_LOCK */
+        EVENT_CONSTRAINT(0x43, 0x3),    /* L1D_ALL_REF */
+        EVENT_CONSTRAINT(0x4e, 0x3),    /* L1D_PREFETCH */
+        EVENT_CONSTRAINT(0x4c, 0x3),    /* LOAD_HIT_PRE */
+        EVENT_CONSTRAINT(0x51, 0x3),    /* L1D */
+        EVENT_CONSTRAINT(0x52, 0x3),    /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */
+        EVENT_CONSTRAINT(0x53, 0x3),    /* L1D_CACHE_LOCK_FB_HIT */
+        EVENT_CONSTRAINT(0xc5, 0x3),    /* CACHE_LOCK_CYCLES */
+        EVENT_CONSTRAINT_END
+};
+
 static u64 intel_pmu_event_map(int hw_event)
 {
         return intel_perfmon_event_map[hw_event];
@@ -932,6 +986,8 @@ static int __hw_perf_event_init(struct perf_event *event)
          */
         hwc->config = ARCH_PERFMON_EVENTSEL_INT;
 
+        hwc->idx = -1;
+
         /*
          * Count user and OS events unless requested not to.
          */
@@ -1366,6 +1422,45 @@ fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc)
 }
 
 /*
+ * generic counter allocator: get next free counter
+ */
+static int gen_get_event_idx(struct hw_perf_event *hwc)
+{
+        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+        int idx;
+
+        idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events);
+        return idx == x86_pmu.num_events ? -1 : idx;
+}
+
+/*
+ * intel-specific counter allocator: check event constraints
+ */
+static int intel_get_event_idx(struct hw_perf_event *hwc)
+{
+        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+        const struct event_constraint *event_constraint;
+        int i, code;
+
+        if (!event_constraint)
+                goto skip;
+
+        code = hwc->config & 0xff;
+
+        for_each_event_constraint(event_constraint, event_constraint) {
+                if (code == event_constraint->code) {
+                        for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) {
+                                if (!test_and_set_bit(i, cpuc->used_mask))
+                                        return i;
+                        }
+                        return -1;
+                }
+        }
+skip:
+        return gen_get_event_idx(hwc);
+}
+
+/*
  * Find a PMC slot for the freshly enabled / scheduled in event:
  */
 static int x86_pmu_enable(struct perf_event *event)
@@ -1402,11 +1497,10 @@ static int x86_pmu_enable(struct perf_event *event)
         } else {
                 idx = hwc->idx;
                 /* Try to get the previous generic event again */
-                if (test_and_set_bit(idx, cpuc->used_mask)) {
+                if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) {
 try_generic:
-                        idx = find_first_zero_bit(cpuc->used_mask,
-                                                  x86_pmu.num_events);
-                        if (idx == x86_pmu.num_events)
+                        idx = x86_pmu.get_event_idx(hwc);
+                        if (idx == -1)
                                 return -EAGAIN;
 
                         set_bit(idx, cpuc->used_mask);
@@ -1883,6 +1977,7 @@ static struct x86_pmu p6_pmu = {
          */
         .event_bits             = 32,
         .event_mask             = (1ULL << 32) - 1,
+        .get_event_idx          = intel_get_event_idx,
 };
 
 static struct x86_pmu intel_pmu = {
@@ -1906,6 +2001,7 @@ static struct x86_pmu intel_pmu = {
         .max_period             = (1ULL << 31) - 1,
         .enable_bts             = intel_pmu_enable_bts,
         .disable_bts            = intel_pmu_disable_bts,
+        .get_event_idx          = intel_get_event_idx,
 };
 
 static struct x86_pmu amd_pmu = {
@@ -1926,6 +2022,7 @@ static struct x86_pmu amd_pmu = {
         .apic                   = 1,
         /* use highest bit to detect overflow */
         .max_period             = (1ULL << 47) - 1,
+        .get_event_idx          = gen_get_event_idx,
 };
 
 static int p6_pmu_init(void)
@@ -1938,10 +2035,12 @@ static int p6_pmu_init(void)
         case 7:
         case 8:
         case 11: /* Pentium III */
+                event_constraint = intel_p6_event_constraints;
                 break;
         case 9:
         case 13:
                 /* Pentium M */
+                event_constraint = intel_p6_event_constraints;
                 break;
         default:
                 pr_cont("unsupported p6 CPU model %d ",
@@ -2013,12 +2112,14 @@ static int intel_pmu_init(void)
                        sizeof(hw_cache_event_ids));
 
                 pr_cont("Core2 events, ");
+                event_constraint = intel_core_event_constraints;
                 break;
         default:
         case 26:
                 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
                        sizeof(hw_cache_event_ids));
 
+                event_constraint = intel_nehalem_event_constraints;
                 pr_cont("Nehalem/Corei7 events, ");
                 break;
         case 28: