Diffstat (limited to 'arch')
 arch/x86/include/asm/msr-index.h       |   3 +
 arch/x86/kernel/cpu/perf_event.c       |  64 ++++++++
 arch/x86/kernel/cpu/perf_event_intel.c | 198 +++++++++++++++++++++
 3 files changed, 265 insertions(+), 0 deletions(-)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 4d0dfa0d998e..d25e74cc1a50 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -47,6 +47,9 @@
 #define MSR_IA32_MCG_STATUS		0x0000017a
 #define MSR_IA32_MCG_CTL		0x0000017b
 
+#define MSR_OFFCORE_RSP_0		0x000001a6
+#define MSR_OFFCORE_RSP_1		0x000001a7
+
 #define MSR_IA32_PEBS_ENABLE		0x000003f1
 #define MSR_IA32_DS_AREA		0x00000600
 #define MSR_IA32_PERF_CAPABILITIES	0x00000345
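The two new MSRs are plain read/write MSRs; a minimal sketch of poking them with the kernel's MSR accessors (illustrative only, not part of the patch — the real programming happens in the event-enable path further down):

    u64 mask = 0xff;                 /* hypothetical offcore response mask */
    wrmsrl(MSR_OFFCORE_RSP_0, mask); /* program the response filter */
    rdmsrl(MSR_OFFCORE_RSP_0, mask); /* read the current value back */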
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index ea03c725e465..ec6a6db07332 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -93,6 +93,8 @@ struct amd_nb {
 	struct event_constraint event_constraints[X86_PMC_IDX_MAX];
 };
 
+struct intel_percore;
+
 #define MAX_LBR_ENTRIES		16
 
 struct cpu_hw_events {
@@ -128,6 +130,13 @@ struct cpu_hw_events {
 	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];
 
 	/*
+	 * Intel percore register state.
+	 * Coordinate shared resources between HT threads.
+	 */
+	int				percore_used; /* Used by this CPU? */
+	struct intel_percore		*per_core;
+
+	/*
 	 * AMD specific bits
 	 */
 	struct amd_nb		*amd_nb;
@@ -177,6 +186,28 @@ struct cpu_hw_events {
 #define for_each_event_constraint(e, c)	\
 	for ((e) = (c); (e)->weight; (e)++)
 
+/*
+ * Extra registers for specific events.
+ * Some events need large masks and require external MSRs.
+ * Define a mapping to these extra registers.
+ */
+struct extra_reg {
+	unsigned int		event;
+	unsigned int		msr;
+	u64			config_mask;
+	u64			valid_mask;
+};
+
+#define EVENT_EXTRA_REG(e, ms, m, vm) {	\
+	.event = (e),		\
+	.msr = (ms),		\
+	.config_mask = (m),	\
+	.valid_mask = (vm),	\
+	}
+#define INTEL_EVENT_EXTRA_REG(event, msr, vm)	\
+	EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm)
+#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0)
+
 union perf_capabilities {
 	struct {
 		u64	lbr_format    : 6;
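For reference, a sketch of what one of these macro-built table entries expands to, using the Nehalem OFFCORE_RESPONSE entry added later in this patch (ARCH_PERFMON_EVENTSEL_EVENT is the 0xff event-select mask):

    /* INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff) expands to: */
    struct extra_reg er = {
        .event       = 0xb7,                        /* OFFCORE_RESPONSE_0 event code */
        .msr         = MSR_OFFCORE_RSP_0,           /* extra MSR carrying the wide mask */
        .config_mask = ARCH_PERFMON_EVENTSEL_EVENT, /* compare only the event-select field */
        .valid_mask  = 0xffff,                      /* bits a user may set in attr.config1 */
    };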
@@ -221,6 +252,7 @@ struct x86_pmu {
 	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,
 						 struct perf_event *event);
 	struct event_constraint *event_constraints;
+	struct event_constraint *percore_constraints;
 	void		(*quirks)(void);
 	int		perfctr_second_write;
 
@@ -249,6 +281,11 @@ struct x86_pmu {
 	 */
 	unsigned long	lbr_tos, lbr_from, lbr_to; /* MSR base regs */
 	int		lbr_nr;			   /* hardware stack size */
+
+	/*
+	 * Extra registers for events
+	 */
+	struct extra_reg *extra_regs;
 };
 
 static struct x86_pmu x86_pmu __read_mostly;
@@ -341,6 +378,31 @@ static inline unsigned int x86_pmu_event_addr(int index)
 	return x86_pmu.perfctr + x86_pmu_addr_offset(index);
 }
 
+/*
+ * Find and validate any extra registers to set up.
+ */
+static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
+{
+	struct extra_reg *er;
+
+	event->hw.extra_reg = 0;
+	event->hw.extra_config = 0;
+
+	if (!x86_pmu.extra_regs)
+		return 0;
+
+	for (er = x86_pmu.extra_regs; er->msr; er++) {
+		if (er->event != (config & er->config_mask))
+			continue;
+		if (event->attr.config1 & ~er->valid_mask)
+			return -EINVAL;
+		event->hw.extra_reg = er->msr;
+		event->hw.extra_config = event->attr.config1;
+		break;
+	}
+	return 0;
+}
+
 static atomic_t active_events;
 static DEFINE_MUTEX(pmc_reserve_mutex);
 
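A worked example of the lookup above, assuming the Nehalem table added later in this patch (the raw config value is hypothetical):

    /*
     * config       = 0x5301b7 -> config & ARCH_PERFMON_EVENTSEL_EVENT = 0xb7,
     *                            matches the MSR_OFFCORE_RSP_0 entry
     * attr.config1 = 0x00ff   -> 0x00ff & ~0xffff == 0, accepted:
     *                            hw.extra_reg = MSR_OFFCORE_RSP_0,
     *                            hw.extra_config = 0x00ff
     * attr.config1 = 0x1ffff  -> 0x1ffff & ~0xffff != 0, rejected with -EINVAL
     */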
@@ -665,6 +727,8 @@ static void x86_pmu_disable(struct pmu *pmu)
 static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
 					  u64 enable_mask)
 {
+	if (hwc->extra_reg)
+		wrmsrl(hwc->extra_reg, hwc->extra_config);
 	wrmsrl(hwc->config_base, hwc->config | enable_mask);
 }
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index c3ce053ecb46..13cb6cf013f6 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1,5 +1,27 @@
 #ifdef CONFIG_CPU_SUP_INTEL
 
+#define MAX_EXTRA_REGS 2
+
+/*
+ * Per register state.
+ */
+struct er_account {
+	int			ref;		/* reference count */
+	unsigned int		extra_reg;	/* extra MSR number */
+	u64			extra_config;	/* extra MSR config */
+};
+
+/*
+ * Per core state.
+ * This is used to coordinate shared registers between HT threads.
+ */
+struct intel_percore {
+	raw_spinlock_t		lock;		/* protect structure */
+	struct er_account	regs[MAX_EXTRA_REGS];
+	int			refcnt;		/* number of threads */
+	unsigned		core_id;
+};
+
 /*
  * Intel PerfMon, used on Core and later.
  */
@@ -64,6 +86,18 @@ static struct event_constraint intel_nehalem_event_constraints[] =
 	EVENT_CONSTRAINT_END
 };
 
+static struct extra_reg intel_nehalem_extra_regs[] =
+{
+	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
+	EVENT_EXTRA_END
+};
+
+static struct event_constraint intel_nehalem_percore_constraints[] =
+{
+	INTEL_EVENT_CONSTRAINT(0xb7, 0),
+	EVENT_CONSTRAINT_END
+};
+
 static struct event_constraint intel_westmere_event_constraints[] =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -89,6 +123,20 @@ static struct event_constraint intel_snb_event_constraints[] =
 	EVENT_CONSTRAINT_END
 };
 
+static struct extra_reg intel_westmere_extra_regs[] =
+{
+	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
+	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff),
+	EVENT_EXTRA_END
+};
+
+static struct event_constraint intel_westmere_percore_constraints[] =
+{
+	INTEL_EVENT_CONSTRAINT(0xb7, 0),
+	INTEL_EVENT_CONSTRAINT(0xbb, 0),
+	EVENT_CONSTRAINT_END
+};
+
 static struct event_constraint intel_gen_event_constraints[] =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -907,6 +955,67 @@ intel_bts_constraints(struct perf_event *event)
 }
 
 static struct event_constraint *
+intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;
+	struct event_constraint *c;
+	struct intel_percore *pc;
+	struct er_account *era;
+	int i;
+	int free_slot;
+	int found;
+
+	if (!x86_pmu.percore_constraints || hwc->extra_alloc)
+		return NULL;
+
+	for (c = x86_pmu.percore_constraints; c->cmask; c++) {
+		if (e != c->code)
+			continue;
+
+		/*
+		 * Allocate resource per core.
+		 */
+		pc = cpuc->per_core;
+		if (!pc)
+			break;
+		c = &emptyconstraint;
+		raw_spin_lock(&pc->lock);
+		free_slot = -1;
+		found = 0;
+		for (i = 0; i < MAX_EXTRA_REGS; i++) {
+			era = &pc->regs[i];
+			if (era->ref > 0 && hwc->extra_reg == era->extra_reg) {
+				/* Allow sharing same config */
+				if (hwc->extra_config == era->extra_config) {
+					era->ref++;
+					cpuc->percore_used = 1;
+					hwc->extra_alloc = 1;
+					c = NULL;
+				}
+				/* else conflict */
+				found = 1;
+				break;
+			} else if (era->ref == 0 && free_slot == -1)
+				free_slot = i;
+		}
+		if (!found && free_slot != -1) {
+			era = &pc->regs[free_slot];
+			era->ref = 1;
+			era->extra_reg = hwc->extra_reg;
+			era->extra_config = hwc->extra_config;
+			cpuc->percore_used = 1;
+			hwc->extra_alloc = 1;
+			c = NULL;
+		}
+		raw_spin_unlock(&pc->lock);
+		return c;
+	}
+
+	return NULL;
+}
+
+static struct event_constraint *
 intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 {
 	struct event_constraint *c;
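To make the three outcomes of intel_percore_constraints() concrete, a sketch of the scheduling scenarios on a pair of HT siblings (the config1 values are hypothetical):

    /*
     * Sibling A schedules event 0xb7, config1 = 0x00ff:
     *     free slot found, ref = 1, returns NULL (normal constraints apply)
     * Sibling B schedules event 0xb7, config1 = 0x00ff:
     *     identical config, slot shared, ref = 2, returns NULL
     * Sibling B schedules event 0xb7, config1 = 0xff00:
     *     MSR already claimed with a different value, returns &emptyconstraint,
     *     so the event cannot be scheduled until the slot is released
     */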
@@ -919,9 +1028,51 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
 	if (c)
 		return c;
 
+	c = intel_percore_constraints(cpuc, event);
+	if (c)
+		return c;
+
 	return x86_get_event_constraints(cpuc, event);
 }
 
+static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
+					struct perf_event *event)
+{
+	struct extra_reg *er;
+	struct intel_percore *pc;
+	struct er_account *era;
+	struct hw_perf_event *hwc = &event->hw;
+	int i, allref;
+
+	if (!cpuc->percore_used)
+		return;
+
+	for (er = x86_pmu.extra_regs; er->msr; er++) {
+		if (er->event != (hwc->config & er->config_mask))
+			continue;
+
+		pc = cpuc->per_core;
+		raw_spin_lock(&pc->lock);
+		for (i = 0; i < MAX_EXTRA_REGS; i++) {
+			era = &pc->regs[i];
+			if (era->ref > 0 &&
+			    era->extra_config == hwc->extra_config &&
+			    era->extra_reg == er->msr) {
+				era->ref--;
+				hwc->extra_alloc = 0;
+				break;
+			}
+		}
+		allref = 0;
+		for (i = 0; i < MAX_EXTRA_REGS; i++)
+			allref += pc->regs[i].ref;
+		if (allref == 0)
+			cpuc->percore_used = 0;
+		raw_spin_unlock(&pc->lock);
+		break;
+	}
+}
+
 static int intel_pmu_hw_config(struct perf_event *event)
 {
 	int ret = x86_pmu_hw_config(event);
@@ -993,11 +1144,43 @@ static __initconst const struct x86_pmu core_pmu = {
 	 */
 	.max_period		= (1ULL << 31) - 1,
 	.get_event_constraints	= intel_get_event_constraints,
+	.put_event_constraints	= intel_put_event_constraints,
 	.event_constraints	= intel_core_event_constraints,
 };
 
+static int intel_pmu_cpu_prepare(int cpu)
+{
+	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+
+	cpuc->per_core = kzalloc_node(sizeof(struct intel_percore),
+				      GFP_KERNEL, cpu_to_node(cpu));
+	if (!cpuc->per_core)
+		return NOTIFY_BAD;
+
+	raw_spin_lock_init(&cpuc->per_core->lock);
+	cpuc->per_core->core_id = -1;
+	return NOTIFY_OK;
+}
+
 static void intel_pmu_cpu_starting(int cpu)
 {
+	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+	int core_id = topology_core_id(cpu);
+	int i;
+
+	for_each_cpu(i, topology_thread_cpumask(cpu)) {
+		struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core;
+
+		if (pc && pc->core_id == core_id) {
+			kfree(cpuc->per_core);
+			cpuc->per_core = pc;
+			break;
+		}
+	}
+
+	cpuc->per_core->core_id = core_id;
+	cpuc->per_core->refcnt++;
+
 	init_debug_store_on_cpu(cpu);
 	/*
 	 * Deal with CPUs that don't clear their LBRs on power-up.
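A sketch of how the prepare/starting pair converges on one intel_percore per physical core (the CPU numbering is hypothetical; siblings are whatever topology_thread_cpumask() reports):

    /*
     * cpu_prepare:  every CPU first allocates its own intel_percore
     *               (core_id = -1, i.e. not yet attached to a core).
     * cpu_starting: CPU 0 (core 0) finds no sibling with core_id == 0,
     *               so it keeps its allocation: core_id = 0, refcnt = 1.
     *               CPU 2 (HT sibling on core 0) finds CPU 0's structure,
     *               kfree()s its own copy and attaches: refcnt = 2.
     */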
@@ -1007,6 +1190,15 @@ static void intel_pmu_cpu_starting(int cpu)
 
 static void intel_pmu_cpu_dying(int cpu)
 {
+	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+	struct intel_percore *pc = cpuc->per_core;
+
+	if (pc) {
+		if (pc->core_id == -1 || --pc->refcnt == 0)
+			kfree(pc);
+		cpuc->per_core = NULL;
+	}
+
 	fini_debug_store_on_cpu(cpu);
 }
 
@@ -1031,7 +1223,9 @@ static __initconst const struct x86_pmu intel_pmu = {
 	 */
 	.max_period		= (1ULL << 31) - 1,
 	.get_event_constraints	= intel_get_event_constraints,
+	.put_event_constraints	= intel_put_event_constraints,
 
+	.cpu_prepare		= intel_pmu_cpu_prepare,
 	.cpu_starting		= intel_pmu_cpu_starting,
 	.cpu_dying		= intel_pmu_cpu_dying,
 };
@@ -1151,7 +1345,9 @@ static __init int intel_pmu_init(void)
 
 		x86_pmu.event_constraints = intel_nehalem_event_constraints;
 		x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
+		x86_pmu.percore_constraints = intel_nehalem_percore_constraints;
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
+		x86_pmu.extra_regs = intel_nehalem_extra_regs;
 		pr_cont("Nehalem events, ");
 		break;
 
@@ -1174,8 +1370,10 @@ static __init int intel_pmu_init(void)
 		intel_pmu_lbr_init_nhm();
 
 		x86_pmu.event_constraints = intel_westmere_event_constraints;
+		x86_pmu.percore_constraints = intel_westmere_percore_constraints;
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
+		x86_pmu.extra_regs = intel_westmere_extra_regs;
 		pr_cont("Westmere events, ");
 		break;
 
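Finally, a user-space sketch of requesting an offcore-response event through this interface; the umask and response mask are illustrative, and it assumes a kernel carrying this patch together with the matching perf_event_attr::config1 extension from the same series:

    #include <linux/perf_event.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    static int open_offcore_event(void)
    {
            struct perf_event_attr attr = {
                    .type    = PERF_TYPE_RAW,
                    .size    = sizeof(attr),
                    .config  = 0x01b7, /* event 0xb7: OFFCORE_RESPONSE_0 (umask 0x01 assumed) */
                    .config1 = 0x00ff, /* response mask, ends up in MSR_OFFCORE_RSP_0 */
            };

            /* measure the calling thread on any CPU */
            return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    }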