Diffstat (limited to 'arch')
 arch/x86/include/asm/msr-index.h       |   3 +
 arch/x86/kernel/cpu/perf_event.c       |  64 ++++++
 arch/x86/kernel/cpu/perf_event_intel.c | 198 +++++++++++++++++++
 3 files changed, 265 insertions(+), 0 deletions(-)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 4d0dfa0d998e..d25e74cc1a50 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -47,6 +47,9 @@
 #define MSR_IA32_MCG_STATUS        0x0000017a
 #define MSR_IA32_MCG_CTL           0x0000017b
 
+#define MSR_OFFCORE_RSP_0          0x000001a6
+#define MSR_OFFCORE_RSP_1          0x000001a7
+
 #define MSR_IA32_PEBS_ENABLE       0x000003f1
 #define MSR_IA32_DS_AREA           0x00000600
 #define MSR_IA32_PERF_CAPABILITIES 0x00000345
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index ea03c725e465..ec6a6db07332 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -93,6 +93,8 @@ struct amd_nb {
         struct event_constraint event_constraints[X86_PMC_IDX_MAX];
 };
 
+struct intel_percore;
+
 #define MAX_LBR_ENTRIES         16
 
 struct cpu_hw_events {
@@ -128,6 +130,13 @@ struct cpu_hw_events {
         struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
 
         /*
+         * Intel percore register state.
+         * Coordinate shared resources between HT threads.
+         */
+        int                     percore_used; /* Used by this CPU? */
+        struct intel_percore    *per_core;
+
+        /*
          * AMD specific bits
          */
         struct amd_nb           *amd_nb;
@@ -177,6 +186,28 @@ struct cpu_hw_events {
 #define for_each_event_constraint(e, c) \
         for ((e) = (c); (e)->weight; (e)++)
 
+/*
+ * Extra registers for specific events.
+ * Some events need large masks and require external MSRs.
+ * Define a mapping to these extra registers.
+ */
+struct extra_reg {
+        unsigned int    event;
+        unsigned int    msr;
+        u64             config_mask;
+        u64             valid_mask;
+};
+
+#define EVENT_EXTRA_REG(e, ms, m, vm) { \
+        .event = (e),           \
+        .msr = (ms),            \
+        .config_mask = (m),     \
+        .valid_mask = (vm),     \
+        }
+#define INTEL_EVENT_EXTRA_REG(event, msr, vm)   \
+        EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm)
+#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0)
+
 union perf_capabilities {
         struct {
                 u64     lbr_format    : 6;
@@ -221,6 +252,7 @@ struct x86_pmu {
         void            (*put_event_constraints)(struct cpu_hw_events *cpuc,
                                                  struct perf_event *event);
         struct event_constraint *event_constraints;
+        struct event_constraint *percore_constraints;
         void            (*quirks)(void);
         int             perfctr_second_write;
 
@@ -249,6 +281,11 @@ struct x86_pmu {
          */
         unsigned long   lbr_tos, lbr_from, lbr_to; /* MSR base regs       */
         int             lbr_nr;                    /* hardware stack size */
+
+        /*
+         * Extra registers for events
+         */
+        struct extra_reg *extra_regs;
 };
 
 static struct x86_pmu x86_pmu __read_mostly;
@@ -341,6 +378,31 @@ static inline unsigned int x86_pmu_event_addr(int index)
         return x86_pmu.perfctr + x86_pmu_addr_offset(index);
 }
 
+/*
+ * Find and validate any extra registers to set up.
+ */
+static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
+{
+        struct extra_reg *er;
+
+        event->hw.extra_reg = 0;
+        event->hw.extra_config = 0;
+
+        if (!x86_pmu.extra_regs)
+                return 0;
+
+        for (er = x86_pmu.extra_regs; er->msr; er++) {
+                if (er->event != (config & er->config_mask))
+                        continue;
+                if (event->attr.config1 & ~er->valid_mask)
+                        return -EINVAL;
+                event->hw.extra_reg = er->msr;
+                event->hw.extra_config = event->attr.config1;
+                break;
+        }
+        return 0;
+}
+
 static atomic_t active_events;
 static DEFINE_MUTEX(pmc_reserve_mutex);
 
@@ -665,6 +727,8 @@ static void x86_pmu_disable(struct pmu *pmu)
 static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
                                           u64 enable_mask)
 {
+        if (hwc->extra_reg)
+                wrmsrl(hwc->extra_reg, hwc->extra_config);
         wrmsrl(hwc->config_base, hwc->config | enable_mask);
 }
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index c3ce053ecb46..13cb6cf013f6 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1,5 +1,27 @@
 #ifdef CONFIG_CPU_SUP_INTEL
 
+#define MAX_EXTRA_REGS 2
+
+/*
+ * Per register state.
+ */
+struct er_account {
+        int             ref;            /* reference count */
+        unsigned int    extra_reg;      /* extra MSR number */
+        u64             extra_config;   /* extra MSR config */
+};
+
+/*
+ * Per core state.
+ * This is used to coordinate shared registers between HT threads.
+ */
+struct intel_percore {
+        raw_spinlock_t          lock;           /* protect structure */
+        struct er_account       regs[MAX_EXTRA_REGS];
+        int                     refcnt;         /* number of threads */
+        unsigned                core_id;
+};
+
 /*
  * Intel PerfMon, used on Core and later.
  */
@@ -64,6 +86,18 @@ static struct event_constraint intel_nehalem_event_constraints[] =
         EVENT_CONSTRAINT_END
 };
 
+static struct extra_reg intel_nehalem_extra_regs[] =
+{
+        INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
+        EVENT_EXTRA_END
+};
+
+static struct event_constraint intel_nehalem_percore_constraints[] =
+{
+        INTEL_EVENT_CONSTRAINT(0xb7, 0),
+        EVENT_CONSTRAINT_END
+};
+
 static struct event_constraint intel_westmere_event_constraints[] =
 {
         FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -89,6 +123,20 @@ static struct event_constraint intel_snb_event_constraints[] =
         EVENT_CONSTRAINT_END
 };
 
+static struct extra_reg intel_westmere_extra_regs[] =
+{
+        INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
+        INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff),
+        EVENT_EXTRA_END
+};
+
+static struct event_constraint intel_westmere_percore_constraints[] =
+{
+        INTEL_EVENT_CONSTRAINT(0xb7, 0),
+        INTEL_EVENT_CONSTRAINT(0xbb, 0),
+        EVENT_CONSTRAINT_END
+};
+
 static struct event_constraint intel_gen_event_constraints[] =
 {
         FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -907,6 +955,67 @@ intel_bts_constraints(struct perf_event *event)
 }
 
 static struct event_constraint *
+intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+        struct hw_perf_event *hwc = &event->hw;
+        unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;
+        struct event_constraint *c;
+        struct intel_percore *pc;
+        struct er_account *era;
+        int i;
+        int free_slot;
+        int found;
+
+        if (!x86_pmu.percore_constraints || hwc->extra_alloc)
+                return NULL;
+
+        for (c = x86_pmu.percore_constraints; c->cmask; c++) {
+                if (e != c->code)
+                        continue;
+
+                /*
+                 * Allocate resource per core.
+                 */
+                pc = cpuc->per_core;
+                if (!pc)
+                        break;
+                c = &emptyconstraint;
+                raw_spin_lock(&pc->lock);
+                free_slot = -1;
+                found = 0;
+                for (i = 0; i < MAX_EXTRA_REGS; i++) {
+                        era = &pc->regs[i];
+                        if (era->ref > 0 && hwc->extra_reg == era->extra_reg) {
+                                /* Allow sharing same config */
+                                if (hwc->extra_config == era->extra_config) {
+                                        era->ref++;
+                                        cpuc->percore_used = 1;
+                                        hwc->extra_alloc = 1;
+                                        c = NULL;
+                                }
+                                /* else conflict */
+                                found = 1;
+                                break;
+                        } else if (era->ref == 0 && free_slot == -1)
+                                free_slot = i;
+                }
+                if (!found && free_slot != -1) {
+                        era = &pc->regs[free_slot];
+                        era->ref = 1;
+                        era->extra_reg = hwc->extra_reg;
+                        era->extra_config = hwc->extra_config;
+                        cpuc->percore_used = 1;
+                        hwc->extra_alloc = 1;
+                        c = NULL;
+                }
+                raw_spin_unlock(&pc->lock);
+                return c;
+        }
+
+        return NULL;
+}
+
+static struct event_constraint *
 intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 {
         struct event_constraint *c;
@@ -919,9 +1028,51 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
         if (c)
                 return c;
 
+        c = intel_percore_constraints(cpuc, event);
+        if (c)
+                return c;
+
         return x86_get_event_constraints(cpuc, event);
 }
 
+static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
+                                        struct perf_event *event)
+{
+        struct extra_reg *er;
+        struct intel_percore *pc;
+        struct er_account *era;
+        struct hw_perf_event *hwc = &event->hw;
+        int i, allref;
+
+        if (!cpuc->percore_used)
+                return;
+
+        for (er = x86_pmu.extra_regs; er->msr; er++) {
+                if (er->event != (hwc->config & er->config_mask))
+                        continue;
+
+                pc = cpuc->per_core;
+                raw_spin_lock(&pc->lock);
+                for (i = 0; i < MAX_EXTRA_REGS; i++) {
+                        era = &pc->regs[i];
+                        if (era->ref > 0 &&
+                            era->extra_config == hwc->extra_config &&
+                            era->extra_reg == er->msr) {
+                                era->ref--;
+                                hwc->extra_alloc = 0;
+                                break;
+                        }
+                }
+                allref = 0;
+                for (i = 0; i < MAX_EXTRA_REGS; i++)
+                        allref += pc->regs[i].ref;
+                if (allref == 0)
+                        cpuc->percore_used = 0;
+                raw_spin_unlock(&pc->lock);
+                break;
+        }
+}
+
 static int intel_pmu_hw_config(struct perf_event *event)
 {
         int ret = x86_pmu_hw_config(event);
@@ -993,11 +1144,43 @@ static __initconst const struct x86_pmu core_pmu = {
          */
         .max_period             = (1ULL << 31) - 1,
         .get_event_constraints  = intel_get_event_constraints,
+        .put_event_constraints  = intel_put_event_constraints,
         .event_constraints      = intel_core_event_constraints,
 };
 
+static int intel_pmu_cpu_prepare(int cpu)
+{
+        struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+
+        cpuc->per_core = kzalloc_node(sizeof(struct intel_percore),
+                                      GFP_KERNEL, cpu_to_node(cpu));
+        if (!cpuc->per_core)
+                return NOTIFY_BAD;
+
+        raw_spin_lock_init(&cpuc->per_core->lock);
+        cpuc->per_core->core_id = -1;
+        return NOTIFY_OK;
+}
+
 static void intel_pmu_cpu_starting(int cpu)
 {
+        struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+        int core_id = topology_core_id(cpu);
+        int i;
+
+        for_each_cpu(i, topology_thread_cpumask(cpu)) {
+                struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core;
+
+                if (pc && pc->core_id == core_id) {
+                        kfree(cpuc->per_core);
+                        cpuc->per_core = pc;
+                        break;
+                }
+        }
+
+        cpuc->per_core->core_id = core_id;
+        cpuc->per_core->refcnt++;
+
         init_debug_store_on_cpu(cpu);
         /*
          * Deal with CPUs that don't clear their LBRs on power-up.
@@ -1007,6 +1190,15 @@ static void intel_pmu_cpu_starting(int cpu)
 
 static void intel_pmu_cpu_dying(int cpu)
 {
+        struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+        struct intel_percore *pc = cpuc->per_core;
+
+        if (pc) {
+                if (pc->core_id == -1 || --pc->refcnt == 0)
+                        kfree(pc);
+                cpuc->per_core = NULL;
+        }
+
         fini_debug_store_on_cpu(cpu);
 }
 
@@ -1031,7 +1223,9 @@ static __initconst const struct x86_pmu intel_pmu = {
          */
         .max_period             = (1ULL << 31) - 1,
         .get_event_constraints  = intel_get_event_constraints,
+        .put_event_constraints  = intel_put_event_constraints,
 
+        .cpu_prepare            = intel_pmu_cpu_prepare,
         .cpu_starting           = intel_pmu_cpu_starting,
         .cpu_dying              = intel_pmu_cpu_dying,
 };
@@ -1151,7 +1345,9 @@ static __init int intel_pmu_init(void)
 
                 x86_pmu.event_constraints = intel_nehalem_event_constraints;
                 x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
+                x86_pmu.percore_constraints = intel_nehalem_percore_constraints;
                 x86_pmu.enable_all = intel_pmu_nhm_enable_all;
+                x86_pmu.extra_regs = intel_nehalem_extra_regs;
                 pr_cont("Nehalem events, ");
                 break;
 
@@ -1174,8 +1370,10 @@ static __init int intel_pmu_init(void)
                 intel_pmu_lbr_init_nhm();
 
                 x86_pmu.event_constraints = intel_westmere_event_constraints;
+                x86_pmu.percore_constraints = intel_westmere_percore_constraints;
                 x86_pmu.enable_all = intel_pmu_nhm_enable_all;
                 x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
+                x86_pmu.extra_regs = intel_westmere_extra_regs;
                 pr_cont("Westmere events, ");
                 break;
 
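
Usage note (not part of the patch): the new attr.config1 field is how userspace supplies the offcore response mask that x86_pmu_extra_regs() validates against valid_mask, stores in hw.extra_config, and __x86_pmu_enable_event() writes to MSR_OFFCORE_RSP_0/1. The sketch below is a minimal, hypothetical consumer: it assumes the Nehalem/Westmere raw encoding 0x01b7 for OFFCORE_RESPONSE_0 and uses a placeholder mask value; the real request/response bit layout comes from the Intel SDM, not from this patch.

/*
 * Hypothetical example, not part of this patch: count OFFCORE_RESPONSE_0
 * for the current task.  The raw event 0x01b7 is matched against
 * intel_nehalem_extra_regs[] and the 16-bit config1 mask (valid_mask
 * 0xffff) ends up in MSR_OFFCORE_RSP_0 when the event is enabled.
 */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        struct perf_event_attr attr;
        long long count;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size    = sizeof(attr);
        attr.type    = PERF_TYPE_RAW;
        attr.config  = 0x01b7; /* OFFCORE_RESPONSE_0 (event 0xb7, umask 0x01) */
        attr.config1 = 0x00ff; /* placeholder offcore mask, must fit valid_mask */

        fd = syscall(__NR_perf_event_open, &attr, 0 /* self */, -1 /* any cpu */,
                     -1 /* no group */, 0);
        if (fd < 0) {
                perror("perf_event_open");
                return 1;
        }

        /* ... run the workload being measured ... */

        if (read(fd, &count, sizeof(count)) == sizeof(count))
                printf("offcore responses: %lld\n", count);
        close(fd);
        return 0;
}

Per intel_percore_constraints(), two events on sibling hyperthreads may program the same offcore MSR only if they carry an identical config1 value; a conflicting value on the sibling thread gets the empty constraint and cannot be scheduled.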