path: root/arch/x86/kernel/cpu/perf_event_intel.c
author	Andi Kleen <ak@linux.intel.com>	2011-03-02 21:34:47 -0500
committer	Ingo Molnar <mingo@elte.hu>	2011-03-04 05:32:53 -0500
commit	a7e3ed1e470116c9d12c2f778431a481a6be8ab6 (patch)
tree	10e72043f3eb0d6a31fe27188f74267a5796dbcd /arch/x86/kernel/cpu/perf_event_intel.c
parent	17e3162972cbb9796035fff1e2fd30669b0eef65 (diff)
perf: Add support for supplementary event registers
Change log against Andi's original version:

 - Extends perf_event_attr::config to config{,1,2} (Peter Zijlstra)
 - Fixed a major event scheduling issue. There cannot be a ref++ on an
   event that has already done ref++ once and without calling
   put_constraint() in between. (Stephane Eranian)
 - Use thread_cpumask for percore allocation. (Lin Ming)
 - Use MSR names in the extra reg lists. (Lin Ming)
 - Remove redundant "c = NULL" in intel_percore_constraints
 - Fix comment of perf_event_attr::config1

Intel Nehalem/Westmere have a special OFFCORE_RESPONSE event that can
be used to monitor any offcore accesses from a core. This is a very
useful event for various tunings, and it's also needed to implement
the generic LLC-* events correctly.

Unfortunately this event requires programming a mask in a separate
register. And worse, this separate register is per core, not per CPU
thread.

This patch:

 - Teaches perf_events that OFFCORE_RESPONSE needs extra parameters.
   The extra parameters are passed by user space in the
   perf_event_attr::config1 field.

 - Adds support to the Intel perf_event core to schedule per-core
   resources. This adds fairly generic infrastructure that can also be
   used for other per-core resources. The basic code is patterned
   after the similar AMD northbridge constraints code.

Thanks to Stephane Eranian who pointed out some problems in the
original version and suggested improvements.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1299119690-13991-2-git-send-email-ming.m.lin@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
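As a quick illustration of the user-space side described above (a minimal sketch, not part of the patch): the snippet below requests OFFCORE_RESPONSE through the new perf_event_attr::config1 field. It assumes a kernel with this patch series applied and a Nehalem/Westmere CPU; the umask 0x01 and the response-mask value are illustrative only, and the 0xffff limit in the extra-reg tables below bounds which config1 bits are accepted.

/* Sketch: count OFFCORE_RESPONSE_0 for the calling thread. */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;
	long fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_RAW;
	attr.config = 0x01b7;	/* event 0xb7 = OFFCORE_RESPONSE_0; umask illustrative */
	attr.config1 = 0xffff;	/* response mask, written by the kernel to MSR_OFFCORE_RSP_0 */

	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	/* ... run the workload to be measured ... */
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("offcore responses: %llu\n", (unsigned long long)count);
	close(fd);
	return 0;
}

Because the underlying MSR is per core, two hyper-threads asking for different config1 values at the same time cannot both be scheduled; the per-core constraint code added below resolves exactly that conflict.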
Diffstat (limited to 'arch/x86/kernel/cpu/perf_event_intel.c')
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel.c	| 198
1 file changed, 198 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index c3ce053ecb46..13cb6cf013f6 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1,5 +1,27 @@
 #ifdef CONFIG_CPU_SUP_INTEL
 
+#define MAX_EXTRA_REGS 2
+
+/*
+ * Per register state.
+ */
+struct er_account {
+	int		ref;		/* reference count */
+	unsigned int	extra_reg;	/* extra MSR number */
+	u64		extra_config;	/* extra MSR config */
+};
+
+/*
+ * Per core state.
+ * This is used to coordinate shared registers for HT threads.
+ */
+struct intel_percore {
+	raw_spinlock_t	lock;		/* protect structure */
+	struct er_account regs[MAX_EXTRA_REGS];
+	int		refcnt;		/* number of threads */
+	unsigned	core_id;
+};
+
 /*
  * Intel PerfMon, used on Core and later.
  */
@@ -64,6 +86,18 @@ static struct event_constraint intel_nehalem_event_constraints[] =
 	EVENT_CONSTRAINT_END
 };
 
+static struct extra_reg intel_nehalem_extra_regs[] =
+{
+	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
+	EVENT_EXTRA_END
+};
+
+static struct event_constraint intel_nehalem_percore_constraints[] =
+{
+	INTEL_EVENT_CONSTRAINT(0xb7, 0),
+	EVENT_CONSTRAINT_END
+};
+
 static struct event_constraint intel_westmere_event_constraints[] =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -89,6 +123,20 @@ static struct event_constraint intel_snb_event_constraints[] =
 	EVENT_CONSTRAINT_END
 };
 
+static struct extra_reg intel_westmere_extra_regs[] =
+{
+	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
+	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff),
+	EVENT_EXTRA_END
+};
+
+static struct event_constraint intel_westmere_percore_constraints[] =
+{
+	INTEL_EVENT_CONSTRAINT(0xb7, 0),
+	INTEL_EVENT_CONSTRAINT(0xbb, 0),
+	EVENT_CONSTRAINT_END
+};
+
 static struct event_constraint intel_gen_event_constraints[] =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -907,6 +955,67 @@ intel_bts_constraints(struct perf_event *event)
 }
 
 static struct event_constraint *
+intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;
+	struct event_constraint *c;
+	struct intel_percore *pc;
+	struct er_account *era;
+	int i;
+	int free_slot;
+	int found;
+
+	if (!x86_pmu.percore_constraints || hwc->extra_alloc)
+		return NULL;
+
+	for (c = x86_pmu.percore_constraints; c->cmask; c++) {
+		if (e != c->code)
+			continue;
+
+		/*
+		 * Allocate resource per core.
+		 */
+		pc = cpuc->per_core;
+		if (!pc)
+			break;
+		c = &emptyconstraint;
+		raw_spin_lock(&pc->lock);
+		free_slot = -1;
+		found = 0;
+		for (i = 0; i < MAX_EXTRA_REGS; i++) {
+			era = &pc->regs[i];
+			if (era->ref > 0 && hwc->extra_reg == era->extra_reg) {
+				/* Allow sharing same config */
+				if (hwc->extra_config == era->extra_config) {
+					era->ref++;
+					cpuc->percore_used = 1;
+					hwc->extra_alloc = 1;
+					c = NULL;
+				}
+				/* else conflict */
+				found = 1;
+				break;
+			} else if (era->ref == 0 && free_slot == -1)
+				free_slot = i;
+		}
+		if (!found && free_slot != -1) {
+			era = &pc->regs[free_slot];
+			era->ref = 1;
+			era->extra_reg = hwc->extra_reg;
+			era->extra_config = hwc->extra_config;
+			cpuc->percore_used = 1;
+			hwc->extra_alloc = 1;
+			c = NULL;
+		}
+		raw_spin_unlock(&pc->lock);
+		return c;
+	}
+
+	return NULL;
+}
+
+static struct event_constraint *
 intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 {
 	struct event_constraint *c;
@@ -919,9 +1028,51 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
 	if (c)
 		return c;
 
+	c = intel_percore_constraints(cpuc, event);
+	if (c)
+		return c;
+
 	return x86_get_event_constraints(cpuc, event);
 }
 
+static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
+					struct perf_event *event)
+{
+	struct extra_reg *er;
+	struct intel_percore *pc;
+	struct er_account *era;
+	struct hw_perf_event *hwc = &event->hw;
+	int i, allref;
+
+	if (!cpuc->percore_used)
+		return;
+
+	for (er = x86_pmu.extra_regs; er->msr; er++) {
+		if (er->event != (hwc->config & er->config_mask))
+			continue;
+
+		pc = cpuc->per_core;
+		raw_spin_lock(&pc->lock);
+		for (i = 0; i < MAX_EXTRA_REGS; i++) {
+			era = &pc->regs[i];
+			if (era->ref > 0 &&
+			    era->extra_config == hwc->extra_config &&
+			    era->extra_reg == er->msr) {
+				era->ref--;
+				hwc->extra_alloc = 0;
+				break;
+			}
+		}
+		allref = 0;
+		for (i = 0; i < MAX_EXTRA_REGS; i++)
+			allref += pc->regs[i].ref;
+		if (allref == 0)
+			cpuc->percore_used = 0;
+		raw_spin_unlock(&pc->lock);
+		break;
+	}
+}
+
 static int intel_pmu_hw_config(struct perf_event *event)
 {
 	int ret = x86_pmu_hw_config(event);
@@ -993,11 +1144,43 @@ static __initconst const struct x86_pmu core_pmu = {
 	 */
 	.max_period		= (1ULL << 31) - 1,
 	.get_event_constraints	= intel_get_event_constraints,
+	.put_event_constraints	= intel_put_event_constraints,
 	.event_constraints	= intel_core_event_constraints,
 };
 
+static int intel_pmu_cpu_prepare(int cpu)
+{
+	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+
+	cpuc->per_core = kzalloc_node(sizeof(struct intel_percore),
+				      GFP_KERNEL, cpu_to_node(cpu));
+	if (!cpuc->per_core)
+		return NOTIFY_BAD;
+
+	raw_spin_lock_init(&cpuc->per_core->lock);
+	cpuc->per_core->core_id = -1;
+	return NOTIFY_OK;
+}
+
 static void intel_pmu_cpu_starting(int cpu)
 {
+	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+	int core_id = topology_core_id(cpu);
+	int i;
+
+	for_each_cpu(i, topology_thread_cpumask(cpu)) {
+		struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core;
+
+		if (pc && pc->core_id == core_id) {
+			kfree(cpuc->per_core);
+			cpuc->per_core = pc;
+			break;
+		}
+	}
+
+	cpuc->per_core->core_id = core_id;
+	cpuc->per_core->refcnt++;
+
 	init_debug_store_on_cpu(cpu);
 	/*
 	 * Deal with CPUs that don't clear their LBRs on power-up.
@@ -1007,6 +1190,15 @@ static void intel_pmu_cpu_starting(int cpu)
 
 static void intel_pmu_cpu_dying(int cpu)
 {
+	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+	struct intel_percore *pc = cpuc->per_core;
+
+	if (pc) {
+		if (pc->core_id == -1 || --pc->refcnt == 0)
+			kfree(pc);
+		cpuc->per_core = NULL;
+	}
+
 	fini_debug_store_on_cpu(cpu);
 }
 
@@ -1031,7 +1223,9 @@ static __initconst const struct x86_pmu intel_pmu = {
 	 */
 	.max_period		= (1ULL << 31) - 1,
 	.get_event_constraints	= intel_get_event_constraints,
+	.put_event_constraints	= intel_put_event_constraints,
 
+	.cpu_prepare		= intel_pmu_cpu_prepare,
 	.cpu_starting		= intel_pmu_cpu_starting,
 	.cpu_dying		= intel_pmu_cpu_dying,
 };
@@ -1151,7 +1345,9 @@ static __init int intel_pmu_init(void)
 
 		x86_pmu.event_constraints = intel_nehalem_event_constraints;
 		x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
+		x86_pmu.percore_constraints = intel_nehalem_percore_constraints;
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
+		x86_pmu.extra_regs = intel_nehalem_extra_regs;
 		pr_cont("Nehalem events, ");
 		break;
 
@@ -1174,8 +1370,10 @@ static __init int intel_pmu_init(void)
 		intel_pmu_lbr_init_nhm();
 
 		x86_pmu.event_constraints = intel_westmere_event_constraints;
+		x86_pmu.percore_constraints = intel_westmere_percore_constraints;
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
+		x86_pmu.extra_regs = intel_westmere_extra_regs;
 		pr_cont("Westmere events, ");
 		break;
 
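Stepping back from the hunks: the heart of the scheduling change is the refcounted slot allocation in intel_percore_constraints(). The stand-alone model below distills just that logic so it can be compiled and played with in user space. It is an illustrative sketch, not kernel code: locking, constraint objects, and the PMU plumbing are omitted, and the MSR number (0x1a6, i.e. MSR_OFFCORE_RSP_0) and config values are arbitrary.

#include <stdio.h>

#define MAX_EXTRA_REGS 2

struct er_account {
	int ref;				/* reference count */
	unsigned int extra_reg;			/* extra MSR number */
	unsigned long long extra_config;	/* extra MSR config */
};

static struct er_account regs[MAX_EXTRA_REGS];	/* one set per core in the real code */

/*
 * Try to claim 'reg' with 'config' for one more event.
 * Returns 1 if the event may be scheduled, 0 on conflict,
 * mirroring the NULL / &emptyconstraint results above.
 */
static int claim_extra_reg(unsigned int reg, unsigned long long config)
{
	int i, free_slot = -1;

	for (i = 0; i < MAX_EXTRA_REGS; i++) {
		if (regs[i].ref > 0 && regs[i].extra_reg == reg) {
			if (regs[i].extra_config == config) {
				regs[i].ref++;	/* same config: share the register */
				return 1;
			}
			return 0;		/* same register, different config: conflict */
		}
		if (regs[i].ref == 0 && free_slot == -1)
			free_slot = i;
	}
	if (free_slot == -1)
		return 0;			/* no slot left */
	regs[free_slot].ref = 1;
	regs[free_slot].extra_reg = reg;
	regs[free_slot].extra_config = config;
	return 1;
}

int main(void)
{
	/* Two HT siblings programming MSR_OFFCORE_RSP_0 (0x1a6): */
	printf("%d\n", claim_extra_reg(0x1a6, 0x00ff));	/* 1: first claim wins a slot */
	printf("%d\n", claim_extra_reg(0x1a6, 0x00ff));	/* 1: identical config is shared */
	printf("%d\n", claim_extra_reg(0x1a6, 0xff00));	/* 0: conflicting config is rejected */
	return 0;
}

The release path in intel_put_event_constraints() is the mirror image: find the matching slot, drop its reference, and clear percore_used once every slot's refcount has reached zero.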