diff options
author | Andi Kleen <ak@linux.intel.com> | 2011-03-02 21:34:47 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2011-03-04 05:32:53 -0500 |
commit | a7e3ed1e470116c9d12c2f778431a481a6be8ab6 (patch) | |
tree | 10e72043f3eb0d6a31fe27188f74267a5796dbcd /arch/x86/kernel/cpu/perf_event_intel.c | |
parent | 17e3162972cbb9796035fff1e2fd30669b0eef65 (diff) |
perf: Add support for supplementary event registers
Change logs against Andi's original version:
- Extends perf_event_attr:config to config{,1,2} (Peter Zijlstra)
- Fixed a major event scheduling issue. There cannot be a ref++ on an
event that has already done ref++ once without calling
put_constraint() in between. (Stephane Eranian)
- Use thread_cpumask for percore allocation. (Lin Ming)
- Use MSR names in the extra reg lists. (Lin Ming)
- Remove redundant "c = NULL" in intel_percore_constraints
- Fix comment of perf_event_attr::config1
Intel Nehalem/Westmere have a special OFFCORE_RESPONSE event
that can be used to monitor any offcore accesses from a core.
This is a very useful event for various tunings, and it's
also needed to implement the generic LLC-* events correctly.
Unfortunately, this event requires programming a mask in a separate
register. And worse, this separate register is per core, not per
CPU thread.
This patch:
- Teaches perf_events that OFFCORE_RESPONSE needs extra parameters.
The extra parameters are passed by user space in the
perf_event_attr::config1 field.
- Adds support to the Intel perf_event core to schedule per
core resources. This adds fairly generic infrastructure that
can be also used for other per core resources.
The basic code is patterned after the AMD northbridge
constraints code.
Thanks to Stephane Eranian who pointed out some problems
in the original version and suggested improvements.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1299119690-13991-2-git-send-email-ming.m.lin@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel/cpu/perf_event_intel.c')
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel.c | 198 |
1 files changed, 198 insertions, 0 deletions
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index c3ce053ecb46..13cb6cf013f6 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -1,5 +1,27 @@ | |||
1 | #ifdef CONFIG_CPU_SUP_INTEL | 1 | #ifdef CONFIG_CPU_SUP_INTEL |
2 | 2 | ||
3 | #define MAX_EXTRA_REGS 2 | ||
4 | |||
5 | /* | ||
6 | * Per register state. | ||
7 | */ | ||
8 | struct er_account { | ||
9 | int ref; /* reference count */ | ||
10 | unsigned int extra_reg; /* extra MSR number */ | ||
11 | u64 extra_config; /* extra MSR config */ | ||
12 | }; | ||
13 | |||
14 | /* | ||
15 | * Per core state | ||
16 | * This is used to coordinate shared registers for HT threads. | |
17 | */ | ||
18 | struct intel_percore { | ||
19 | raw_spinlock_t lock; /* protect structure */ | ||
20 | struct er_account regs[MAX_EXTRA_REGS]; | ||
21 | int refcnt; /* number of threads */ | ||
22 | unsigned core_id; | ||
23 | }; | ||
24 | |||
3 | /* | 25 | /* |
4 | * Intel PerfMon, used on Core and later. | 26 | * Intel PerfMon, used on Core and later. |
5 | */ | 27 | */ |
@@ -64,6 +86,18 @@ static struct event_constraint intel_nehalem_event_constraints[] = | |||
64 | EVENT_CONSTRAINT_END | 86 | EVENT_CONSTRAINT_END |
65 | }; | 87 | }; |
66 | 88 | ||
89 | static struct extra_reg intel_nehalem_extra_regs[] = | ||
90 | { | ||
91 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), | ||
92 | EVENT_EXTRA_END | ||
93 | }; | ||
94 | |||
95 | static struct event_constraint intel_nehalem_percore_constraints[] = | ||
96 | { | ||
97 | INTEL_EVENT_CONSTRAINT(0xb7, 0), | ||
98 | EVENT_CONSTRAINT_END | ||
99 | }; | ||
100 | |||
67 | static struct event_constraint intel_westmere_event_constraints[] = | 101 | static struct event_constraint intel_westmere_event_constraints[] = |
68 | { | 102 | { |
69 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | 103 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
@@ -89,6 +123,20 @@ static struct event_constraint intel_snb_event_constraints[] = | |||
89 | EVENT_CONSTRAINT_END | 123 | EVENT_CONSTRAINT_END |
90 | }; | 124 | }; |
91 | 125 | ||
126 | static struct extra_reg intel_westmere_extra_regs[] = | ||
127 | { | ||
128 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), | ||
129 | INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff), | ||
130 | EVENT_EXTRA_END | ||
131 | }; | ||
132 | |||
133 | static struct event_constraint intel_westmere_percore_constraints[] = | ||
134 | { | ||
135 | INTEL_EVENT_CONSTRAINT(0xb7, 0), | ||
136 | INTEL_EVENT_CONSTRAINT(0xbb, 0), | ||
137 | EVENT_CONSTRAINT_END | ||
138 | }; | ||
139 | |||
92 | static struct event_constraint intel_gen_event_constraints[] = | 140 | static struct event_constraint intel_gen_event_constraints[] = |
93 | { | 141 | { |
94 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | 142 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
@@ -907,6 +955,67 @@ intel_bts_constraints(struct perf_event *event) | |||
907 | } | 955 | } |
908 | 956 | ||
909 | static struct event_constraint * | 957 | static struct event_constraint * |
958 | intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | ||
959 | { | ||
960 | struct hw_perf_event *hwc = &event->hw; | ||
961 | unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT; | ||
962 | struct event_constraint *c; | ||
963 | struct intel_percore *pc; | ||
964 | struct er_account *era; | ||
965 | int i; | ||
966 | int free_slot; | ||
967 | int found; | ||
968 | |||
969 | if (!x86_pmu.percore_constraints || hwc->extra_alloc) | ||
970 | return NULL; | ||
971 | |||
972 | for (c = x86_pmu.percore_constraints; c->cmask; c++) { | ||
973 | if (e != c->code) | ||
974 | continue; | ||
975 | |||
976 | /* | ||
977 | * Allocate resource per core. | ||
978 | */ | ||
979 | pc = cpuc->per_core; | ||
980 | if (!pc) | ||
981 | break; | ||
982 | c = &emptyconstraint; | ||
983 | raw_spin_lock(&pc->lock); | ||
984 | free_slot = -1; | ||
985 | found = 0; | ||
986 | for (i = 0; i < MAX_EXTRA_REGS; i++) { | ||
987 | era = &pc->regs[i]; | ||
988 | if (era->ref > 0 && hwc->extra_reg == era->extra_reg) { | ||
989 | /* Allow sharing same config */ | ||
990 | if (hwc->extra_config == era->extra_config) { | ||
991 | era->ref++; | ||
992 | cpuc->percore_used = 1; | ||
993 | hwc->extra_alloc = 1; | ||
994 | c = NULL; | ||
995 | } | ||
996 | /* else conflict */ | ||
997 | found = 1; | ||
998 | break; | ||
999 | } else if (era->ref == 0 && free_slot == -1) | ||
1000 | free_slot = i; | ||
1001 | } | ||
1002 | if (!found && free_slot != -1) { | ||
1003 | era = &pc->regs[free_slot]; | ||
1004 | era->ref = 1; | ||
1005 | era->extra_reg = hwc->extra_reg; | ||
1006 | era->extra_config = hwc->extra_config; | ||
1007 | cpuc->percore_used = 1; | ||
1008 | hwc->extra_alloc = 1; | ||
1009 | c = NULL; | ||
1010 | } | ||
1011 | raw_spin_unlock(&pc->lock); | ||
1012 | return c; | ||
1013 | } | ||
1014 | |||
1015 | return NULL; | ||
1016 | } | ||
1017 | |||
1018 | static struct event_constraint * | ||
910 | intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | 1019 | intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) |
911 | { | 1020 | { |
912 | struct event_constraint *c; | 1021 | struct event_constraint *c; |
@@ -919,9 +1028,51 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event | |||
919 | if (c) | 1028 | if (c) |
920 | return c; | 1029 | return c; |
921 | 1030 | ||
1031 | c = intel_percore_constraints(cpuc, event); | ||
1032 | if (c) | ||
1033 | return c; | ||
1034 | |||
922 | return x86_get_event_constraints(cpuc, event); | 1035 | return x86_get_event_constraints(cpuc, event); |
923 | } | 1036 | } |
924 | 1037 | ||
1038 | static void intel_put_event_constraints(struct cpu_hw_events *cpuc, | ||
1039 | struct perf_event *event) | ||
1040 | { | ||
1041 | struct extra_reg *er; | ||
1042 | struct intel_percore *pc; | ||
1043 | struct er_account *era; | ||
1044 | struct hw_perf_event *hwc = &event->hw; | ||
1045 | int i, allref; | ||
1046 | |||
1047 | if (!cpuc->percore_used) | ||
1048 | return; | ||
1049 | |||
1050 | for (er = x86_pmu.extra_regs; er->msr; er++) { | ||
1051 | if (er->event != (hwc->config & er->config_mask)) | ||
1052 | continue; | ||
1053 | |||
1054 | pc = cpuc->per_core; | ||
1055 | raw_spin_lock(&pc->lock); | ||
1056 | for (i = 0; i < MAX_EXTRA_REGS; i++) { | ||
1057 | era = &pc->regs[i]; | ||
1058 | if (era->ref > 0 && | ||
1059 | era->extra_config == hwc->extra_config && | ||
1060 | era->extra_reg == er->msr) { | ||
1061 | era->ref--; | ||
1062 | hwc->extra_alloc = 0; | ||
1063 | break; | ||
1064 | } | ||
1065 | } | ||
1066 | allref = 0; | ||
1067 | for (i = 0; i < MAX_EXTRA_REGS; i++) | ||
1068 | allref += pc->regs[i].ref; | ||
1069 | if (allref == 0) | ||
1070 | cpuc->percore_used = 0; | ||
1071 | raw_spin_unlock(&pc->lock); | ||
1072 | break; | ||
1073 | } | ||
1074 | } | ||
1075 | |||
925 | static int intel_pmu_hw_config(struct perf_event *event) | 1076 | static int intel_pmu_hw_config(struct perf_event *event) |
926 | { | 1077 | { |
927 | int ret = x86_pmu_hw_config(event); | 1078 | int ret = x86_pmu_hw_config(event); |
@@ -993,11 +1144,43 @@ static __initconst const struct x86_pmu core_pmu = { | |||
993 | */ | 1144 | */ |
994 | .max_period = (1ULL << 31) - 1, | 1145 | .max_period = (1ULL << 31) - 1, |
995 | .get_event_constraints = intel_get_event_constraints, | 1146 | .get_event_constraints = intel_get_event_constraints, |
1147 | .put_event_constraints = intel_put_event_constraints, | ||
996 | .event_constraints = intel_core_event_constraints, | 1148 | .event_constraints = intel_core_event_constraints, |
997 | }; | 1149 | }; |
998 | 1150 | ||
1151 | static int intel_pmu_cpu_prepare(int cpu) | ||
1152 | { | ||
1153 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); | ||
1154 | |||
1155 | cpuc->per_core = kzalloc_node(sizeof(struct intel_percore), | ||
1156 | GFP_KERNEL, cpu_to_node(cpu)); | ||
1157 | if (!cpuc->per_core) | ||
1158 | return NOTIFY_BAD; | ||
1159 | |||
1160 | raw_spin_lock_init(&cpuc->per_core->lock); | ||
1161 | cpuc->per_core->core_id = -1; | ||
1162 | return NOTIFY_OK; | ||
1163 | } | ||
1164 | |||
999 | static void intel_pmu_cpu_starting(int cpu) | 1165 | static void intel_pmu_cpu_starting(int cpu) |
1000 | { | 1166 | { |
1167 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); | ||
1168 | int core_id = topology_core_id(cpu); | ||
1169 | int i; | ||
1170 | |||
1171 | for_each_cpu(i, topology_thread_cpumask(cpu)) { | ||
1172 | struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core; | ||
1173 | |||
1174 | if (pc && pc->core_id == core_id) { | ||
1175 | kfree(cpuc->per_core); | ||
1176 | cpuc->per_core = pc; | ||
1177 | break; | ||
1178 | } | ||
1179 | } | ||
1180 | |||
1181 | cpuc->per_core->core_id = core_id; | ||
1182 | cpuc->per_core->refcnt++; | ||
1183 | |||
1001 | init_debug_store_on_cpu(cpu); | 1184 | init_debug_store_on_cpu(cpu); |
1002 | /* | 1185 | /* |
1003 | * Deal with CPUs that don't clear their LBRs on power-up. | 1186 | * Deal with CPUs that don't clear their LBRs on power-up. |
@@ -1007,6 +1190,15 @@ static void intel_pmu_cpu_starting(int cpu) | |||
1007 | 1190 | ||
1008 | static void intel_pmu_cpu_dying(int cpu) | 1191 | static void intel_pmu_cpu_dying(int cpu) |
1009 | { | 1192 | { |
1193 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); | ||
1194 | struct intel_percore *pc = cpuc->per_core; | ||
1195 | |||
1196 | if (pc) { | ||
1197 | if (pc->core_id == -1 || --pc->refcnt == 0) | ||
1198 | kfree(pc); | ||
1199 | cpuc->per_core = NULL; | ||
1200 | } | ||
1201 | |||
1010 | fini_debug_store_on_cpu(cpu); | 1202 | fini_debug_store_on_cpu(cpu); |
1011 | } | 1203 | } |
1012 | 1204 | ||
@@ -1031,7 +1223,9 @@ static __initconst const struct x86_pmu intel_pmu = { | |||
1031 | */ | 1223 | */ |
1032 | .max_period = (1ULL << 31) - 1, | 1224 | .max_period = (1ULL << 31) - 1, |
1033 | .get_event_constraints = intel_get_event_constraints, | 1225 | .get_event_constraints = intel_get_event_constraints, |
1226 | .put_event_constraints = intel_put_event_constraints, | ||
1034 | 1227 | ||
1228 | .cpu_prepare = intel_pmu_cpu_prepare, | ||
1035 | .cpu_starting = intel_pmu_cpu_starting, | 1229 | .cpu_starting = intel_pmu_cpu_starting, |
1036 | .cpu_dying = intel_pmu_cpu_dying, | 1230 | .cpu_dying = intel_pmu_cpu_dying, |
1037 | }; | 1231 | }; |
@@ -1151,7 +1345,9 @@ static __init int intel_pmu_init(void) | |||
1151 | 1345 | ||
1152 | x86_pmu.event_constraints = intel_nehalem_event_constraints; | 1346 | x86_pmu.event_constraints = intel_nehalem_event_constraints; |
1153 | x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints; | 1347 | x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints; |
1348 | x86_pmu.percore_constraints = intel_nehalem_percore_constraints; | ||
1154 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; | 1349 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; |
1350 | x86_pmu.extra_regs = intel_nehalem_extra_regs; | ||
1155 | pr_cont("Nehalem events, "); | 1351 | pr_cont("Nehalem events, "); |
1156 | break; | 1352 | break; |
1157 | 1353 | ||
@@ -1174,8 +1370,10 @@ static __init int intel_pmu_init(void) | |||
1174 | intel_pmu_lbr_init_nhm(); | 1370 | intel_pmu_lbr_init_nhm(); |
1175 | 1371 | ||
1176 | x86_pmu.event_constraints = intel_westmere_event_constraints; | 1372 | x86_pmu.event_constraints = intel_westmere_event_constraints; |
1373 | x86_pmu.percore_constraints = intel_westmere_percore_constraints; | ||
1177 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; | 1374 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; |
1178 | x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints; | 1375 | x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints; |
1376 | x86_pmu.extra_regs = intel_westmere_extra_regs; | ||
1179 | pr_cont("Westmere events, "); | 1377 | pr_cont("Westmere events, "); |
1180 | break; | 1378 | break; |
1181 | 1379 | ||