diff options
author | Stephane Eranian <eranian@google.com> | 2011-06-06 10:57:03 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2011-07-01 05:06:36 -0400 |
commit | efc9f05df2dd171280dcb736a4d973ffefd5508e (patch) | |
tree | ccc1cee8f1cc0ad5391732eb3637b685b4b155a0 /arch/x86/kernel/cpu/perf_event_intel.c | |
parent | a7ac67ea021b4603095d2aa458bc41641238f22c (diff) |
perf_events: Update Intel extra regs shared constraints management
This patch improves the code managing the extra shared registers
used for offcore_response events on Intel Nehalem/Westmere. The
idea is to use static allocation instead of dynamic allocation.
This greatly simplifies the get and put constraint routines for
those events.
The patch also renames per_core to shared_regs because the same
data structure gets used whether or not HT is on. When HT is
off, those events still need coordination because they use
an extra MSR that has to be shared within an event group.
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20110606145703.GA7258@quad
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel/cpu/perf_event_intel.c')
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel.c | 260 |
1 file changed, 128 insertions, 132 deletions
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index d38b0020f77..6ad95baff85 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -1,25 +1,15 @@ | |||
1 | #ifdef CONFIG_CPU_SUP_INTEL | 1 | #ifdef CONFIG_CPU_SUP_INTEL |
2 | 2 | ||
3 | #define MAX_EXTRA_REGS 2 | ||
4 | |||
5 | /* | ||
6 | * Per register state. | ||
7 | */ | ||
8 | struct er_account { | ||
9 | int ref; /* reference count */ | ||
10 | unsigned int extra_reg; /* extra MSR number */ | ||
11 | u64 extra_config; /* extra MSR config */ | ||
12 | }; | ||
13 | |||
14 | /* | 3 | /* |
15 | * Per core state | 4 | * Per core/cpu state |
16 | * This used to coordinate shared registers for HT threads. | 5 | * |
6 | * Used to coordinate shared registers between HT threads or | ||
7 | * among events on a single PMU. | ||
17 | */ | 8 | */ |
18 | struct intel_percore { | 9 | struct intel_shared_regs { |
19 | raw_spinlock_t lock; /* protect structure */ | 10 | struct er_account regs[EXTRA_REG_MAX]; |
20 | struct er_account regs[MAX_EXTRA_REGS]; | 11 | int refcnt; /* per-core: #HT threads */ |
21 | int refcnt; /* number of threads */ | 12 | unsigned core_id; /* per-core: core id */ |
22 | unsigned core_id; | ||
23 | }; | 13 | }; |
24 | 14 | ||
25 | /* | 15 | /* |
@@ -88,16 +78,10 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly = | |||
88 | 78 | ||
89 | static struct extra_reg intel_nehalem_extra_regs[] __read_mostly = | 79 | static struct extra_reg intel_nehalem_extra_regs[] __read_mostly = |
90 | { | 80 | { |
91 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), | 81 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), |
92 | EVENT_EXTRA_END | 82 | EVENT_EXTRA_END |
93 | }; | 83 | }; |
94 | 84 | ||
95 | static struct event_constraint intel_nehalem_percore_constraints[] __read_mostly = | ||
96 | { | ||
97 | INTEL_EVENT_CONSTRAINT(0xb7, 0), | ||
98 | EVENT_CONSTRAINT_END | ||
99 | }; | ||
100 | |||
101 | static struct event_constraint intel_westmere_event_constraints[] __read_mostly = | 85 | static struct event_constraint intel_westmere_event_constraints[] __read_mostly = |
102 | { | 86 | { |
103 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | 87 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
@@ -125,18 +109,11 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly = | |||
125 | 109 | ||
126 | static struct extra_reg intel_westmere_extra_regs[] __read_mostly = | 110 | static struct extra_reg intel_westmere_extra_regs[] __read_mostly = |
127 | { | 111 | { |
128 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), | 112 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), |
129 | INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff), | 113 | INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1), |
130 | EVENT_EXTRA_END | 114 | EVENT_EXTRA_END |
131 | }; | 115 | }; |
132 | 116 | ||
133 | static struct event_constraint intel_westmere_percore_constraints[] __read_mostly = | ||
134 | { | ||
135 | INTEL_EVENT_CONSTRAINT(0xb7, 0), | ||
136 | INTEL_EVENT_CONSTRAINT(0xbb, 0), | ||
137 | EVENT_CONSTRAINT_END | ||
138 | }; | ||
139 | |||
140 | static struct event_constraint intel_gen_event_constraints[] __read_mostly = | 117 | static struct event_constraint intel_gen_event_constraints[] __read_mostly = |
141 | { | 118 | { |
142 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | 119 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
@@ -1037,65 +1014,89 @@ intel_bts_constraints(struct perf_event *event) | |||
1037 | return NULL; | 1014 | return NULL; |
1038 | } | 1015 | } |
1039 | 1016 | ||
1017 | /* | ||
1018 | * manage allocation of shared extra msr for certain events | ||
1019 | * | ||
1020 | * sharing can be: | ||
1021 | * per-cpu: to be shared between the various events on a single PMU | ||
1022 | * per-core: per-cpu + shared by HT threads | ||
1023 | */ | ||
1040 | static struct event_constraint * | 1024 | static struct event_constraint * |
1041 | intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | 1025 | __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc, |
1026 | struct hw_perf_event_extra *reg) | ||
1042 | { | 1027 | { |
1043 | struct hw_perf_event *hwc = &event->hw; | 1028 | struct event_constraint *c = &emptyconstraint; |
1044 | unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT; | ||
1045 | struct event_constraint *c; | ||
1046 | struct intel_percore *pc; | ||
1047 | struct er_account *era; | 1029 | struct er_account *era; |
1048 | int i; | ||
1049 | int free_slot; | ||
1050 | int found; | ||
1051 | 1030 | ||
1052 | if (!x86_pmu.percore_constraints || hwc->extra_alloc) | 1031 | /* already allocated shared msr */ |
1053 | return NULL; | 1032 | if (reg->alloc || !cpuc->shared_regs) |
1033 | return &unconstrained; | ||
1054 | 1034 | ||
1055 | for (c = x86_pmu.percore_constraints; c->cmask; c++) { | 1035 | era = &cpuc->shared_regs->regs[reg->idx]; |
1056 | if (e != c->code) | 1036 | |
1057 | continue; | 1037 | raw_spin_lock(&era->lock); |
1038 | |||
1039 | if (!atomic_read(&era->ref) || era->config == reg->config) { | ||
1040 | |||
1041 | /* lock in msr value */ | ||
1042 | era->config = reg->config; | ||
1043 | era->reg = reg->reg; | ||
1044 | |||
1045 | /* one more user */ | ||
1046 | atomic_inc(&era->ref); | ||
1047 | |||
1048 | /* no need to reallocate during incremental event scheduling */ | ||
1049 | reg->alloc = 1; | ||
1058 | 1050 | ||
1059 | /* | 1051 | /* |
1060 | * Allocate resource per core. | 1052 | * All events using extra_reg are unconstrained. |
1053 | * Avoids calling x86_get_event_constraints() | ||
1054 | * | ||
1055 | * Must revisit if extra_reg controlling events | ||
1056 | * ever have constraints. Worst case we go through | ||
1057 | * the regular event constraint table. | ||
1061 | */ | 1058 | */ |
1062 | pc = cpuc->per_core; | 1059 | c = &unconstrained; |
1063 | if (!pc) | ||
1064 | break; | ||
1065 | c = &emptyconstraint; | ||
1066 | raw_spin_lock(&pc->lock); | ||
1067 | free_slot = -1; | ||
1068 | found = 0; | ||
1069 | for (i = 0; i < MAX_EXTRA_REGS; i++) { | ||
1070 | era = &pc->regs[i]; | ||
1071 | if (era->ref > 0 && hwc->extra_reg == era->extra_reg) { | ||
1072 | /* Allow sharing same config */ | ||
1073 | if (hwc->extra_config == era->extra_config) { | ||
1074 | era->ref++; | ||
1075 | cpuc->percore_used = 1; | ||
1076 | hwc->extra_alloc = 1; | ||
1077 | c = NULL; | ||
1078 | } | ||
1079 | /* else conflict */ | ||
1080 | found = 1; | ||
1081 | break; | ||
1082 | } else if (era->ref == 0 && free_slot == -1) | ||
1083 | free_slot = i; | ||
1084 | } | ||
1085 | if (!found && free_slot != -1) { | ||
1086 | era = &pc->regs[free_slot]; | ||
1087 | era->ref = 1; | ||
1088 | era->extra_reg = hwc->extra_reg; | ||
1089 | era->extra_config = hwc->extra_config; | ||
1090 | cpuc->percore_used = 1; | ||
1091 | hwc->extra_alloc = 1; | ||
1092 | c = NULL; | ||
1093 | } | ||
1094 | raw_spin_unlock(&pc->lock); | ||
1095 | return c; | ||
1096 | } | 1060 | } |
1061 | raw_spin_unlock(&era->lock); | ||
1097 | 1062 | ||
1098 | return NULL; | 1063 | return c; |
1064 | } | ||
1065 | |||
1066 | static void | ||
1067 | __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc, | ||
1068 | struct hw_perf_event_extra *reg) | ||
1069 | { | ||
1070 | struct er_account *era; | ||
1071 | |||
1072 | /* | ||
1073 | * only put constraint if extra reg was actually | ||
1074 | * allocated. Also takes care of event which do | ||
1075 | * not use an extra shared reg | ||
1076 | */ | ||
1077 | if (!reg->alloc) | ||
1078 | return; | ||
1079 | |||
1080 | era = &cpuc->shared_regs->regs[reg->idx]; | ||
1081 | |||
1082 | /* one fewer user */ | ||
1083 | atomic_dec(&era->ref); | ||
1084 | |||
1085 | /* allocate again next time */ | ||
1086 | reg->alloc = 0; | ||
1087 | } | ||
1088 | |||
1089 | static struct event_constraint * | ||
1090 | intel_shared_regs_constraints(struct cpu_hw_events *cpuc, | ||
1091 | struct perf_event *event) | ||
1092 | { | ||
1093 | struct event_constraint *c = NULL; | ||
1094 | struct hw_perf_event_extra *xreg; | ||
1095 | |||
1096 | xreg = &event->hw.extra_reg; | ||
1097 | if (xreg->idx != EXTRA_REG_NONE) | ||
1098 | c = __intel_shared_reg_get_constraints(cpuc, xreg); | ||
1099 | return c; | ||
1099 | } | 1100 | } |
1100 | 1101 | ||
1101 | static struct event_constraint * | 1102 | static struct event_constraint * |
@@ -1111,49 +1112,28 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event | |||
1111 | if (c) | 1112 | if (c) |
1112 | return c; | 1113 | return c; |
1113 | 1114 | ||
1114 | c = intel_percore_constraints(cpuc, event); | 1115 | c = intel_shared_regs_constraints(cpuc, event); |
1115 | if (c) | 1116 | if (c) |
1116 | return c; | 1117 | return c; |
1117 | 1118 | ||
1118 | return x86_get_event_constraints(cpuc, event); | 1119 | return x86_get_event_constraints(cpuc, event); |
1119 | } | 1120 | } |
1120 | 1121 | ||
1121 | static void intel_put_event_constraints(struct cpu_hw_events *cpuc, | 1122 | static void |
1123 | intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc, | ||
1122 | struct perf_event *event) | 1124 | struct perf_event *event) |
1123 | { | 1125 | { |
1124 | struct extra_reg *er; | 1126 | struct hw_perf_event_extra *reg; |
1125 | struct intel_percore *pc; | ||
1126 | struct er_account *era; | ||
1127 | struct hw_perf_event *hwc = &event->hw; | ||
1128 | int i, allref; | ||
1129 | |||
1130 | if (!cpuc->percore_used) | ||
1131 | return; | ||
1132 | 1127 | ||
1133 | for (er = x86_pmu.extra_regs; er->msr; er++) { | 1128 | reg = &event->hw.extra_reg; |
1134 | if (er->event != (hwc->config & er->config_mask)) | 1129 | if (reg->idx != EXTRA_REG_NONE) |
1135 | continue; | 1130 | __intel_shared_reg_put_constraints(cpuc, reg); |
1131 | } | ||
1136 | 1132 | ||
1137 | pc = cpuc->per_core; | 1133 | static void intel_put_event_constraints(struct cpu_hw_events *cpuc, |
1138 | raw_spin_lock(&pc->lock); | 1134 | struct perf_event *event) |
1139 | for (i = 0; i < MAX_EXTRA_REGS; i++) { | 1135 | { |
1140 | era = &pc->regs[i]; | 1136 | intel_put_shared_regs_event_constraints(cpuc, event); |
1141 | if (era->ref > 0 && | ||
1142 | era->extra_config == hwc->extra_config && | ||
1143 | era->extra_reg == er->msr) { | ||
1144 | era->ref--; | ||
1145 | hwc->extra_alloc = 0; | ||
1146 | break; | ||
1147 | } | ||
1148 | } | ||
1149 | allref = 0; | ||
1150 | for (i = 0; i < MAX_EXTRA_REGS; i++) | ||
1151 | allref += pc->regs[i].ref; | ||
1152 | if (allref == 0) | ||
1153 | cpuc->percore_used = 0; | ||
1154 | raw_spin_unlock(&pc->lock); | ||
1155 | break; | ||
1156 | } | ||
1157 | } | 1137 | } |
1158 | 1138 | ||
1159 | static int intel_pmu_hw_config(struct perf_event *event) | 1139 | static int intel_pmu_hw_config(struct perf_event *event) |
@@ -1231,20 +1211,36 @@ static __initconst const struct x86_pmu core_pmu = { | |||
1231 | .event_constraints = intel_core_event_constraints, | 1211 | .event_constraints = intel_core_event_constraints, |
1232 | }; | 1212 | }; |
1233 | 1213 | ||
1214 | static struct intel_shared_regs *allocate_shared_regs(int cpu) | ||
1215 | { | ||
1216 | struct intel_shared_regs *regs; | ||
1217 | int i; | ||
1218 | |||
1219 | regs = kzalloc_node(sizeof(struct intel_shared_regs), | ||
1220 | GFP_KERNEL, cpu_to_node(cpu)); | ||
1221 | if (regs) { | ||
1222 | /* | ||
1223 | * initialize the locks to keep lockdep happy | ||
1224 | */ | ||
1225 | for (i = 0; i < EXTRA_REG_MAX; i++) | ||
1226 | raw_spin_lock_init(®s->regs[i].lock); | ||
1227 | |||
1228 | regs->core_id = -1; | ||
1229 | } | ||
1230 | return regs; | ||
1231 | } | ||
1232 | |||
1234 | static int intel_pmu_cpu_prepare(int cpu) | 1233 | static int intel_pmu_cpu_prepare(int cpu) |
1235 | { | 1234 | { |
1236 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); | 1235 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); |
1237 | 1236 | ||
1238 | if (!cpu_has_ht_siblings()) | 1237 | if (!x86_pmu.extra_regs) |
1239 | return NOTIFY_OK; | 1238 | return NOTIFY_OK; |
1240 | 1239 | ||
1241 | cpuc->per_core = kzalloc_node(sizeof(struct intel_percore), | 1240 | cpuc->shared_regs = allocate_shared_regs(cpu); |
1242 | GFP_KERNEL, cpu_to_node(cpu)); | 1241 | if (!cpuc->shared_regs) |
1243 | if (!cpuc->per_core) | ||
1244 | return NOTIFY_BAD; | 1242 | return NOTIFY_BAD; |
1245 | 1243 | ||
1246 | raw_spin_lock_init(&cpuc->per_core->lock); | ||
1247 | cpuc->per_core->core_id = -1; | ||
1248 | return NOTIFY_OK; | 1244 | return NOTIFY_OK; |
1249 | } | 1245 | } |
1250 | 1246 | ||
@@ -1260,32 +1256,34 @@ static void intel_pmu_cpu_starting(int cpu) | |||
1260 | */ | 1256 | */ |
1261 | intel_pmu_lbr_reset(); | 1257 | intel_pmu_lbr_reset(); |
1262 | 1258 | ||
1263 | if (!cpu_has_ht_siblings()) | 1259 | if (!cpuc->shared_regs) |
1264 | return; | 1260 | return; |
1265 | 1261 | ||
1266 | for_each_cpu(i, topology_thread_cpumask(cpu)) { | 1262 | for_each_cpu(i, topology_thread_cpumask(cpu)) { |
1267 | struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core; | 1263 | struct intel_shared_regs *pc; |
1268 | 1264 | ||
1265 | pc = per_cpu(cpu_hw_events, i).shared_regs; | ||
1269 | if (pc && pc->core_id == core_id) { | 1266 | if (pc && pc->core_id == core_id) { |
1270 | kfree(cpuc->per_core); | 1267 | kfree(cpuc->shared_regs); |
1271 | cpuc->per_core = pc; | 1268 | cpuc->shared_regs = pc; |
1272 | break; | 1269 | break; |
1273 | } | 1270 | } |
1274 | } | 1271 | } |
1275 | 1272 | ||
1276 | cpuc->per_core->core_id = core_id; | 1273 | cpuc->shared_regs->core_id = core_id; |
1277 | cpuc->per_core->refcnt++; | 1274 | cpuc->shared_regs->refcnt++; |
1278 | } | 1275 | } |
1279 | 1276 | ||
1280 | static void intel_pmu_cpu_dying(int cpu) | 1277 | static void intel_pmu_cpu_dying(int cpu) |
1281 | { | 1278 | { |
1282 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); | 1279 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); |
1283 | struct intel_percore *pc = cpuc->per_core; | 1280 | struct intel_shared_regs *pc; |
1284 | 1281 | ||
1282 | pc = cpuc->shared_regs; | ||
1285 | if (pc) { | 1283 | if (pc) { |
1286 | if (pc->core_id == -1 || --pc->refcnt == 0) | 1284 | if (pc->core_id == -1 || --pc->refcnt == 0) |
1287 | kfree(pc); | 1285 | kfree(pc); |
1288 | cpuc->per_core = NULL; | 1286 | cpuc->shared_regs = NULL; |
1289 | } | 1287 | } |
1290 | 1288 | ||
1291 | fini_debug_store_on_cpu(cpu); | 1289 | fini_debug_store_on_cpu(cpu); |
@@ -1436,7 +1434,6 @@ static __init int intel_pmu_init(void) | |||
1436 | 1434 | ||
1437 | x86_pmu.event_constraints = intel_nehalem_event_constraints; | 1435 | x86_pmu.event_constraints = intel_nehalem_event_constraints; |
1438 | x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints; | 1436 | x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints; |
1439 | x86_pmu.percore_constraints = intel_nehalem_percore_constraints; | ||
1440 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; | 1437 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; |
1441 | x86_pmu.extra_regs = intel_nehalem_extra_regs; | 1438 | x86_pmu.extra_regs = intel_nehalem_extra_regs; |
1442 | 1439 | ||
@@ -1481,7 +1478,6 @@ static __init int intel_pmu_init(void) | |||
1481 | intel_pmu_lbr_init_nhm(); | 1478 | intel_pmu_lbr_init_nhm(); |
1482 | 1479 | ||
1483 | x86_pmu.event_constraints = intel_westmere_event_constraints; | 1480 | x86_pmu.event_constraints = intel_westmere_event_constraints; |
1484 | x86_pmu.percore_constraints = intel_westmere_percore_constraints; | ||
1485 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; | 1481 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; |
1486 | x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints; | 1482 | x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints; |
1487 | x86_pmu.extra_regs = intel_westmere_extra_regs; | 1483 | x86_pmu.extra_regs = intel_westmere_extra_regs; |