author		Stephane Eranian <eranian@google.com>	2011-06-06 10:57:03 -0400
committer	Ingo Molnar <mingo@elte.hu>	2011-07-01 05:06:36 -0400
commit		efc9f05df2dd171280dcb736a4d973ffefd5508e (patch)
tree		ccc1cee8f1cc0ad5391732eb3637b685b4b155a0 /arch/x86/kernel/cpu/perf_event_intel.c
parent		a7ac67ea021b4603095d2aa458bc41641238f22c (diff)
perf_events: Update Intel extra regs shared constraints management
This patch improves the code managing the extra shared registers used for offcore_response events on Intel Nehalem/Westmere. The idea is to use static allocation instead of dynamic allocation. This greatly simplifies the get and put constraint routines for those events.

The patch also renames per_core to shared_regs because the same data structure is used whether or not HT is on. When HT is off, those events still need coordination because they use an extra MSR that has to be shared within an event group.

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20110606145703.GA7258@quad
Signed-off-by: Ingo Molnar <mingo@elte.hu>
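For illustration only, the new scheme boils down to a statically allocated, per-core array of refcounted register accounts: the first user of a slot locks in the MSR value, later users may share it only if their config matches, and a put simply drops the reference. The sketch below is a minimal user-space model of that idea; the type and function names are hypothetical stand-ins (not the kernel's actual definitions), and the per-register spinlock and per-cpu/per-core wiring are omitted for brevity.

/* Simplified, hypothetical model of the shared extra-MSR accounting. */
#include <stdio.h>

#define EXTRA_REG_MAX 2			/* one slot per extra MSR (e.g. RSP_0, RSP_1) */

struct er_account {
	int ref;			/* how many events currently use this slot */
	unsigned int reg;		/* MSR number locked in by the first user */
	unsigned long long config;	/* MSR value all users must agree on */
};

static struct er_account shared[EXTRA_REG_MAX];	/* static allocation, per core */

/* Claim slot 'idx': succeeds if the slot is free or the config matches. */
static int get_shared_reg(int idx, unsigned int reg, unsigned long long config)
{
	struct er_account *era = &shared[idx];

	if (era->ref && era->config != config)
		return 0;		/* conflict: another group owns the MSR */
	era->reg = reg;			/* lock in msr value */
	era->config = config;
	era->ref++;			/* one more user */
	return 1;
}

/* Release a previously claimed slot so it can be re-programmed later. */
static void put_shared_reg(int idx)
{
	if (shared[idx].ref)
		shared[idx].ref--;
}

int main(void)
{
	printf("same config shares: %d\n", get_shared_reg(0, 0x1a6, 0xffff));
	printf("same config shares: %d\n", get_shared_reg(0, 0x1a6, 0xffff));
	printf("conflict rejected:  %d\n", get_shared_reg(0, 0x1a6, 0x00ff));
	put_shared_reg(0);
	put_shared_reg(0);
	return 0;
}

With static allocation, a conflict shows up as a failed get (an empty constraint) rather than a failed allocation, which is what lets the real get/put routines below stay so short.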
Diffstat (limited to 'arch/x86/kernel/cpu/perf_event_intel.c')
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel.c	260
1 file changed, 128 insertions(+), 132 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index d38b0020f77..6ad95baff85 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1,25 +1,15 @@
 #ifdef CONFIG_CPU_SUP_INTEL
 
-#define MAX_EXTRA_REGS 2
-
-/*
- * Per register state.
- */
-struct er_account {
-	int			ref;		/* reference count */
-	unsigned int		extra_reg;	/* extra MSR number */
-	u64			extra_config;	/* extra MSR config */
-};
-
 /*
- * Per core state
- * This used to coordinate shared registers for HT threads.
+ * Per core/cpu state
+ *
+ * Used to coordinate shared registers between HT threads or
+ * among events on a single PMU.
  */
-struct intel_percore {
-	raw_spinlock_t		lock;		/* protect structure */
-	struct er_account	regs[MAX_EXTRA_REGS];
-	int			refcnt;		/* number of threads */
-	unsigned		core_id;
+struct intel_shared_regs {
+	struct er_account	regs[EXTRA_REG_MAX];
+	int			refcnt;		/* per-core: #HT threads */
+	unsigned		core_id;	/* per-core: core id */
 };
 
 /*
@@ -88,16 +78,10 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
 
 static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
 {
-	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
+	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
 	EVENT_EXTRA_END
 };
 
-static struct event_constraint intel_nehalem_percore_constraints[] __read_mostly =
-{
-	INTEL_EVENT_CONSTRAINT(0xb7, 0),
-	EVENT_CONSTRAINT_END
-};
-
 static struct event_constraint intel_westmere_event_constraints[] __read_mostly =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -125,18 +109,11 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =
 
 static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
 {
-	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
-	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff),
+	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
+	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
 	EVENT_EXTRA_END
 };
 
-static struct event_constraint intel_westmere_percore_constraints[] __read_mostly =
-{
-	INTEL_EVENT_CONSTRAINT(0xb7, 0),
-	INTEL_EVENT_CONSTRAINT(0xbb, 0),
-	EVENT_CONSTRAINT_END
-};
-
 static struct event_constraint intel_gen_event_constraints[] __read_mostly =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -1037,65 +1014,89 @@ intel_bts_constraints(struct perf_event *event)
 	return NULL;
 }
 
+/*
+ * manage allocation of shared extra msr for certain events
+ *
+ * sharing can be:
+ * per-cpu: to be shared between the various events on a single PMU
+ * per-core: per-cpu + shared by HT threads
+ */
 static struct event_constraint *
-intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
+				   struct hw_perf_event_extra *reg)
 {
-	struct hw_perf_event *hwc = &event->hw;
-	unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;
-	struct event_constraint *c;
-	struct intel_percore *pc;
+	struct event_constraint *c = &emptyconstraint;
 	struct er_account *era;
-	int i;
-	int free_slot;
-	int found;
 
-	if (!x86_pmu.percore_constraints || hwc->extra_alloc)
-		return NULL;
+	/* already allocated shared msr */
+	if (reg->alloc || !cpuc->shared_regs)
+		return &unconstrained;
 
-	for (c = x86_pmu.percore_constraints; c->cmask; c++) {
-		if (e != c->code)
-			continue;
+	era = &cpuc->shared_regs->regs[reg->idx];
+
+	raw_spin_lock(&era->lock);
+
+	if (!atomic_read(&era->ref) || era->config == reg->config) {
+
+		/* lock in msr value */
+		era->config = reg->config;
+		era->reg = reg->reg;
+
+		/* one more user */
+		atomic_inc(&era->ref);
+
+		/* no need to reallocate during incremental event scheduling */
+		reg->alloc = 1;
 
 		/*
-		 * Allocate resource per core.
+		 * All events using extra_reg are unconstrained.
+		 * Avoids calling x86_get_event_constraints()
+		 *
+		 * Must revisit if extra_reg controlling events
+		 * ever have constraints. Worst case we go through
+		 * the regular event constraint table.
 		 */
-		pc = cpuc->per_core;
-		if (!pc)
-			break;
-		c = &emptyconstraint;
-		raw_spin_lock(&pc->lock);
-		free_slot = -1;
-		found = 0;
-		for (i = 0; i < MAX_EXTRA_REGS; i++) {
-			era = &pc->regs[i];
-			if (era->ref > 0 && hwc->extra_reg == era->extra_reg) {
-				/* Allow sharing same config */
-				if (hwc->extra_config == era->extra_config) {
-					era->ref++;
-					cpuc->percore_used = 1;
-					hwc->extra_alloc = 1;
-					c = NULL;
-				}
-				/* else conflict */
-				found = 1;
-				break;
-			} else if (era->ref == 0 && free_slot == -1)
-				free_slot = i;
-		}
-		if (!found && free_slot != -1) {
-			era = &pc->regs[free_slot];
-			era->ref = 1;
-			era->extra_reg = hwc->extra_reg;
-			era->extra_config = hwc->extra_config;
-			cpuc->percore_used = 1;
-			hwc->extra_alloc = 1;
-			c = NULL;
-		}
-		raw_spin_unlock(&pc->lock);
-		return c;
+		c = &unconstrained;
 	}
+	raw_spin_unlock(&era->lock);
 
-	return NULL;
+	return c;
+}
+
+static void
+__intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
+				   struct hw_perf_event_extra *reg)
+{
+	struct er_account *era;
+
+	/*
+	 * only put constraint if extra reg was actually
+	 * allocated. Also takes care of event which do
+	 * not use an extra shared reg
+	 */
+	if (!reg->alloc)
+		return;
+
+	era = &cpuc->shared_regs->regs[reg->idx];
+
+	/* one fewer user */
+	atomic_dec(&era->ref);
+
+	/* allocate again next time */
+	reg->alloc = 0;
+}
+
+static struct event_constraint *
+intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
+			      struct perf_event *event)
+{
+	struct event_constraint *c = NULL;
+	struct hw_perf_event_extra *xreg;
+
+	xreg = &event->hw.extra_reg;
+	if (xreg->idx != EXTRA_REG_NONE)
+		c = __intel_shared_reg_get_constraints(cpuc, xreg);
+	return c;
 }
 
 static struct event_constraint *
@@ -1111,49 +1112,28 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
 	if (c)
 		return c;
 
-	c = intel_percore_constraints(cpuc, event);
+	c = intel_shared_regs_constraints(cpuc, event);
 	if (c)
 		return c;
 
 	return x86_get_event_constraints(cpuc, event);
 }
 
-static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
+static void
+intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
 					struct perf_event *event)
 {
-	struct extra_reg *er;
-	struct intel_percore *pc;
-	struct er_account *era;
-	struct hw_perf_event *hwc = &event->hw;
-	int i, allref;
-
-	if (!cpuc->percore_used)
-		return;
+	struct hw_perf_event_extra *reg;
 
-	for (er = x86_pmu.extra_regs; er->msr; er++) {
-		if (er->event != (hwc->config & er->config_mask))
-			continue;
+	reg = &event->hw.extra_reg;
+	if (reg->idx != EXTRA_REG_NONE)
+		__intel_shared_reg_put_constraints(cpuc, reg);
+}
 
-		pc = cpuc->per_core;
-		raw_spin_lock(&pc->lock);
-		for (i = 0; i < MAX_EXTRA_REGS; i++) {
-			era = &pc->regs[i];
-			if (era->ref > 0 &&
-			    era->extra_config == hwc->extra_config &&
-			    era->extra_reg == er->msr) {
-				era->ref--;
-				hwc->extra_alloc = 0;
-				break;
-			}
-		}
-		allref = 0;
-		for (i = 0; i < MAX_EXTRA_REGS; i++)
-			allref += pc->regs[i].ref;
-		if (allref == 0)
-			cpuc->percore_used = 0;
-		raw_spin_unlock(&pc->lock);
-		break;
-	}
+static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
+					struct perf_event *event)
+{
+	intel_put_shared_regs_event_constraints(cpuc, event);
 }
 
 static int intel_pmu_hw_config(struct perf_event *event)
@@ -1231,20 +1211,36 @@ static __initconst const struct x86_pmu core_pmu = {
 	.event_constraints	= intel_core_event_constraints,
 };
 
+static struct intel_shared_regs *allocate_shared_regs(int cpu)
+{
+	struct intel_shared_regs *regs;
+	int i;
+
+	regs = kzalloc_node(sizeof(struct intel_shared_regs),
+			    GFP_KERNEL, cpu_to_node(cpu));
+	if (regs) {
+		/*
+		 * initialize the locks to keep lockdep happy
+		 */
+		for (i = 0; i < EXTRA_REG_MAX; i++)
+			raw_spin_lock_init(&regs->regs[i].lock);
+
+		regs->core_id = -1;
+	}
+	return regs;
+}
+
 static int intel_pmu_cpu_prepare(int cpu)
 {
 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
 
-	if (!cpu_has_ht_siblings())
+	if (!x86_pmu.extra_regs)
 		return NOTIFY_OK;
 
-	cpuc->per_core = kzalloc_node(sizeof(struct intel_percore),
-				      GFP_KERNEL, cpu_to_node(cpu));
-	if (!cpuc->per_core)
+	cpuc->shared_regs = allocate_shared_regs(cpu);
+	if (!cpuc->shared_regs)
 		return NOTIFY_BAD;
 
-	raw_spin_lock_init(&cpuc->per_core->lock);
-	cpuc->per_core->core_id = -1;
 	return NOTIFY_OK;
 }
 
@@ -1260,32 +1256,34 @@ static void intel_pmu_cpu_starting(int cpu)
 	 */
 	intel_pmu_lbr_reset();
 
-	if (!cpu_has_ht_siblings())
+	if (!cpuc->shared_regs)
 		return;
 
 	for_each_cpu(i, topology_thread_cpumask(cpu)) {
-		struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core;
+		struct intel_shared_regs *pc;
 
+		pc = per_cpu(cpu_hw_events, i).shared_regs;
 		if (pc && pc->core_id == core_id) {
-			kfree(cpuc->per_core);
-			cpuc->per_core = pc;
+			kfree(cpuc->shared_regs);
+			cpuc->shared_regs = pc;
 			break;
 		}
 	}
 
-	cpuc->per_core->core_id = core_id;
-	cpuc->per_core->refcnt++;
+	cpuc->shared_regs->core_id = core_id;
+	cpuc->shared_regs->refcnt++;
 }
 
 static void intel_pmu_cpu_dying(int cpu)
 {
 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
-	struct intel_percore *pc = cpuc->per_core;
+	struct intel_shared_regs *pc;
 
+	pc = cpuc->shared_regs;
 	if (pc) {
 		if (pc->core_id == -1 || --pc->refcnt == 0)
 			kfree(pc);
-		cpuc->per_core = NULL;
+		cpuc->shared_regs = NULL;
 	}
 
 	fini_debug_store_on_cpu(cpu);
@@ -1436,7 +1434,6 @@ static __init int intel_pmu_init(void)
 
 		x86_pmu.event_constraints = intel_nehalem_event_constraints;
 		x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
-		x86_pmu.percore_constraints = intel_nehalem_percore_constraints;
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.extra_regs = intel_nehalem_extra_regs;
 
@@ -1481,7 +1478,6 @@ static __init int intel_pmu_init(void)
 		intel_pmu_lbr_init_nhm();
 
 		x86_pmu.event_constraints = intel_westmere_event_constraints;
-		x86_pmu.percore_constraints = intel_westmere_percore_constraints;
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
 		x86_pmu.extra_regs = intel_westmere_extra_regs;