 arch/x86/include/asm/perf_event.h |  16 +-
 arch/x86/kernel/cpu/perf_event.c  | 775 +++++++++++++++++++++++++++----------
 2 files changed, 574 insertions(+), 217 deletions(-)
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 8d9f8548a870..dbc082685d52 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -26,7 +26,14 @@
 /*
  * Includes eventsel and unit mask as well:
  */
-#define ARCH_PERFMON_EVENT_MASK			0xffff
+
+
+#define INTEL_ARCH_EVTSEL_MASK		0x000000FFULL
+#define INTEL_ARCH_UNIT_MASK		0x0000FF00ULL
+#define INTEL_ARCH_EDGE_MASK		0x00040000ULL
+#define INTEL_ARCH_INV_MASK		0x00800000ULL
+#define INTEL_ARCH_CNT_MASK		0xFF000000ULL
+#define INTEL_ARCH_EVENT_MASK	(INTEL_ARCH_UNIT_MASK|INTEL_ARCH_EVTSEL_MASK)
 
 /*
  * filter mask to validate fixed counter events.
@@ -37,7 +44,12 @@
  * The other filters are supported by fixed counters.
  * The any-thread option is supported starting with v3.
  */
-#define ARCH_PERFMON_EVENT_FILTER_MASK		0xff840000
+#define INTEL_ARCH_FIXED_MASK \
+	(INTEL_ARCH_CNT_MASK| \
+	 INTEL_ARCH_INV_MASK| \
+	 INTEL_ARCH_EDGE_MASK|\
+	 INTEL_ARCH_UNIT_MASK|\
+	 INTEL_ARCH_EVENT_MASK)
 
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL		0x3c
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK		(0x00 << 8)
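
The new INTEL_ARCH_* masks split a raw event-select register value into its architectural fields; only the event-select and unit-mask bits identify an event when it is matched against a constraint's code. A minimal user-space sketch of that decomposition (local names and a made-up config value, not the kernel macros):

    #include <stdint.h>
    #include <stdio.h>

    /* local mirrors of the mask values above, to keep this standalone */
    #define EVTSEL_MASK 0x000000FFULL  /* event select */
    #define UNIT_MASK   0x0000FF00ULL  /* unit mask    */
    #define CNT_MASK    0xFF000000ULL  /* counter mask */
    #define EVENT_MASK  (UNIT_MASK | EVTSEL_MASK)

    int main(void)
    {
        /* hypothetical raw config: event 0x3c, umask 0x00, cmask 0x01 */
        uint64_t config = 0x0100003cULL;

        /* only eventsel + umask form the key a constraint's code is matched on */
        printf("constraint key: 0x%04llx\n",
               (unsigned long long)(config & EVENT_MASK));
        printf("counter mask:   0x%02llx\n",
               (unsigned long long)((config & CNT_MASK) >> 24));
        return 0;
    }
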
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index ed1998b28a7c..995ac4ae379c 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -7,6 +7,7 @@
  *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
  *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
  *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
+ *  Copyright (C) 2009 Google, Inc., Stephane Eranian
  *
  *  For licencing details see kernel-base/COPYING
  */
@@ -68,26 +69,37 @@ struct debug_store {
 	u64	pebs_event_reset[MAX_PEBS_EVENTS];
 };
 
+#define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64))
+
+struct event_constraint {
+	u64	idxmsk[BITS_TO_U64(X86_PMC_IDX_MAX)];
+	int	code;
+	int	cmask;
+};
+
 struct cpu_hw_events {
-	struct perf_event	*events[X86_PMC_IDX_MAX];
-	unsigned long		used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	struct perf_event	*events[X86_PMC_IDX_MAX]; /* in counter order */
 	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	unsigned long		interrupts;
 	int			enabled;
 	struct debug_store	*ds;
-};
 
-struct event_constraint {
-	unsigned long	idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-	int		code;
+	int			n_events;
+	int			n_added;
+	int			assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
+	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
 };
 
-#define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) }
-#define EVENT_CONSTRAINT_END  { .code = 0, .idxmsk[0] = 0 }
+#define EVENT_CONSTRAINT(c, n, m) { \
+	.code = (c),	\
+	.cmask = (m),	\
+	.idxmsk[0] = (n) }
 
-#define for_each_event_constraint(e, c) \
-	for ((e) = (c); (e)->idxmsk[0]; (e)++)
+#define EVENT_CONSTRAINT_END \
+	{ .code = 0, .cmask = 0, .idxmsk[0] = 0 }
 
+#define for_each_event_constraint(e, c) \
+	for ((e) = (c); (e)->cmask; (e)++)
 
 /*
  * struct x86_pmu - generic x86 pmu
@@ -114,8 +126,9 @@ struct x86_pmu {
 	u64		intel_ctrl;
 	void		(*enable_bts)(u64 config);
 	void		(*disable_bts)(void);
-	int		(*get_event_idx)(struct cpu_hw_events *cpuc,
-					 struct hw_perf_event *hwc);
+	void		(*get_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event, u64 *idxmsk);
+	void		(*put_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event);
+	const struct event_constraint *event_constraints;
 };
 
 static struct x86_pmu x86_pmu __read_mostly;
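
The EVENT_CONSTRAINT table plus the for_each_event_constraint() walk boils down to: scan an array terminated by a zero cmask and, for the first entry whose code matches the config bits selected by cmask, take its counter bitmask. A user-space-only sketch of that lookup with toy data (not the kernel structures):

    #include <stdint.h>
    #include <stdio.h>

    /* stand-ins for the structures above; a single-word idxmsk is enough here */
    struct toy_constraint {
        uint64_t idxmsk;  /* bitmask of counters the event may use */
        int      code;    /* event code to match                   */
        int      cmask;   /* config bits compared against code     */
    };

    #define TOY_CONSTRAINT(c, n, m)  { .idxmsk = (n), .code = (c), .cmask = (m) }
    #define TOY_CONSTRAINT_END       { .idxmsk = 0, .code = 0, .cmask = 0 }

    static const struct toy_constraint table[] = {
        TOY_CONSTRAINT(0x12, 0x2, 0xff),  /* hypothetical: counter 1 only */
        TOY_CONSTRAINT(0x14, 0x1, 0xff),  /* hypothetical: counter 0 only */
        TOY_CONSTRAINT_END                /* cmask == 0 terminates        */
    };

    int main(void)
    {
        uint64_t config = 0x12;  /* made-up raw event code */

        /* same shape as for_each_event_constraint(): stop on cmask == 0 */
        for (const struct toy_constraint *c = table; c->cmask; c++) {
            if ((config & c->cmask) == (uint64_t)c->code) {
                printf("event 0x%02x -> counters 0x%llx\n", c->code,
                       (unsigned long long)c->idxmsk);
                break;
            }
        }
        return 0;
    }
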
@@ -124,7 +137,8 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
 	.enabled = 1,
 };
 
-static const struct event_constraint *event_constraints;
+static int x86_perf_event_set_period(struct perf_event *event,
+				     struct hw_perf_event *hwc, int idx);
 
 /*
  * Not sure about some of these
@@ -171,14 +185,14 @@ static u64 p6_pmu_raw_event(u64 hw_event)
 	return hw_event & P6_EVNTSEL_MASK;
 }
 
-static const struct event_constraint intel_p6_event_constraints[] =
+static struct event_constraint intel_p6_event_constraints[] =
 {
-	EVENT_CONSTRAINT(0xc1, 0x1),	/* FLOPS */
-	EVENT_CONSTRAINT(0x10, 0x1),	/* FP_COMP_OPS_EXE */
-	EVENT_CONSTRAINT(0x11, 0x1),	/* FP_ASSIST */
-	EVENT_CONSTRAINT(0x12, 0x2),	/* MUL */
-	EVENT_CONSTRAINT(0x13, 0x2),	/* DIV */
-	EVENT_CONSTRAINT(0x14, 0x1),	/* CYCLES_DIV_BUSY */
+	EVENT_CONSTRAINT(0xc1, 0x1, INTEL_ARCH_EVENT_MASK),	/* FLOPS */
+	EVENT_CONSTRAINT(0x10, 0x1, INTEL_ARCH_EVENT_MASK),	/* FP_COMP_OPS_EXE */
+	EVENT_CONSTRAINT(0x11, 0x1, INTEL_ARCH_EVENT_MASK),	/* FP_ASSIST */
+	EVENT_CONSTRAINT(0x12, 0x2, INTEL_ARCH_EVENT_MASK),	/* MUL */
+	EVENT_CONSTRAINT(0x13, 0x2, INTEL_ARCH_EVENT_MASK),	/* DIV */
+	EVENT_CONSTRAINT(0x14, 0x1, INTEL_ARCH_EVENT_MASK),	/* CYCLES_DIV_BUSY */
 	EVENT_CONSTRAINT_END
 };
 
@@ -196,32 +210,43 @@ static const u64 intel_perfmon_event_map[] =
   [PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
 };
 
-static const struct event_constraint intel_core_event_constraints[] =
+static struct event_constraint intel_core_event_constraints[] =
 {
-	EVENT_CONSTRAINT(0x10, 0x1),	/* FP_COMP_OPS_EXE */
-	EVENT_CONSTRAINT(0x11, 0x2),	/* FP_ASSIST */
-	EVENT_CONSTRAINT(0x12, 0x2),	/* MUL */
-	EVENT_CONSTRAINT(0x13, 0x2),	/* DIV */
-	EVENT_CONSTRAINT(0x14, 0x1),	/* CYCLES_DIV_BUSY */
-	EVENT_CONSTRAINT(0x18, 0x1),	/* IDLE_DURING_DIV */
-	EVENT_CONSTRAINT(0x19, 0x2),	/* DELAYED_BYPASS */
-	EVENT_CONSTRAINT(0xa1, 0x1),	/* RS_UOPS_DISPATCH_CYCLES */
-	EVENT_CONSTRAINT(0xcb, 0x1),	/* MEM_LOAD_RETIRED */
+	EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32)), INTEL_ARCH_FIXED_MASK), /* INSTRUCTIONS_RETIRED */
+	EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33)), INTEL_ARCH_FIXED_MASK), /* UNHALTED_CORE_CYCLES */
+	EVENT_CONSTRAINT(0x10, 0x1, INTEL_ARCH_EVENT_MASK), /* FP_COMP_OPS_EXE */
+	EVENT_CONSTRAINT(0x11, 0x2, INTEL_ARCH_EVENT_MASK), /* FP_ASSIST */
+	EVENT_CONSTRAINT(0x12, 0x2, INTEL_ARCH_EVENT_MASK), /* MUL */
+	EVENT_CONSTRAINT(0x13, 0x2, INTEL_ARCH_EVENT_MASK), /* DIV */
+	EVENT_CONSTRAINT(0x14, 0x1, INTEL_ARCH_EVENT_MASK), /* CYCLES_DIV_BUSY */
+	EVENT_CONSTRAINT(0x18, 0x1, INTEL_ARCH_EVENT_MASK), /* IDLE_DURING_DIV */
+	EVENT_CONSTRAINT(0x19, 0x2, INTEL_ARCH_EVENT_MASK), /* DELAYED_BYPASS */
+	EVENT_CONSTRAINT(0xa1, 0x1, INTEL_ARCH_EVENT_MASK), /* RS_UOPS_DISPATCH_CYCLES */
+	EVENT_CONSTRAINT(0xcb, 0x1, INTEL_ARCH_EVENT_MASK), /* MEM_LOAD_RETIRED */
 	EVENT_CONSTRAINT_END
 };
 
-static const struct event_constraint intel_nehalem_event_constraints[] =
+static struct event_constraint intel_nehalem_event_constraints[] =
 {
-	EVENT_CONSTRAINT(0x40, 0x3),	/* L1D_CACHE_LD */
-	EVENT_CONSTRAINT(0x41, 0x3),	/* L1D_CACHE_ST */
-	EVENT_CONSTRAINT(0x42, 0x3),	/* L1D_CACHE_LOCK */
-	EVENT_CONSTRAINT(0x43, 0x3),	/* L1D_ALL_REF */
-	EVENT_CONSTRAINT(0x4e, 0x3),	/* L1D_PREFETCH */
-	EVENT_CONSTRAINT(0x4c, 0x3),	/* LOAD_HIT_PRE */
-	EVENT_CONSTRAINT(0x51, 0x3),	/* L1D */
-	EVENT_CONSTRAINT(0x52, 0x3),	/* L1D_CACHE_PREFETCH_LOCK_FB_HIT */
-	EVENT_CONSTRAINT(0x53, 0x3),	/* L1D_CACHE_LOCK_FB_HIT */
-	EVENT_CONSTRAINT(0xc5, 0x3),	/* CACHE_LOCK_CYCLES */
+	EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32)), INTEL_ARCH_FIXED_MASK), /* INSTRUCTIONS_RETIRED */
+	EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33)), INTEL_ARCH_FIXED_MASK), /* UNHALTED_CORE_CYCLES */
+	EVENT_CONSTRAINT(0x40, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_LD */
+	EVENT_CONSTRAINT(0x41, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_ST */
+	EVENT_CONSTRAINT(0x42, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_LOCK */
+	EVENT_CONSTRAINT(0x43, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_ALL_REF */
+	EVENT_CONSTRAINT(0x4e, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_PREFETCH */
+	EVENT_CONSTRAINT(0x4c, 0x3, INTEL_ARCH_EVENT_MASK), /* LOAD_HIT_PRE */
+	EVENT_CONSTRAINT(0x51, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D */
+	EVENT_CONSTRAINT(0x52, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */
+	EVENT_CONSTRAINT(0x53, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_LOCK_FB_HIT */
+	EVENT_CONSTRAINT(0xc5, 0x3, INTEL_ARCH_EVENT_MASK), /* CACHE_LOCK_CYCLES */
+	EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_gen_event_constraints[] =
+{
+	EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32)), INTEL_ARCH_FIXED_MASK), /* INSTRUCTIONS_RETIRED */
+	EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33)), INTEL_ARCH_FIXED_MASK), /* UNHALTED_CORE_CYCLES */
 	EVENT_CONSTRAINT_END
 };
 
@@ -527,11 +552,11 @@ static u64 intel_pmu_raw_event(u64 hw_event)
 #define CORE_EVNTSEL_REG_MASK		0xFF000000ULL
 
 #define CORE_EVNTSEL_MASK		\
-	(CORE_EVNTSEL_EVENT_MASK |	\
-	 CORE_EVNTSEL_UNIT_MASK  |	\
-	 CORE_EVNTSEL_EDGE_MASK  |	\
-	 CORE_EVNTSEL_INV_MASK   |	\
-	 CORE_EVNTSEL_REG_MASK)
+	(INTEL_ARCH_EVTSEL_MASK |	\
+	 INTEL_ARCH_UNIT_MASK   |	\
+	 INTEL_ARCH_EDGE_MASK   |	\
+	 INTEL_ARCH_INV_MASK    |	\
+	 INTEL_ARCH_CNT_MASK)
 
 	return hw_event & CORE_EVNTSEL_MASK;
 }
@@ -1120,9 +1145,15 @@ static void amd_pmu_disable_all(void)
 
 void hw_perf_disable(void)
 {
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
 	if (!x86_pmu_initialized())
 		return;
-	return x86_pmu.disable_all();
+
+	if (cpuc->enabled)
+		cpuc->n_added = 0;
+
+	x86_pmu.disable_all();
 }
 
 static void p6_pmu_enable_all(void)
@@ -1189,10 +1220,237 @@ static void amd_pmu_enable_all(void)
 	}
 }
 
+static const struct pmu pmu;
+
+static inline int is_x86_event(struct perf_event *event)
+{
+	return event->pmu == &pmu;
+}
+
+static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
+{
+	int i, j, w, num;
+	int weight, wmax;
+	unsigned long *c;
+	u64 constraints[X86_PMC_IDX_MAX][BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	struct hw_perf_event *hwc;
+
+	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
+
+	for (i = 0; i < n; i++) {
+		x86_pmu.get_event_constraints(cpuc,
+					      cpuc->event_list[i],
+					      constraints[i]);
+	}
+
+	/*
+	 * weight = number of possible counters
+	 *
+	 * 1    = most constrained, only works on one counter
+	 * wmax = least constrained, works on any counter
+	 *
+	 * assign events to counters starting with most
+	 * constrained events.
+	 */
+	wmax = x86_pmu.num_events;
+
+	/*
+	 * when fixed event counters are present,
+	 * wmax is incremented by 1 to account
+	 * for one more choice
+	 */
+	if (x86_pmu.num_events_fixed)
+		wmax++;
+
+	num = n;
+	for (w = 1; num && w <= wmax; w++) {
+		/* for each event */
+		for (i = 0; i < n; i++) {
+			c = (unsigned long *)constraints[i];
+			hwc = &cpuc->event_list[i]->hw;
+
+			weight = bitmap_weight(c, X86_PMC_IDX_MAX);
+			if (weight != w)
+				continue;
+
+			/*
+			 * try to reuse previous assignment
+			 *
+			 * This is possible despite the fact that
+			 * events or event order may have changed.
+			 *
+			 * What matters is the level of constraints
+			 * of an event and this is constant for now.
+			 *
+			 * This is possible also because we always
+			 * scan from most to least constrained. Thus,
+			 * if a counter can be reused, it means no
+			 * more-constrained event needed it. And
+			 * next events will either compete for it
+			 * (which cannot be solved anyway) or they
+			 * have fewer constraints, and they can use
+			 * another counter.
+			 */
+			j = hwc->idx;
+			if (j != -1 && !test_bit(j, used_mask))
+				goto skip;
+
+			for_each_bit(j, c, X86_PMC_IDX_MAX) {
+				if (!test_bit(j, used_mask))
+					break;
+			}
+
+			if (j == X86_PMC_IDX_MAX)
+				break;
+skip:
+			set_bit(j, used_mask);
+
+#if 0
+			pr_debug("CPU%d config=0x%llx idx=%d assign=%c\n",
+				 smp_processor_id(),
+				 hwc->config,
+				 j,
+				 assign ? 'y' : 'n');
+#endif
+
+			if (assign)
+				assign[i] = j;
+			num--;
+		}
+	}
+	/*
+	 * scheduling failed or is just a simulation,
+	 * free resources if necessary
+	 */
+	if (!assign || num) {
+		for (i = 0; i < n; i++) {
+			if (x86_pmu.put_event_constraints)
+				x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]);
+		}
+	}
+	return num ? -ENOSPC : 0;
+}
+
+/*
+ * dogrp: true if we must also collect the sibling events of the group
+ * returns the total number of events, or an error code
+ */
+static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
+{
+	struct perf_event *event;
+	int n, max_count;
+
+	max_count = x86_pmu.num_events + x86_pmu.num_events_fixed;
+
+	/* current number of events already accepted */
+	n = cpuc->n_events;
+
+	if (is_x86_event(leader)) {
+		if (n >= max_count)
+			return -ENOSPC;
+		cpuc->event_list[n] = leader;
+		n++;
+	}
+	if (!dogrp)
+		return n;
+
+	list_for_each_entry(event, &leader->sibling_list, group_entry) {
+		if (!is_x86_event(event) ||
+		    event->state == PERF_EVENT_STATE_OFF)
+			continue;
+
+		if (n >= max_count)
+			return -ENOSPC;
+
+		cpuc->event_list[n] = event;
+		n++;
+	}
+	return n;
+}
+
+static inline void x86_assign_hw_event(struct perf_event *event,
+				       struct hw_perf_event *hwc, int idx)
+{
+	hwc->idx = idx;
+
+	if (hwc->idx == X86_PMC_IDX_FIXED_BTS) {
+		hwc->config_base = 0;
+		hwc->event_base	 = 0;
+	} else if (hwc->idx >= X86_PMC_IDX_FIXED) {
+		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
+		/*
+		 * We set it so that event_base + idx in wrmsr/rdmsr maps to
+		 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
+		 */
+		hwc->event_base =
+			MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
+	} else {
+		hwc->config_base = x86_pmu.eventsel;
+		hwc->event_base  = x86_pmu.perfctr;
+	}
+}
+
 void hw_perf_enable(void)
 {
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct perf_event *event;
+	struct hw_perf_event *hwc;
+	int i;
+
 	if (!x86_pmu_initialized())
 		return;
+	if (cpuc->n_added) {
+		/*
+		 * apply assignment obtained either from
+		 * hw_perf_group_sched_in() or x86_pmu_enable()
+		 *
+		 * step1: save events moving to new counters
+		 * step2: reprogram moved events into new counters
+		 */
+		for (i = 0; i < cpuc->n_events; i++) {
+
+			event = cpuc->event_list[i];
+			hwc = &event->hw;
+
+			if (hwc->idx == -1 || hwc->idx == cpuc->assign[i])
+				continue;
+
+			x86_pmu.disable(hwc, hwc->idx);
+
+			clear_bit(hwc->idx, cpuc->active_mask);
+			barrier();
+			cpuc->events[hwc->idx] = NULL;
+
+			x86_perf_event_update(event, hwc, hwc->idx);
+
+			hwc->idx = -1;
+		}
+
+		for (i = 0; i < cpuc->n_events; i++) {
+
+			event = cpuc->event_list[i];
+			hwc = &event->hw;
+
+			if (hwc->idx == -1) {
+				x86_assign_hw_event(event, hwc, cpuc->assign[i]);
+				x86_perf_event_set_period(event, hwc, hwc->idx);
+			}
+			/*
+			 * need to mark as active because x86_pmu_disable()
+			 * clears active_mask and events[] yet it preserves
+			 * idx
+			 */
+			set_bit(hwc->idx, cpuc->active_mask);
+			cpuc->events[hwc->idx] = event;
+
+			x86_pmu.enable(hwc, hwc->idx);
+			perf_event_update_userpage(event);
+		}
+		cpuc->n_added = 0;
+		perf_events_lapic_init();
+	}
 	x86_pmu.enable_all();
 }
 
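The scheduling pass added above is a greedy pass over events ordered by how constrained they are: events that can live on only one counter are placed first, fully flexible events last. A standalone toy model of that loop (made-up counter masks, no kernel dependencies) may help:

    #include <stdint.h>
    #include <stdio.h>

    #define NCOUNTERS 4

    /* number of counters an event may use = its "weight" */
    static int popcount(uint64_t x)
    {
        int n = 0;
        for (; x; x &= x - 1)
            n++;
        return n;
    }

    int main(void)
    {
        uint64_t allowed[] = { 0x1, 0x3, 0xf };   /* per-event counter masks */
        int nevents = 3, assign[3] = { -1, -1, -1 };
        uint64_t used = 0;
        int placed = 0;

        /* most constrained first: weight 1, then 2, ... */
        for (int w = 1; w <= NCOUNTERS && placed < nevents; w++) {
            for (int i = 0; i < nevents; i++) {
                if (popcount(allowed[i]) != w)
                    continue;
                for (int j = 0; j < NCOUNTERS; j++) {
                    if ((allowed[i] >> j & 1) && !(used >> j & 1)) {
                        assign[i] = j;        /* claim first free allowed counter */
                        used |= 1ULL << j;
                        placed++;
                        break;
                    }
                }
            }
        }

        for (int i = 0; i < nevents; i++)
            printf("event %d -> counter %d\n", i, assign[i]);
        return 0;   /* an assign[i] left at -1 would mean -ENOSPC in the kernel */
    }
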
@@ -1391,148 +1649,43 @@ static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx)
 		x86_pmu_enable_event(hwc, idx);
 }
 
-static int fixed_mode_idx(struct hw_perf_event *hwc)
-{
-	unsigned int hw_event;
-
-	hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK;
-
-	if (unlikely((hw_event ==
-		      x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
-		     (hwc->sample_period == 1)))
-		return X86_PMC_IDX_FIXED_BTS;
-
-	if (!x86_pmu.num_events_fixed)
-		return -1;
-
-	/*
-	 * fixed counters do not take all possible filters
-	 */
-	if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK)
-		return -1;
-
-	if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
-		return X86_PMC_IDX_FIXED_INSTRUCTIONS;
-	if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
-		return X86_PMC_IDX_FIXED_CPU_CYCLES;
-	if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES)))
-		return X86_PMC_IDX_FIXED_BUS_CYCLES;
-
-	return -1;
-}
-
-/*
- * generic counter allocator: get next free counter
- */
-static int
-gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
-{
-	int idx;
-
-	idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events);
-	return idx == x86_pmu.num_events ? -1 : idx;
-}
-
 /*
- * intel-specific counter allocator: check event constraints
- */
-static int
-intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
-{
-	const struct event_constraint *event_constraint;
-	int i, code;
-
-	if (!event_constraints)
-		goto skip;
-
-	code = hwc->config & CORE_EVNTSEL_EVENT_MASK;
-
-	for_each_event_constraint(event_constraint, event_constraints) {
-		if (code == event_constraint->code) {
-			for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) {
-				if (!test_and_set_bit(i, cpuc->used_mask))
-					return i;
-			}
-			return -1;
-		}
-	}
-skip:
-	return gen_get_event_idx(cpuc, hwc);
-}
-
-static int
-x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
-{
-	int idx;
-
-	idx = fixed_mode_idx(hwc);
-	if (idx == X86_PMC_IDX_FIXED_BTS) {
-		/* BTS is already occupied. */
-		if (test_and_set_bit(idx, cpuc->used_mask))
-			return -EAGAIN;
-
-		hwc->config_base = 0;
-		hwc->event_base	 = 0;
-		hwc->idx	 = idx;
-	} else if (idx >= 0) {
-		/*
-		 * Try to get the fixed event, if that is already taken
-		 * then try to get a generic event:
-		 */
-		if (test_and_set_bit(idx, cpuc->used_mask))
-			goto try_generic;
-
-		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
-		/*
-		 * We set it so that event_base + idx in wrmsr/rdmsr maps to
-		 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
-		 */
-		hwc->event_base =
-			MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
-		hwc->idx = idx;
-	} else {
-		idx = hwc->idx;
-		/* Try to get the previous generic event again */
-		if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) {
-try_generic:
-			idx = x86_pmu.get_event_idx(cpuc, hwc);
-			if (idx == -1)
-				return -EAGAIN;
-
-			set_bit(idx, cpuc->used_mask);
-			hwc->idx = idx;
-		}
-		hwc->config_base = x86_pmu.eventsel;
-		hwc->event_base  = x86_pmu.perfctr;
-	}
-
-	return idx;
-}
-
-/*
- * Find a PMC slot for the freshly enabled / scheduled in event:
- */
+ * activate a single event
+ *
+ * The event is added to the group of enabled events
+ * but only if it can be scheduled with existing events.
+ *
+ * Called with PMU disabled. If successful and return value 1,
+ * then guaranteed to call perf_enable() and hw_perf_enable()
+ */
 static int x86_pmu_enable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	struct hw_perf_event *hwc = &event->hw;
-	int idx;
+	struct hw_perf_event *hwc;
+	int assign[X86_PMC_IDX_MAX];
+	int n, n0, ret;
 
-	idx = x86_schedule_event(cpuc, hwc);
-	if (idx < 0)
-		return idx;
+	hwc = &event->hw;
 
-	perf_events_lapic_init();
+	n0 = cpuc->n_events;
+	n = collect_events(cpuc, event, false);
+	if (n < 0)
+		return n;
 
-	x86_pmu.disable(hwc, idx);
-
-	cpuc->events[idx] = event;
-	set_bit(idx, cpuc->active_mask);
+	ret = x86_schedule_events(cpuc, n, assign);
+	if (ret)
+		return ret;
+	/*
+	 * copy new assignment, now that we know it is possible;
+	 * it will be used by hw_perf_enable()
+	 */
+	memcpy(cpuc->assign, assign, n*sizeof(int));
 
-	x86_perf_event_set_period(event, hwc, idx);
-	x86_pmu.enable(hwc, idx);
+	cpuc->n_events = n;
+	cpuc->n_added  = n - n0;
 
-	perf_event_update_userpage(event);
+	if (hwc->idx != -1)
+		x86_perf_event_set_period(event, hwc, hwc->idx);
 
 	return 0;
 }
@@ -1576,7 +1729,7 @@ void perf_event_print_debug(void)
 		pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
 		pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
 	}
-	pr_info("CPU#%d: used:       %016llx\n", cpu, *(u64 *)cpuc->used_mask);
+	pr_info("CPU#%d: active:     %016llx\n", cpu, *(u64 *)cpuc->active_mask);
 
 	for (idx = 0; idx < x86_pmu.num_events; idx++) {
 		rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
@@ -1664,7 +1817,7 @@ static void x86_pmu_disable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
-	int idx = hwc->idx;
+	int i, idx = hwc->idx;
 
 	/*
 	 * Must be done before we disable, otherwise the nmi handler
@@ -1690,8 +1843,19 @@ static void x86_pmu_disable(struct perf_event *event)
 		intel_pmu_drain_bts_buffer(cpuc);
 
 	cpuc->events[idx] = NULL;
-	clear_bit(idx, cpuc->used_mask);
 
+	for (i = 0; i < cpuc->n_events; i++) {
+		if (event == cpuc->event_list[i]) {
+
+			if (x86_pmu.put_event_constraints)
+				x86_pmu.put_event_constraints(cpuc, event);
+
+			while (++i < cpuc->n_events)
+				cpuc->event_list[i-1] = cpuc->event_list[i];
+
+			--cpuc->n_events;
+		}
+	}
 	perf_event_update_userpage(event);
 }
 
@@ -1962,6 +2126,176 @@ perf_event_nmi_handler(struct notifier_block *self,
 	return NOTIFY_STOP;
 }
 
+static struct event_constraint bts_constraint = {
+	.code = 0,
+	.cmask = 0,
+	.idxmsk[0] = 1ULL << X86_PMC_IDX_FIXED_BTS
+};
+
+static int intel_special_constraints(struct perf_event *event,
+				     u64 *idxmsk)
+{
+	unsigned int hw_event;
+
+	hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK;
+
+	if (unlikely((hw_event ==
+		      x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
+		     (event->hw.sample_period == 1))) {
+
+		bitmap_copy((unsigned long *)idxmsk,
+			    (unsigned long *)bts_constraint.idxmsk,
+			    X86_PMC_IDX_MAX);
+		return 1;
+	}
+	return 0;
+}
+
+static void intel_get_event_constraints(struct cpu_hw_events *cpuc,
+					struct perf_event *event,
+					u64 *idxmsk)
+{
+	const struct event_constraint *c;
+
+	/*
+	 * cleanup bitmask
+	 */
+	bitmap_zero((unsigned long *)idxmsk, X86_PMC_IDX_MAX);
+
+	if (intel_special_constraints(event, idxmsk))
+		return;
+
+	if (x86_pmu.event_constraints) {
+		for_each_event_constraint(c, x86_pmu.event_constraints) {
+			if ((event->hw.config & c->cmask) == c->code) {
+
+				bitmap_copy((unsigned long *)idxmsk,
+					    (unsigned long *)c->idxmsk,
+					    X86_PMC_IDX_MAX);
+				return;
+			}
+		}
+	}
+	/* no constraints means the event can use all generic counters */
+	bitmap_fill((unsigned long *)idxmsk, x86_pmu.num_events);
+}
+
+static void amd_get_event_constraints(struct cpu_hw_events *cpuc,
+				      struct perf_event *event,
+				      u64 *idxmsk)
+{
+}
+
+static int x86_event_sched_in(struct perf_event *event,
+			      struct perf_cpu_context *cpuctx, int cpu)
+{
+	int ret = 0;
+
+	event->state = PERF_EVENT_STATE_ACTIVE;
+	event->oncpu = cpu;
+	event->tstamp_running += event->ctx->time - event->tstamp_stopped;
+
+	if (!is_x86_event(event))
+		ret = event->pmu->enable(event);
+
+	if (!ret && !is_software_event(event))
+		cpuctx->active_oncpu++;
+
+	if (!ret && event->attr.exclusive)
+		cpuctx->exclusive = 1;
+
+	return ret;
+}
+
+static void x86_event_sched_out(struct perf_event *event,
+				struct perf_cpu_context *cpuctx, int cpu)
+{
+	event->state = PERF_EVENT_STATE_INACTIVE;
+	event->oncpu = -1;
+
+	if (!is_x86_event(event))
+		event->pmu->disable(event);
+
+	event->tstamp_running -= event->ctx->time - event->tstamp_stopped;
+
+	if (!is_software_event(event))
+		cpuctx->active_oncpu--;
+
+	if (event->attr.exclusive || !cpuctx->active_oncpu)
+		cpuctx->exclusive = 0;
+}
+
+/*
+ * Called to enable a whole group of events.
+ * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
+ * Assumes the caller has disabled interrupts and has
+ * frozen the PMU with hw_perf_save_disable.
+ *
+ * Called with PMU disabled. If successful and return value 1,
+ * then guaranteed to call perf_enable() and hw_perf_enable()
+ */
+int hw_perf_group_sched_in(struct perf_event *leader,
+			   struct perf_cpu_context *cpuctx,
+			   struct perf_event_context *ctx, int cpu)
+{
+	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+	struct perf_event *sub;
+	int assign[X86_PMC_IDX_MAX];
+	int n0, n1, ret;
+
+	/* n0 = total number of events */
+	n0 = collect_events(cpuc, leader, true);
+	if (n0 < 0)
+		return n0;
+
+	ret = x86_schedule_events(cpuc, n0, assign);
+	if (ret)
+		return ret;
+
+	ret = x86_event_sched_in(leader, cpuctx, cpu);
+	if (ret)
+		return ret;
+
+	n1 = 1;
+	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
+		if (sub->state != PERF_EVENT_STATE_OFF) {
+			ret = x86_event_sched_in(sub, cpuctx, cpu);
+			if (ret)
+				goto undo;
+			++n1;
+		}
+	}
+	/*
+	 * copy new assignment, now that we know it is possible;
+	 * it will be used by hw_perf_enable()
+	 */
+	memcpy(cpuc->assign, assign, n0*sizeof(int));
+
+	cpuc->n_events  = n0;
+	cpuc->n_added   = n1;
+	ctx->nr_active += n1;
+
+	/*
+	 * 1 means successful and events are active
+	 * This is not quite true because we defer
+	 * actual activation until hw_perf_enable() but
+	 * this way we ensure the caller won't try to enable
+	 * individual events
+	 */
+	return 1;
+undo:
+	x86_event_sched_out(leader, cpuctx, cpu);
+	n0 = 1;
+	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
+		if (sub->state == PERF_EVENT_STATE_ACTIVE) {
+			x86_event_sched_out(sub, cpuctx, cpu);
+			if (++n0 == n1)
+				break;
+		}
+	}
+	return ret;
+}
+
 static __read_mostly struct notifier_block perf_event_nmi_notifier = {
 	.notifier_call		= perf_event_nmi_handler,
 	.next			= NULL,
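
hw_perf_group_sched_in() first dry-runs the whole group through x86_schedule_events() and only then activates members one by one, undoing the ones already activated if a later member fails. A reduced sketch of that enable-with-rollback shape (stand-in functions and data, illustrative only):

    #include <stdio.h>

    /* stand-ins for x86_event_sched_in()/_out(); id 2 is made to fail */
    static int enable(int id)   { printf("enable %d\n", id); return id == 2 ? -1 : 0; }
    static void disable(int id) { printf("disable %d\n", id); }

    int main(void)
    {
        int ids[] = { 0, 1, 2, 3 };
        int n = 4, done = 0, ret = 0;

        for (int i = 0; i < n; i++) {
            ret = enable(ids[i]);
            if (ret)
                break;
            done++;
        }
        if (ret)                        /* undo the partial activation */
            for (int i = 0; i < done; i++)
                disable(ids[i]);
        return 0;
    }
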
@@ -1993,7 +2327,8 @@ static __initconst struct x86_pmu p6_pmu = {
 	 */
 	.event_bits		= 32,
 	.event_mask		= (1ULL << 32) - 1,
-	.get_event_idx		= intel_get_event_idx,
+	.get_event_constraints	= intel_get_event_constraints,
+	.event_constraints	= intel_p6_event_constraints
 };
 
 static __initconst struct x86_pmu intel_pmu = {
@@ -2017,7 +2352,7 @@ static __initconst struct x86_pmu intel_pmu = {
 	.max_period		= (1ULL << 31) - 1,
 	.enable_bts		= intel_pmu_enable_bts,
 	.disable_bts		= intel_pmu_disable_bts,
-	.get_event_idx		= intel_get_event_idx,
+	.get_event_constraints	= intel_get_event_constraints
 };
 
 static __initconst struct x86_pmu amd_pmu = {
@@ -2038,7 +2373,7 @@ static __initconst struct x86_pmu amd_pmu = {
 	.apic			= 1,
 	/* use highest bit to detect overflow */
 	.max_period		= (1ULL << 47) - 1,
-	.get_event_idx		= gen_get_event_idx,
+	.get_event_constraints	= amd_get_event_constraints
 };
 
 static __init int p6_pmu_init(void)
@@ -2051,12 +2386,9 @@ static __init int p6_pmu_init(void)
 	case 7:
 	case 8:
 	case 11: /* Pentium III */
-		event_constraints = intel_p6_event_constraints;
-		break;
 	case 9:
 	case 13:
 		/* Pentium M */
-		event_constraints = intel_p6_event_constraints;
 		break;
 	default:
 		pr_cont("unsupported p6 CPU model %d ",
@@ -2121,23 +2453,29 @@ static __init int intel_pmu_init(void)
 		memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
+		x86_pmu.event_constraints = intel_core_event_constraints;
 		pr_cont("Core2 events, ");
-		event_constraints = intel_core_event_constraints;
 		break;
-	default:
 	case 26:
 		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
-		event_constraints = intel_nehalem_event_constraints;
+		x86_pmu.event_constraints = intel_nehalem_event_constraints;
 		pr_cont("Nehalem/Corei7 events, ");
 		break;
 	case 28:
 		memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
+		x86_pmu.event_constraints = intel_gen_event_constraints;
 		pr_cont("Atom events, ");
 		break;
+	default:
+		/*
+		 * default constraints for v2 and up
+		 */
+		x86_pmu.event_constraints = intel_gen_event_constraints;
+		pr_cont("generic architected perfmon, ");
 	}
 	return 0;
 }
@@ -2234,36 +2572,43 @@ static const struct pmu pmu = {
 	.unthrottle	= x86_pmu_unthrottle,
 };
 
-static int
-validate_event(struct cpu_hw_events *cpuc, struct perf_event *event)
-{
-	struct hw_perf_event fake_event = event->hw;
-
-	if (event->pmu && event->pmu != &pmu)
-		return 0;
-
-	return x86_schedule_event(cpuc, &fake_event) >= 0;
-}
-
+/*
+ * validate a single event group
+ *
+ * validation includes:
+ *	- checking that events are compatible with each other
+ *	- events do not compete for the same counter
+ *	- number of events <= number of counters
+ *
+ * validation ensures the group can be loaded onto the
+ * PMU if it was the only group available.
+ */
 static int validate_group(struct perf_event *event)
 {
-	struct perf_event *sibling, *leader = event->group_leader;
-	struct cpu_hw_events fake_pmu;
+	struct perf_event *leader = event->group_leader;
+	struct cpu_hw_events fake_cpuc;
+	int n;
 
-	memset(&fake_pmu, 0, sizeof(fake_pmu));
+	memset(&fake_cpuc, 0, sizeof(fake_cpuc));
 
-	if (!validate_event(&fake_pmu, leader))
+	/*
+	 * the event is not yet connected with its
+	 * siblings, therefore we must first collect
+	 * existing siblings, then add the new event
+	 * before we can simulate the scheduling
+	 */
+	n = collect_events(&fake_cpuc, leader, true);
+	if (n < 0)
 		return -ENOSPC;
 
-	list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
-		if (!validate_event(&fake_pmu, sibling))
-			return -ENOSPC;
-	}
-
-	if (!validate_event(&fake_pmu, event))
+	fake_cpuc.n_events = n;
+	n = collect_events(&fake_cpuc, event, false);
+	if (n < 0)
 		return -ENOSPC;
 
-	return 0;
+	fake_cpuc.n_events = n;
+
+	return x86_schedule_events(&fake_cpuc, n, NULL);
 }
 
 const struct pmu *hw_perf_event_init(struct perf_event *event)
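
validate_group() reuses the scheduler as a pure feasibility check by passing a NULL assignment array to x86_schedule_events() on a scratch cpu_hw_events. A small standalone sketch of the same dry-run idea (hypothetical helper, toy capacity check only):

    #include <stddef.h>
    #include <stdio.h>

    /* dry-run flavor: with out == NULL we only test feasibility */
    static int try_schedule(int nevents, int ncounters, int *out)
    {
        if (nevents > ncounters)
            return -1;              /* would be -ENOSPC in the kernel */
        if (out)
            for (int i = 0; i < nevents; i++)
                out[i] = i;         /* trivial assignment for the sketch */
        return 0;
    }

    int main(void)
    {
        /* validate a hypothetical group of 3 events on a 2-counter PMU */
        printf("group valid: %s\n",
               try_schedule(3, 2, NULL) == 0 ? "yes" : "no");
        return 0;
    }
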
