diff options
| -rw-r--r-- | arch/x86/kernel/cpu/perf_event.c | 91 |
1 file changed, 61 insertions, 30 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 995ac4ae379c..0bd23d01af34 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
| @@ -1245,6 +1245,46 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | |||
| 1245 | } | 1245 | } |
| 1246 | 1246 | ||
| 1247 | /* | 1247 | /* |
| 1248 | * fastpath, try to reuse previous register | ||
| 1249 | */ | ||
| 1250 | for (i = 0, num = n; i < n; i++, num--) { | ||
| 1251 | hwc = &cpuc->event_list[i]->hw; | ||
| 1252 | c = (unsigned long *)constraints[i]; | ||
| 1253 | |||
| 1254 | /* never assigned */ | ||
| 1255 | if (hwc->idx == -1) | ||
| 1256 | break; | ||
| 1257 | |||
| 1258 | /* constraint still honored */ | ||
| 1259 | if (!test_bit(hwc->idx, c)) | ||
| 1260 | break; | ||
| 1261 | |||
| 1262 | /* not already used */ | ||
| 1263 | if (test_bit(hwc->idx, used_mask)) | ||
| 1264 | break; | ||
| 1265 | |||
| 1266 | #if 0 | ||
| 1267 | pr_debug("CPU%d fast config=0x%llx idx=%d assign=%c\n", | ||
| 1268 | smp_processor_id(), | ||
| 1269 | hwc->config, | ||
| 1270 | hwc->idx, | ||
| 1271 | assign ? 'y' : 'n'); | ||
| 1272 | #endif | ||
| 1273 | |||
| 1274 | set_bit(hwc->idx, used_mask); | ||
| 1275 | if (assign) | ||
| 1276 | assign[i] = hwc->idx; | ||
| 1277 | } | ||
| 1278 | if (!num) | ||
| 1279 | goto done; | ||
| 1280 | |||
| 1281 | /* | ||
| 1282 | * begin slow path | ||
| 1283 | */ | ||
| 1284 | |||
| 1285 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); | ||
| 1286 | |||
| 1287 | /* | ||
| 1248 | * weight = number of possible counters | 1288 | * weight = number of possible counters |
| 1249 | * | 1289 | * |
| 1250 | * 1 = most constrained, only works on one counter | 1290 | * 1 = most constrained, only works on one counter |
| @@ -1263,10 +1303,9 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | |||
| 1263 | if (x86_pmu.num_events_fixed) | 1303 | if (x86_pmu.num_events_fixed) |
| 1264 | wmax++; | 1304 | wmax++; |
| 1265 | 1305 | ||
| 1266 | num = n; | 1306 | for (w = 1, num = n; num && w <= wmax; w++) { |
| 1267 | for (w = 1; num && w <= wmax; w++) { | ||
| 1268 | /* for each event */ | 1307 | /* for each event */ |
| 1269 | for (i = 0; i < n; i++) { | 1308 | for (i = 0; num && i < n; i++) { |
| 1270 | c = (unsigned long *)constraints[i]; | 1309 | c = (unsigned long *)constraints[i]; |
| 1271 | hwc = &cpuc->event_list[i]->hw; | 1310 | hwc = &cpuc->event_list[i]->hw; |
| 1272 | 1311 | ||
| @@ -1274,28 +1313,6 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | |||
| 1274 | if (weight != w) | 1313 | if (weight != w) |
| 1275 | continue; | 1314 | continue; |
| 1276 | 1315 | ||
| 1277 | /* | ||
| 1278 | * try to reuse previous assignment | ||
| 1279 | * | ||
| 1280 | * This is possible despite the fact that | ||
| 1281 | * events or events order may have changed. | ||
| 1282 | * | ||
| 1283 | * What matters is the level of constraints | ||
| 1284 | * of an event and this is constant for now. | ||
| 1285 | * | ||
| 1286 | * This is possible also because we always | ||
| 1287 | * scan from most to least constrained. Thus, | ||
| 1288 | * if a counter can be reused, it means no, | ||
| 1289 | * more constrained events, needed it. And | ||
| 1290 | * next events will either compete for it | ||
| 1291 | * (which cannot be solved anyway) or they | ||
| 1292 | * have fewer constraints, and they can use | ||
| 1293 | * another counter. | ||
| 1294 | */ | ||
| 1295 | j = hwc->idx; | ||
| 1296 | if (j != -1 && !test_bit(j, used_mask)) | ||
| 1297 | goto skip; | ||
| 1298 | |||
| 1299 | for_each_bit(j, c, X86_PMC_IDX_MAX) { | 1316 | for_each_bit(j, c, X86_PMC_IDX_MAX) { |
| 1300 | if (!test_bit(j, used_mask)) | 1317 | if (!test_bit(j, used_mask)) |
| 1301 | break; | 1318 | break; |
| @@ -1303,22 +1320,23 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | |||
| 1303 | 1320 | ||
| 1304 | if (j == X86_PMC_IDX_MAX) | 1321 | if (j == X86_PMC_IDX_MAX) |
| 1305 | break; | 1322 | break; |
| 1306 | skip: | ||
| 1307 | set_bit(j, used_mask); | ||
| 1308 | 1323 | ||
| 1309 | #if 0 | 1324 | #if 0 |
| 1310 | pr_debug("CPU%d config=0x%llx idx=%d assign=%c\n", | 1325 | pr_debug("CPU%d slow config=0x%llx idx=%d assign=%c\n", |
| 1311 | smp_processor_id(), | 1326 | smp_processor_id(), |
| 1312 | hwc->config, | 1327 | hwc->config, |
| 1313 | j, | 1328 | j, |
| 1314 | assign ? 'y' : 'n'); | 1329 | assign ? 'y' : 'n'); |
| 1315 | #endif | 1330 | #endif |
| 1316 | 1331 | ||
| 1332 | set_bit(j, used_mask); | ||
| 1333 | |||
| 1317 | if (assign) | 1334 | if (assign) |
| 1318 | assign[i] = j; | 1335 | assign[i] = j; |
| 1319 | num--; | 1336 | num--; |
| 1320 | } | 1337 | } |
| 1321 | } | 1338 | } |
| 1339 | done: | ||
| 1322 | /* | 1340 | /* |
| 1323 | * scheduling failed or is just a simulation, | 1341 | * scheduling failed or is just a simulation, |
| 1324 | * free resources if necessary | 1342 | * free resources if necessary |
| @@ -1357,7 +1375,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, | |||
| 1357 | 1375 | ||
| 1358 | list_for_each_entry(event, &leader->sibling_list, group_entry) { | 1376 | list_for_each_entry(event, &leader->sibling_list, group_entry) { |
| 1359 | if (!is_x86_event(event) || | 1377 | if (!is_x86_event(event) || |
| 1360 | event->state == PERF_EVENT_STATE_OFF) | 1378 | event->state <= PERF_EVENT_STATE_OFF) |
| 1361 | continue; | 1379 | continue; |
| 1362 | 1380 | ||
| 1363 | if (n >= max_count) | 1381 | if (n >= max_count) |
| @@ -2184,6 +2202,8 @@ static void amd_get_event_constraints(struct cpu_hw_events *cpuc, | |||
| 2184 | struct perf_event *event, | 2202 | struct perf_event *event, |
| 2185 | u64 *idxmsk) | 2203 | u64 *idxmsk) |
| 2186 | { | 2204 | { |
| 2205 | /* no constraints, means supports all generic counters */ | ||
| 2206 | bitmap_fill((unsigned long *)idxmsk, x86_pmu.num_events); | ||
| 2187 | } | 2207 | } |
| 2188 | 2208 | ||
| 2189 | static int x86_event_sched_in(struct perf_event *event, | 2209 | static int x86_event_sched_in(struct perf_event *event, |
| @@ -2258,7 +2278,7 @@ int hw_perf_group_sched_in(struct perf_event *leader, | |||
| 2258 | 2278 | ||
| 2259 | n1 = 1; | 2279 | n1 = 1; |
| 2260 | list_for_each_entry(sub, &leader->sibling_list, group_entry) { | 2280 | list_for_each_entry(sub, &leader->sibling_list, group_entry) { |
| 2261 | if (sub->state != PERF_EVENT_STATE_OFF) { | 2281 | if (sub->state > PERF_EVENT_STATE_OFF) { |
| 2262 | ret = x86_event_sched_in(sub, cpuctx, cpu); | 2282 | ret = x86_event_sched_in(sub, cpuctx, cpu); |
| 2263 | if (ret) | 2283 | if (ret) |
| 2264 | goto undo; | 2284 | goto undo; |
| @@ -2613,12 +2633,23 @@ static int validate_group(struct perf_event *event) | |||
| 2613 | 2633 | ||
| 2614 | const struct pmu *hw_perf_event_init(struct perf_event *event) | 2634 | const struct pmu *hw_perf_event_init(struct perf_event *event) |
| 2615 | { | 2635 | { |
| 2636 | const struct pmu *tmp; | ||
| 2616 | int err; | 2637 | int err; |
| 2617 | 2638 | ||
| 2618 | err = __hw_perf_event_init(event); | 2639 | err = __hw_perf_event_init(event); |
| 2619 | if (!err) { | 2640 | if (!err) { |
| 2641 | /* | ||
| 2642 | * we temporarily connect event to its pmu | ||
| 2643 | * such that validate_group() can classify | ||
| 2644 | * it as an x86 event using is_x86_event() | ||
| 2645 | */ | ||
| 2646 | tmp = event->pmu; | ||
| 2647 | event->pmu = &pmu; | ||
| 2648 | |||
| 2620 | if (event->group_leader != event) | 2649 | if (event->group_leader != event) |
| 2621 | err = validate_group(event); | 2650 | err = validate_group(event); |
| 2651 | |||
| 2652 | event->pmu = tmp; | ||
| 2622 | } | 2653 | } |
| 2623 | if (err) { | 2654 | if (err) { |
| 2624 | if (event->destroy) | 2655 | if (event->destroy) |
