author		Stephane Eranian <eranian@google.com>	2010-01-21 10:39:01 -0500
committer	Ingo Molnar <mingo@elte.hu>	2010-01-29 03:01:34 -0500
commit		8113070d6639d2245c6c79afb8df42cedab30540 (patch)
tree		e3735ea7f38bbfffcbda5e7610ee7f13ff5e7c83 /arch/x86/kernel/cpu/perf_event.c
parent		1da53e023029c067ba1277a33038c65d6e4c99b3 (diff)
perf_events: Add fast-path to the rescheduling code
Implement correct fastpath scheduling, i.e., reuse previous assignment.
Signed-off-by: Stephane Eranian <eranian@google.com>
[ split from larger patch ]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4b588464.1818d00a.4456.383b@mx.google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
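
The idea behind the fast path: every collected event keeps the counter it used on the previous scheduling pass, as long as that index is still permitted by the event's constraint mask and has not already been claimed by an earlier event in the list; any failure falls back to the weight-ordered slow path. Below is a minimal, illustrative user-space sketch of that check, with made-up names and types (sketch_event, fastpath_schedule) standing in for the kernel's cpu_hw_events and bitmap helpers; it is not the code from the diff.

/*
 * Illustrative sketch only -- simplified stand-ins for the real
 * cpuc->event_list entries and per-event constraint bitmaps.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct sketch_event {
	int      prev_idx;    /* counter used last time, -1 if never assigned */
	uint32_t constraint;  /* bit i set => counter i may hold this event */
};

/*
 * Try to give every event the counter it used last time.  Succeeds only
 * if all events can keep their old counter; otherwise the caller falls
 * back to the full weight-ordered (slow path) scan.
 */
bool fastpath_schedule(const struct sketch_event *ev, int n, int *assign)
{
	uint32_t used = 0;

	for (int i = 0; i < n; i++) {
		int idx = ev[i].prev_idx;

		if (idx == -1)                          /* never assigned */
			return false;
		if (!(ev[i].constraint & (1u << idx)))  /* constraint no longer honored */
			return false;
		if (used & (1u << idx))                 /* counter already taken */
			return false;

		used |= 1u << idx;
		assign[i] = idx;
	}
	return true;
}

int main(void)
{
	struct sketch_event ev[2] = {
		{ .prev_idx = 0, .constraint = 0xf },  /* unconstrained event, kept counter 0 */
		{ .prev_idx = 2, .constraint = 0x4 },  /* constrained to counter 2 */
	};
	int assign[2];

	if (fastpath_schedule(ev, 2, assign))
		printf("fast path: %d %d\n", assign[0], assign[1]);
	return 0;
}

In the diff that follows, the same check runs over cpuc->event_list with test_bit()/set_bit() on used_mask, and a failed fast path clears used_mask with bitmap_zero() before entering the slow path.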
Diffstat (limited to 'arch/x86/kernel/cpu/perf_event.c')
-rw-r--r--	arch/x86/kernel/cpu/perf_event.c	91
1 file changed, 61 insertions(+), 30 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 995ac4ae379c..0bd23d01af34 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1245,6 +1245,46 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 	}
 
 	/*
+	 * fastpath, try to reuse previous register
+	 */
+	for (i = 0, num = n; i < n; i++, num--) {
+		hwc = &cpuc->event_list[i]->hw;
+		c = (unsigned long *)constraints[i];
+
+		/* never assigned */
+		if (hwc->idx == -1)
+			break;
+
+		/* constraint still honored */
+		if (!test_bit(hwc->idx, c))
+			break;
+
+		/* not already used */
+		if (test_bit(hwc->idx, used_mask))
+			break;
+
+#if 0
+		pr_debug("CPU%d fast config=0x%llx idx=%d assign=%c\n",
+			 smp_processor_id(),
+			 hwc->config,
+			 hwc->idx,
+			 assign ? 'y' : 'n');
+#endif
+
+		set_bit(hwc->idx, used_mask);
+		if (assign)
+			assign[i] = hwc->idx;
+	}
+	if (!num)
+		goto done;
+
+	/*
+	 * begin slow path
+	 */
+
+	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
+
+	/*
 	 * weight = number of possible counters
 	 *
 	 * 1 = most constrained, only works on one counter
@@ -1263,10 +1303,9 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 	if (x86_pmu.num_events_fixed)
 		wmax++;
 
-	num = n;
-	for (w = 1; num && w <= wmax; w++) {
+	for (w = 1, num = n; num && w <= wmax; w++) {
 		/* for each event */
-		for (i = 0; i < n; i++) {
+		for (i = 0; num && i < n; i++) {
 			c = (unsigned long *)constraints[i];
 			hwc = &cpuc->event_list[i]->hw;
 
@@ -1274,28 +1313,6 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 			if (weight != w)
 				continue;
 
-			/*
-			 * try to reuse previous assignment
-			 *
-			 * This is possible despite the fact that
-			 * events or events order may have changed.
-			 *
-			 * What matters is the level of constraints
-			 * of an event and this is constant for now.
-			 *
-			 * This is possible also because we always
-			 * scan from most to least constrained. Thus,
-			 * if a counter can be reused, it means no,
-			 * more constrained events, needed it. And
-			 * next events will either compete for it
-			 * (which cannot be solved anyway) or they
-			 * have fewer constraints, and they can use
-			 * another counter.
-			 */
-			j = hwc->idx;
-			if (j != -1 && !test_bit(j, used_mask))
-				goto skip;
-
 			for_each_bit(j, c, X86_PMC_IDX_MAX) {
 				if (!test_bit(j, used_mask))
 					break;
@@ -1303,22 +1320,23 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 
 			if (j == X86_PMC_IDX_MAX)
 				break;
-skip:
-			set_bit(j, used_mask);
 
 #if 0
-			pr_debug("CPU%d config=0x%llx idx=%d assign=%c\n",
+			pr_debug("CPU%d slow config=0x%llx idx=%d assign=%c\n",
 				 smp_processor_id(),
 				 hwc->config,
 				 j,
 				 assign ? 'y' : 'n');
 #endif
 
+			set_bit(j, used_mask);
+
 			if (assign)
 				assign[i] = j;
 			num--;
 		}
 	}
+done:
 	/*
 	 * scheduling failed or is just a simulation,
 	 * free resources if necessary
@@ -1357,7 +1375,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
 
 	list_for_each_entry(event, &leader->sibling_list, group_entry) {
 		if (!is_x86_event(event) ||
-		    event->state == PERF_EVENT_STATE_OFF)
+		    event->state <= PERF_EVENT_STATE_OFF)
 			continue;
 
 		if (n >= max_count)
@@ -2184,6 +2202,8 @@ static void amd_get_event_constraints(struct cpu_hw_events *cpuc,
 				      struct perf_event *event,
 				      u64 *idxmsk)
 {
+	/* no constraints, means supports all generic counters */
+	bitmap_fill((unsigned long *)idxmsk, x86_pmu.num_events);
 }
 
 static int x86_event_sched_in(struct perf_event *event,
@@ -2258,7 +2278,7 @@ int hw_perf_group_sched_in(struct perf_event *leader,
 
 	n1 = 1;
 	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
-		if (sub->state != PERF_EVENT_STATE_OFF) {
+		if (sub->state > PERF_EVENT_STATE_OFF) {
 			ret = x86_event_sched_in(sub, cpuctx, cpu);
 			if (ret)
 				goto undo;
@@ -2613,12 +2633,23 @@ static int validate_group(struct perf_event *event)
 
 const struct pmu *hw_perf_event_init(struct perf_event *event)
 {
+	const struct pmu *tmp;
 	int err;
 
 	err = __hw_perf_event_init(event);
 	if (!err) {
+		/*
+		 * we temporarily connect event to its pmu
+		 * such that validate_group() can classify
+		 * it as an x86 event using is_x86_event()
+		 */
+		tmp = event->pmu;
+		event->pmu = &pmu;
+
 		if (event->group_leader != event)
 			err = validate_group(event);
+
+		event->pmu = tmp;
 	}
 	if (err) {
 		if (event->destroy)