author    Stephane Eranian <eranian@google.com>    2010-01-21 10:39:01 -0500
committer Ingo Molnar <mingo@elte.hu>              2010-01-29 03:01:34 -0500
commit    8113070d6639d2245c6c79afb8df42cedab30540
tree      e3735ea7f38bbfffcbda5e7610ee7f13ff5e7c83  /arch/x86/kernel/cpu/perf_event.c
parent    1da53e023029c067ba1277a33038c65d6e4c99b3
perf_events: Add fast-path to the rescheduling code
Implement correct fastpath scheduling, i.e., reuse previous assignment.

Signed-off-by: Stephane Eranian <eranian@google.com>
[ split from larger patch]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4b588464.1818d00a.4456.383b@mx.google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
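For readers who want the idea without walking the diff below, here is a stand-alone sketch of the scheduling strategy this patch introduces, written as plain user-space C. Everything in it (struct sketch_event, MAX_COUNTERS, weight64(), slow_path_schedule(), schedule_events()) is an illustrative stand-in, not the kernel's own code: plain 64-bit masks replace the kernel's bitmap helpers and constraint objects.

#include <stdint.h>
#include <stdio.h>

#define MAX_COUNTERS 8

struct sketch_event {
        int      prev_idx;    /* counter used last time, -1 if never assigned */
        uint64_t constraint;  /* bit i set => the event may run on counter i */
};

/* count the set bits in a constraint mask (its "weight") */
static int weight64(uint64_t x)
{
        int n = 0;

        for (; x; x >>= 1)
                n += (int)(x & 1);
        return n;
}

/*
 * Slow path: visit events from most constrained (weight 1) to least
 * constrained and give each one the first free counter it allows.
 */
static int slow_path_schedule(struct sketch_event *evts, int n, int *assign)
{
        uint64_t used_mask = 0;
        int assigned = 0;

        for (int w = 1; w <= MAX_COUNTERS && assigned < n; w++) {
                for (int i = 0; i < n; i++) {
                        if (weight64(evts[i].constraint) != w)
                                continue;

                        int j;
                        for (j = 0; j < MAX_COUNTERS; j++) {
                                if ((evts[i].constraint & (1ULL << j)) &&
                                    !(used_mask & (1ULL << j)))
                                        break;
                        }
                        if (j == MAX_COUNTERS)
                                return -1;  /* no counter left for this event */

                        used_mask |= 1ULL << j;
                        assign[i] = j;
                        assigned++;
                }
        }
        return assigned == n ? 0 : -1;
}

/*
 * Fast path first: reuse each event's previous counter as long as its
 * constraint still allows it and nobody else has claimed it; fall back
 * to the slow path the moment any event cannot be reused.
 */
static int schedule_events(struct sketch_event *evts, int n, int *assign)
{
        uint64_t used_mask = 0;
        int i;

        for (i = 0; i < n; i++) {
                int idx = evts[i].prev_idx;

                if (idx == -1)                              /* never assigned */
                        break;
                if (!(evts[i].constraint & (1ULL << idx)))  /* constraint changed */
                        break;
                if (used_mask & (1ULL << idx))              /* already taken */
                        break;

                used_mask |= 1ULL << idx;
                assign[i] = idx;
        }
        if (i == n)
                return 0;   /* every event kept its old counter */

        /* discard partial fast-path results and redo the full assignment */
        return slow_path_schedule(evts, n, assign);
}

int main(void)
{
        struct sketch_event evts[] = {
                { .prev_idx = 2, .constraint = 0x04 }, /* only counter 2 */
                { .prev_idx = 0, .constraint = 0xff }, /* any counter    */
        };
        int assign[2];

        if (schedule_events(evts, 2, assign) == 0)
                printf("assigned: %d %d\n", assign[0], assign[1]);
        return 0;
}

As in the patch itself, the sketch does not mix the two paths: the moment one event cannot keep its previous counter, the partial fast-path assignment is thrown away and the whole group is rescheduled from scratch by the weight-ordered slow path.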
Diffstat (limited to 'arch/x86/kernel/cpu/perf_event.c')
 -rw-r--r--  arch/x86/kernel/cpu/perf_event.c   91
 1 file changed, 61 insertions(+), 30 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 995ac4ae379c..0bd23d01af34 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1245,6 +1245,46 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
         }
 
         /*
+         * fastpath, try to reuse previous register
+         */
+        for (i = 0, num = n; i < n; i++, num--) {
+                hwc = &cpuc->event_list[i]->hw;
+                c = (unsigned long *)constraints[i];
+
+                /* never assigned */
+                if (hwc->idx == -1)
+                        break;
+
+                /* constraint still honored */
+                if (!test_bit(hwc->idx, c))
+                        break;
+
+                /* not already used */
+                if (test_bit(hwc->idx, used_mask))
+                        break;
+
+#if 0
+                pr_debug("CPU%d fast config=0x%llx idx=%d assign=%c\n",
+                         smp_processor_id(),
+                         hwc->config,
+                         hwc->idx,
+                         assign ? 'y' : 'n');
+#endif
+
+                set_bit(hwc->idx, used_mask);
+                if (assign)
+                        assign[i] = hwc->idx;
+        }
+        if (!num)
+                goto done;
+
+        /*
+         * begin slow path
+         */
+
+        bitmap_zero(used_mask, X86_PMC_IDX_MAX);
+
+        /*
          * weight = number of possible counters
          *
          * 1 = most constrained, only works on one counter
@@ -1263,10 +1303,9 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
         if (x86_pmu.num_events_fixed)
                 wmax++;
 
-        num = n;
-        for (w = 1; num && w <= wmax; w++) {
+        for (w = 1, num = n; num && w <= wmax; w++) {
                 /* for each event */
-                for (i = 0; i < n; i++) {
+                for (i = 0; num && i < n; i++) {
                         c = (unsigned long *)constraints[i];
                         hwc = &cpuc->event_list[i]->hw;
 
@@ -1274,28 +1313,6 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
                         if (weight != w)
                                 continue;
 
-                        /*
-                         * try to reuse previous assignment
-                         *
-                         * This is possible despite the fact that
-                         * events or events order may have changed.
-                         *
-                         * What matters is the level of constraints
-                         * of an event and this is constant for now.
-                         *
-                         * This is possible also because we always
-                         * scan from most to least constrained. Thus,
-                         * if a counter can be reused, it means no,
-                         * more constrained events, needed it. And
-                         * next events will either compete for it
-                         * (which cannot be solved anyway) or they
-                         * have fewer constraints, and they can use
-                         * another counter.
-                         */
-                        j = hwc->idx;
-                        if (j != -1 && !test_bit(j, used_mask))
-                                goto skip;
-
                         for_each_bit(j, c, X86_PMC_IDX_MAX) {
                                 if (!test_bit(j, used_mask))
                                         break;
@@ -1303,22 +1320,23 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 
                         if (j == X86_PMC_IDX_MAX)
                                 break;
-skip:
-                        set_bit(j, used_mask);
 
 #if 0
-                        pr_debug("CPU%d config=0x%llx idx=%d assign=%c\n",
+                        pr_debug("CPU%d slow config=0x%llx idx=%d assign=%c\n",
                                  smp_processor_id(),
                                  hwc->config,
                                  j,
                                  assign ? 'y' : 'n');
 #endif
 
+                        set_bit(j, used_mask);
+
                         if (assign)
                                 assign[i] = j;
                         num--;
                 }
         }
+done:
         /*
          * scheduling failed or is just a simulation,
          * free resources if necessary
@@ -1357,7 +1375,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
 
         list_for_each_entry(event, &leader->sibling_list, group_entry) {
                 if (!is_x86_event(event) ||
-                    event->state == PERF_EVENT_STATE_OFF)
+                    event->state <= PERF_EVENT_STATE_OFF)
                         continue;
 
                 if (n >= max_count)
@@ -2184,6 +2202,8 @@ static void amd_get_event_constraints(struct cpu_hw_events *cpuc,
                                       struct perf_event *event,
                                       u64 *idxmsk)
 {
+        /* no constraints, means supports all generic counters */
+        bitmap_fill((unsigned long *)idxmsk, x86_pmu.num_events);
 }
 
 static int x86_event_sched_in(struct perf_event *event,
@@ -2258,7 +2278,7 @@ int hw_perf_group_sched_in(struct perf_event *leader,
 
         n1 = 1;
         list_for_each_entry(sub, &leader->sibling_list, group_entry) {
-                if (sub->state != PERF_EVENT_STATE_OFF) {
+                if (sub->state > PERF_EVENT_STATE_OFF) {
                         ret = x86_event_sched_in(sub, cpuctx, cpu);
                         if (ret)
                                 goto undo;
@@ -2613,12 +2633,23 @@ static int validate_group(struct perf_event *event)
 
 const struct pmu *hw_perf_event_init(struct perf_event *event)
 {
+        const struct pmu *tmp;
         int err;
 
         err = __hw_perf_event_init(event);
         if (!err) {
+                /*
+                 * we temporarily connect event to its pmu
+                 * such that validate_group() can classify
+                 * it as an x86 event using is_x86_event()
+                 */
+                tmp = event->pmu;
+                event->pmu = &pmu;
+
                 if (event->group_leader != event)
                         err = validate_group(event);
+
+                event->pmu = tmp;
         }
         if (err) {
                 if (event->destroy)