diff options
| author | Adrian Hunter <adrian.hunter@intel.com> | 2017-06-30 04:36:45 -0400 |
|---|---|---|
| committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2017-06-30 10:48:28 -0400 |
| commit | 3797307576191d7fb4c974cd461188162ac36f33 (patch) | |
| tree | 30e9304b9e1580d681981d8ddf1dff21aeb411bd /tools/perf | |
| parent | 4a9fd4e0effc94b9ec79250946a0054d4dd1a963 (diff) | |
perf intel-pt: Synthesize new power and "ptwrite" events
Synthesize new power and ptwrite events.
Power events report changes to C-state, but I have also added support
for the existing CBR (core-to-bus ratio) packet, which is output along
with the power events.
The PTWRITE packet is associated with the new "ptwrite" instruction,
which is essentially just a way to stuff a 32-bit or 64-bit value into the
PT trace.
More details can be found in the patches that add documentation and in
the Intel SDM.
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Link: http://lkml.kernel.org/r/1498811805-2335-1-git-send-email-adrian.hunter@intel.com
[ Copy the description of such packet from the patchkit cover message ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf')
| -rw-r--r-- | tools/perf/util/intel-pt.c | 283 |
1 files changed, 283 insertions, 0 deletions
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index ace79a405f98..b58f9fd1e2ee 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c | |||
| @@ -92,6 +92,18 @@ struct intel_pt { | |||
| 92 | u64 transactions_sample_type; | 92 | u64 transactions_sample_type; |
| 93 | u64 transactions_id; | 93 | u64 transactions_id; |
| 94 | 94 | ||
| 95 | bool sample_ptwrites; | ||
| 96 | u64 ptwrites_sample_type; | ||
| 97 | u64 ptwrites_id; | ||
| 98 | |||
| 99 | bool sample_pwr_events; | ||
| 100 | u64 pwr_events_sample_type; | ||
| 101 | u64 mwait_id; | ||
| 102 | u64 pwre_id; | ||
| 103 | u64 exstop_id; | ||
| 104 | u64 pwrx_id; | ||
| 105 | u64 cbr_id; | ||
| 106 | |||
| 95 | bool synth_needs_swap; | 107 | bool synth_needs_swap; |
| 96 | 108 | ||
| 97 | u64 tsc_bit; | 109 | u64 tsc_bit; |
| @@ -102,6 +114,7 @@ struct intel_pt { | |||
| 102 | u64 cyc_bit; | 114 | u64 cyc_bit; |
| 103 | u64 noretcomp_bit; | 115 | u64 noretcomp_bit; |
| 104 | unsigned max_non_turbo_ratio; | 116 | unsigned max_non_turbo_ratio; |
| 117 | unsigned cbr2khz; | ||
| 105 | 118 | ||
| 106 | unsigned long num_events; | 119 | unsigned long num_events; |
| 107 | 120 | ||
| @@ -1236,6 +1249,175 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) | |||
| 1236 | pt->transactions_sample_type); | 1249 | pt->transactions_sample_type); |
| 1237 | } | 1250 | } |
| 1238 | 1251 | ||
| 1252 | static void intel_pt_prep_p_sample(struct intel_pt *pt, | ||
| 1253 | struct intel_pt_queue *ptq, | ||
| 1254 | union perf_event *event, | ||
| 1255 | struct perf_sample *sample) | ||
| 1256 | { | ||
| 1257 | intel_pt_prep_sample(pt, ptq, event, sample); | ||
| 1258 | |||
| 1259 | /* | ||
| 1260 | * Zero IP is used to mean "trace start" but that is not the case for | ||
| 1261 | * power or PTWRITE events with no IP, so clear the flags. | ||
| 1262 | */ | ||
| 1263 | if (!sample->ip) | ||
| 1264 | sample->flags = 0; | ||
| 1265 | } | ||
| 1266 | |||
| 1267 | static int intel_pt_synth_ptwrite_sample(struct intel_pt_queue *ptq) | ||
| 1268 | { | ||
| 1269 | struct intel_pt *pt = ptq->pt; | ||
| 1270 | union perf_event *event = ptq->event_buf; | ||
| 1271 | struct perf_sample sample = { .ip = 0, }; | ||
| 1272 | struct perf_synth_intel_ptwrite raw; | ||
| 1273 | |||
| 1274 | if (intel_pt_skip_event(pt)) | ||
| 1275 | return 0; | ||
| 1276 | |||
| 1277 | intel_pt_prep_p_sample(pt, ptq, event, &sample); | ||
| 1278 | |||
| 1279 | sample.id = ptq->pt->ptwrites_id; | ||
| 1280 | sample.stream_id = ptq->pt->ptwrites_id; | ||
| 1281 | |||
| 1282 | raw.flags = 0; | ||
| 1283 | raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP); | ||
| 1284 | raw.payload = cpu_to_le64(ptq->state->ptw_payload); | ||
| 1285 | |||
| 1286 | sample.raw_size = perf_synth__raw_size(raw); | ||
| 1287 | sample.raw_data = perf_synth__raw_data(&raw); | ||
| 1288 | |||
| 1289 | return intel_pt_deliver_synth_event(pt, ptq, event, &sample, | ||
| 1290 | pt->ptwrites_sample_type); | ||
| 1291 | } | ||
| 1292 | |||
| 1293 | static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq) | ||
| 1294 | { | ||
| 1295 | struct intel_pt *pt = ptq->pt; | ||
| 1296 | union perf_event *event = ptq->event_buf; | ||
| 1297 | struct perf_sample sample = { .ip = 0, }; | ||
| 1298 | struct perf_synth_intel_cbr raw; | ||
| 1299 | u32 flags; | ||
| 1300 | |||
| 1301 | if (intel_pt_skip_event(pt)) | ||
| 1302 | return 0; | ||
| 1303 | |||
| 1304 | intel_pt_prep_p_sample(pt, ptq, event, &sample); | ||
| 1305 | |||
| 1306 | sample.id = ptq->pt->cbr_id; | ||
| 1307 | sample.stream_id = ptq->pt->cbr_id; | ||
| 1308 | |||
| 1309 | flags = (u16)ptq->state->cbr_payload | (pt->max_non_turbo_ratio << 16); | ||
| 1310 | raw.flags = cpu_to_le32(flags); | ||
| 1311 | raw.freq = cpu_to_le32(raw.cbr * pt->cbr2khz); | ||
| 1312 | raw.reserved3 = 0; | ||
| 1313 | |||
| 1314 | sample.raw_size = perf_synth__raw_size(raw); | ||
| 1315 | sample.raw_data = perf_synth__raw_data(&raw); | ||
| 1316 | |||
| 1317 | return intel_pt_deliver_synth_event(pt, ptq, event, &sample, | ||
| 1318 | pt->pwr_events_sample_type); | ||
| 1319 | } | ||
| 1320 | |||
| 1321 | static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq) | ||
| 1322 | { | ||
| 1323 | struct intel_pt *pt = ptq->pt; | ||
| 1324 | union perf_event *event = ptq->event_buf; | ||
| 1325 | struct perf_sample sample = { .ip = 0, }; | ||
| 1326 | struct perf_synth_intel_mwait raw; | ||
| 1327 | |||
| 1328 | if (intel_pt_skip_event(pt)) | ||
| 1329 | return 0; | ||
| 1330 | |||
| 1331 | intel_pt_prep_p_sample(pt, ptq, event, &sample); | ||
| 1332 | |||
| 1333 | sample.id = ptq->pt->mwait_id; | ||
| 1334 | sample.stream_id = ptq->pt->mwait_id; | ||
| 1335 | |||
| 1336 | raw.reserved = 0; | ||
| 1337 | raw.payload = cpu_to_le64(ptq->state->mwait_payload); | ||
| 1338 | |||
| 1339 | sample.raw_size = perf_synth__raw_size(raw); | ||
| 1340 | sample.raw_data = perf_synth__raw_data(&raw); | ||
| 1341 | |||
| 1342 | return intel_pt_deliver_synth_event(pt, ptq, event, &sample, | ||
| 1343 | pt->pwr_events_sample_type); | ||
| 1344 | } | ||
| 1345 | |||
| 1346 | static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq) | ||
| 1347 | { | ||
| 1348 | struct intel_pt *pt = ptq->pt; | ||
| 1349 | union perf_event *event = ptq->event_buf; | ||
| 1350 | struct perf_sample sample = { .ip = 0, }; | ||
| 1351 | struct perf_synth_intel_pwre raw; | ||
| 1352 | |||
| 1353 | if (intel_pt_skip_event(pt)) | ||
| 1354 | return 0; | ||
| 1355 | |||
| 1356 | intel_pt_prep_p_sample(pt, ptq, event, &sample); | ||
| 1357 | |||
| 1358 | sample.id = ptq->pt->pwre_id; | ||
| 1359 | sample.stream_id = ptq->pt->pwre_id; | ||
| 1360 | |||
| 1361 | raw.reserved = 0; | ||
| 1362 | raw.payload = cpu_to_le64(ptq->state->pwre_payload); | ||
| 1363 | |||
| 1364 | sample.raw_size = perf_synth__raw_size(raw); | ||
| 1365 | sample.raw_data = perf_synth__raw_data(&raw); | ||
| 1366 | |||
| 1367 | return intel_pt_deliver_synth_event(pt, ptq, event, &sample, | ||
| 1368 | pt->pwr_events_sample_type); | ||
| 1369 | } | ||
| 1370 | |||
| 1371 | static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq) | ||
| 1372 | { | ||
| 1373 | struct intel_pt *pt = ptq->pt; | ||
| 1374 | union perf_event *event = ptq->event_buf; | ||
| 1375 | struct perf_sample sample = { .ip = 0, }; | ||
| 1376 | struct perf_synth_intel_exstop raw; | ||
| 1377 | |||
| 1378 | if (intel_pt_skip_event(pt)) | ||
| 1379 | return 0; | ||
| 1380 | |||
| 1381 | intel_pt_prep_p_sample(pt, ptq, event, &sample); | ||
| 1382 | |||
| 1383 | sample.id = ptq->pt->exstop_id; | ||
| 1384 | sample.stream_id = ptq->pt->exstop_id; | ||
| 1385 | |||
| 1386 | raw.flags = 0; | ||
| 1387 | raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP); | ||
| 1388 | |||
| 1389 | sample.raw_size = perf_synth__raw_size(raw); | ||
| 1390 | sample.raw_data = perf_synth__raw_data(&raw); | ||
| 1391 | |||
| 1392 | return intel_pt_deliver_synth_event(pt, ptq, event, &sample, | ||
| 1393 | pt->pwr_events_sample_type); | ||
| 1394 | } | ||
| 1395 | |||
| 1396 | static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq) | ||
| 1397 | { | ||
| 1398 | struct intel_pt *pt = ptq->pt; | ||
| 1399 | union perf_event *event = ptq->event_buf; | ||
| 1400 | struct perf_sample sample = { .ip = 0, }; | ||
| 1401 | struct perf_synth_intel_pwrx raw; | ||
| 1402 | |||
| 1403 | if (intel_pt_skip_event(pt)) | ||
| 1404 | return 0; | ||
| 1405 | |||
| 1406 | intel_pt_prep_p_sample(pt, ptq, event, &sample); | ||
| 1407 | |||
| 1408 | sample.id = ptq->pt->pwrx_id; | ||
| 1409 | sample.stream_id = ptq->pt->pwrx_id; | ||
| 1410 | |||
| 1411 | raw.reserved = 0; | ||
| 1412 | raw.payload = cpu_to_le64(ptq->state->pwrx_payload); | ||
| 1413 | |||
| 1414 | sample.raw_size = perf_synth__raw_size(raw); | ||
| 1415 | sample.raw_data = perf_synth__raw_data(&raw); | ||
| 1416 | |||
| 1417 | return intel_pt_deliver_synth_event(pt, ptq, event, &sample, | ||
| 1418 | pt->pwr_events_sample_type); | ||
| 1419 | } | ||
| 1420 | |||
| 1239 | static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, | 1421 | static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, |
| 1240 | pid_t pid, pid_t tid, u64 ip) | 1422 | pid_t pid, pid_t tid, u64 ip) |
| 1241 | { | 1423 | { |
| @@ -1287,6 +1469,10 @@ static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip) | |||
| 1287 | PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT)); | 1469 | PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT)); |
| 1288 | } | 1470 | } |
| 1289 | 1471 | ||
| 1472 | #define INTEL_PT_PWR_EVT (INTEL_PT_MWAIT_OP | INTEL_PT_PWR_ENTRY | \ | ||
| 1473 | INTEL_PT_EX_STOP | INTEL_PT_PWR_EXIT | \ | ||
| 1474 | INTEL_PT_CBR_CHG) | ||
| 1475 | |||
| 1290 | static int intel_pt_sample(struct intel_pt_queue *ptq) | 1476 | static int intel_pt_sample(struct intel_pt_queue *ptq) |
| 1291 | { | 1477 | { |
| 1292 | const struct intel_pt_state *state = ptq->state; | 1478 | const struct intel_pt_state *state = ptq->state; |
| @@ -1298,6 +1484,34 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) | |||
| 1298 | 1484 | ||
| 1299 | ptq->have_sample = false; | 1485 | ptq->have_sample = false; |
| 1300 | 1486 | ||
| 1487 | if (pt->sample_pwr_events && (state->type & INTEL_PT_PWR_EVT)) { | ||
| 1488 | if (state->type & INTEL_PT_CBR_CHG) { | ||
| 1489 | err = intel_pt_synth_cbr_sample(ptq); | ||
| 1490 | if (err) | ||
| 1491 | return err; | ||
| 1492 | } | ||
| 1493 | if (state->type & INTEL_PT_MWAIT_OP) { | ||
| 1494 | err = intel_pt_synth_mwait_sample(ptq); | ||
| 1495 | if (err) | ||
| 1496 | return err; | ||
| 1497 | } | ||
| 1498 | if (state->type & INTEL_PT_PWR_ENTRY) { | ||
| 1499 | err = intel_pt_synth_pwre_sample(ptq); | ||
| 1500 | if (err) | ||
| 1501 | return err; | ||
| 1502 | } | ||
| 1503 | if (state->type & INTEL_PT_EX_STOP) { | ||
| 1504 | err = intel_pt_synth_exstop_sample(ptq); | ||
| 1505 | if (err) | ||
| 1506 | return err; | ||
| 1507 | } | ||
| 1508 | if (state->type & INTEL_PT_PWR_EXIT) { | ||
| 1509 | err = intel_pt_synth_pwrx_sample(ptq); | ||
| 1510 | if (err) | ||
| 1511 | return err; | ||
| 1512 | } | ||
| 1513 | } | ||
| 1514 | |||
| 1301 | if (pt->sample_instructions && (state->type & INTEL_PT_INSTRUCTION)) { | 1515 | if (pt->sample_instructions && (state->type & INTEL_PT_INSTRUCTION)) { |
| 1302 | err = intel_pt_synth_instruction_sample(ptq); | 1516 | err = intel_pt_synth_instruction_sample(ptq); |
| 1303 | if (err) | 1517 | if (err) |
| @@ -1310,6 +1524,12 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) | |||
| 1310 | return err; | 1524 | return err; |
| 1311 | } | 1525 | } |
| 1312 | 1526 | ||
| 1527 | if (pt->sample_ptwrites && (state->type & INTEL_PT_PTW)) { | ||
| 1528 | err = intel_pt_synth_ptwrite_sample(ptq); | ||
| 1529 | if (err) | ||
| 1530 | return err; | ||
| 1531 | } | ||
| 1532 | |||
| 1313 | if (!(state->type & INTEL_PT_BRANCH)) | 1533 | if (!(state->type & INTEL_PT_BRANCH)) |
| 1314 | return 0; | 1534 | return 0; |
| 1315 | 1535 | ||
| @@ -2047,6 +2267,68 @@ static int intel_pt_synth_events(struct intel_pt *pt, | |||
| 2047 | id += 1; | 2267 | id += 1; |
| 2048 | } | 2268 | } |
| 2049 | 2269 | ||
| 2270 | attr.type = PERF_TYPE_SYNTH; | ||
| 2271 | attr.sample_type |= PERF_SAMPLE_RAW; | ||
| 2272 | |||
| 2273 | if (pt->synth_opts.ptwrites) { | ||
| 2274 | attr.config = PERF_SYNTH_INTEL_PTWRITE; | ||
| 2275 | err = intel_pt_synth_event(session, "ptwrite", &attr, id); | ||
| 2276 | if (err) | ||
| 2277 | return err; | ||
| 2278 | pt->sample_ptwrites = true; | ||
| 2279 | pt->ptwrites_sample_type = attr.sample_type; | ||
| 2280 | pt->ptwrites_id = id; | ||
| 2281 | intel_pt_set_event_name(evlist, id, "ptwrite"); | ||
| 2282 | id += 1; | ||
| 2283 | } | ||
| 2284 | |||
| 2285 | if (pt->synth_opts.pwr_events) { | ||
| 2286 | pt->sample_pwr_events = true; | ||
| 2287 | pt->pwr_events_sample_type = attr.sample_type; | ||
| 2288 | |||
| 2289 | attr.config = PERF_SYNTH_INTEL_CBR; | ||
| 2290 | err = intel_pt_synth_event(session, "cbr", &attr, id); | ||
| 2291 | if (err) | ||
| 2292 | return err; | ||
| 2293 | pt->cbr_id = id; | ||
| 2294 | intel_pt_set_event_name(evlist, id, "cbr"); | ||
| 2295 | id += 1; | ||
| 2296 | } | ||
| 2297 | |||
| 2298 | if (pt->synth_opts.pwr_events && (evsel->attr.config & 0x10)) { | ||
| 2299 | attr.config = PERF_SYNTH_INTEL_MWAIT; | ||
| 2300 | err = intel_pt_synth_event(session, "mwait", &attr, id); | ||
| 2301 | if (err) | ||
| 2302 | return err; | ||
| 2303 | pt->mwait_id = id; | ||
| 2304 | intel_pt_set_event_name(evlist, id, "mwait"); | ||
| 2305 | id += 1; | ||
| 2306 | |||
| 2307 | attr.config = PERF_SYNTH_INTEL_PWRE; | ||
| 2308 | err = intel_pt_synth_event(session, "pwre", &attr, id); | ||
| 2309 | if (err) | ||
| 2310 | return err; | ||
| 2311 | pt->pwre_id = id; | ||
| 2312 | intel_pt_set_event_name(evlist, id, "pwre"); | ||
| 2313 | id += 1; | ||
| 2314 | |||
| 2315 | attr.config = PERF_SYNTH_INTEL_EXSTOP; | ||
| 2316 | err = intel_pt_synth_event(session, "exstop", &attr, id); | ||
| 2317 | if (err) | ||
| 2318 | return err; | ||
| 2319 | pt->exstop_id = id; | ||
| 2320 | intel_pt_set_event_name(evlist, id, "exstop"); | ||
| 2321 | id += 1; | ||
| 2322 | |||
| 2323 | attr.config = PERF_SYNTH_INTEL_PWRX; | ||
| 2324 | err = intel_pt_synth_event(session, "pwrx", &attr, id); | ||
| 2325 | if (err) | ||
| 2326 | return err; | ||
| 2327 | pt->pwrx_id = id; | ||
| 2328 | intel_pt_set_event_name(evlist, id, "pwrx"); | ||
| 2329 | id += 1; | ||
| 2330 | } | ||
| 2331 | |||
| 2050 | pt->synth_needs_swap = evsel->needs_swap; | 2332 | pt->synth_needs_swap = evsel->needs_swap; |
| 2051 | 2333 | ||
| 2052 | return 0; | 2334 | return 0; |
| @@ -2313,6 +2595,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event, | |||
| 2313 | intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq); | 2595 | intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq); |
| 2314 | intel_pt_log("Maximum non-turbo ratio %u\n", | 2596 | intel_pt_log("Maximum non-turbo ratio %u\n", |
| 2315 | pt->max_non_turbo_ratio); | 2597 | pt->max_non_turbo_ratio); |
| 2598 | pt->cbr2khz = tsc_freq / pt->max_non_turbo_ratio / 1000; | ||
| 2316 | } | 2599 | } |
| 2317 | 2600 | ||
| 2318 | if (pt->synth_opts.calls) | 2601 | if (pt->synth_opts.calls) |
