diff options
author | Adrian Hunter <adrian.hunter@intel.com> | 2017-06-30 04:36:45 -0400 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2017-06-30 10:48:28 -0400 |
commit | 3797307576191d7fb4c974cd461188162ac36f33 (patch) | |
tree | 30e9304b9e1580d681981d8ddf1dff21aeb411bd /tools | |
parent | 4a9fd4e0effc94b9ec79250946a0054d4dd1a963 (diff) |
perf intel-pt: Synthesize new power and "ptwrite" events
Synthesize new power and ptwrite events.
Power events report changes to C-state, but I have also added support
for the existing CBR (core-to-bus ratio) packet and included it
when outputting power events.
The PTWRITE packet is associated with the new "ptwrite" instruction,
which is essentially just a way to stuff a 32-bit or 64-bit value into
the PT trace.
More details can be found in the patches that add documentation and in
the Intel SDM.
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Link: http://lkml.kernel.org/r/1498811805-2335-1-git-send-email-adrian.hunter@intel.com
[ Copy the description of these packets from the patchkit cover message ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools')
-rw-r--r-- | tools/perf/util/intel-pt.c | 283 |
1 file changed, 283 insertions, 0 deletions
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index ace79a405f98..b58f9fd1e2ee 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c | |||
@@ -92,6 +92,18 @@ struct intel_pt { | |||
92 | u64 transactions_sample_type; | 92 | u64 transactions_sample_type; |
93 | u64 transactions_id; | 93 | u64 transactions_id; |
94 | 94 | ||
95 | bool sample_ptwrites; | ||
96 | u64 ptwrites_sample_type; | ||
97 | u64 ptwrites_id; | ||
98 | |||
99 | bool sample_pwr_events; | ||
100 | u64 pwr_events_sample_type; | ||
101 | u64 mwait_id; | ||
102 | u64 pwre_id; | ||
103 | u64 exstop_id; | ||
104 | u64 pwrx_id; | ||
105 | u64 cbr_id; | ||
106 | |||
95 | bool synth_needs_swap; | 107 | bool synth_needs_swap; |
96 | 108 | ||
97 | u64 tsc_bit; | 109 | u64 tsc_bit; |
@@ -102,6 +114,7 @@ struct intel_pt { | |||
102 | u64 cyc_bit; | 114 | u64 cyc_bit; |
103 | u64 noretcomp_bit; | 115 | u64 noretcomp_bit; |
104 | unsigned max_non_turbo_ratio; | 116 | unsigned max_non_turbo_ratio; |
117 | unsigned cbr2khz; | ||
105 | 118 | ||
106 | unsigned long num_events; | 119 | unsigned long num_events; |
107 | 120 | ||
@@ -1236,6 +1249,175 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) | |||
1236 | pt->transactions_sample_type); | 1249 | pt->transactions_sample_type); |
1237 | } | 1250 | } |
1238 | 1251 | ||
1252 | static void intel_pt_prep_p_sample(struct intel_pt *pt, | ||
1253 | struct intel_pt_queue *ptq, | ||
1254 | union perf_event *event, | ||
1255 | struct perf_sample *sample) | ||
1256 | { | ||
1257 | intel_pt_prep_sample(pt, ptq, event, sample); | ||
1258 | |||
1259 | /* | ||
1260 | * Zero IP is used to mean "trace start" but that is not the case for | ||
1261 | * power or PTWRITE events with no IP, so clear the flags. | ||
1262 | */ | ||
1263 | if (!sample->ip) | ||
1264 | sample->flags = 0; | ||
1265 | } | ||
1266 | |||
1267 | static int intel_pt_synth_ptwrite_sample(struct intel_pt_queue *ptq) | ||
1268 | { | ||
1269 | struct intel_pt *pt = ptq->pt; | ||
1270 | union perf_event *event = ptq->event_buf; | ||
1271 | struct perf_sample sample = { .ip = 0, }; | ||
1272 | struct perf_synth_intel_ptwrite raw; | ||
1273 | |||
1274 | if (intel_pt_skip_event(pt)) | ||
1275 | return 0; | ||
1276 | |||
1277 | intel_pt_prep_p_sample(pt, ptq, event, &sample); | ||
1278 | |||
1279 | sample.id = ptq->pt->ptwrites_id; | ||
1280 | sample.stream_id = ptq->pt->ptwrites_id; | ||
1281 | |||
1282 | raw.flags = 0; | ||
1283 | raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP); | ||
1284 | raw.payload = cpu_to_le64(ptq->state->ptw_payload); | ||
1285 | |||
1286 | sample.raw_size = perf_synth__raw_size(raw); | ||
1287 | sample.raw_data = perf_synth__raw_data(&raw); | ||
1288 | |||
1289 | return intel_pt_deliver_synth_event(pt, ptq, event, &sample, | ||
1290 | pt->ptwrites_sample_type); | ||
1291 | } | ||
1292 | |||
1293 | static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq) | ||
1294 | { | ||
1295 | struct intel_pt *pt = ptq->pt; | ||
1296 | union perf_event *event = ptq->event_buf; | ||
1297 | struct perf_sample sample = { .ip = 0, }; | ||
1298 | struct perf_synth_intel_cbr raw; | ||
1299 | u32 flags; | ||
1300 | |||
1301 | if (intel_pt_skip_event(pt)) | ||
1302 | return 0; | ||
1303 | |||
1304 | intel_pt_prep_p_sample(pt, ptq, event, &sample); | ||
1305 | |||
1306 | sample.id = ptq->pt->cbr_id; | ||
1307 | sample.stream_id = ptq->pt->cbr_id; | ||
1308 | |||
1309 | flags = (u16)ptq->state->cbr_payload | (pt->max_non_turbo_ratio << 16); | ||
1310 | raw.flags = cpu_to_le32(flags); | ||
1311 | raw.freq = cpu_to_le32(raw.cbr * pt->cbr2khz); | ||
1312 | raw.reserved3 = 0; | ||
1313 | |||
1314 | sample.raw_size = perf_synth__raw_size(raw); | ||
1315 | sample.raw_data = perf_synth__raw_data(&raw); | ||
1316 | |||
1317 | return intel_pt_deliver_synth_event(pt, ptq, event, &sample, | ||
1318 | pt->pwr_events_sample_type); | ||
1319 | } | ||
1320 | |||
1321 | static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq) | ||
1322 | { | ||
1323 | struct intel_pt *pt = ptq->pt; | ||
1324 | union perf_event *event = ptq->event_buf; | ||
1325 | struct perf_sample sample = { .ip = 0, }; | ||
1326 | struct perf_synth_intel_mwait raw; | ||
1327 | |||
1328 | if (intel_pt_skip_event(pt)) | ||
1329 | return 0; | ||
1330 | |||
1331 | intel_pt_prep_p_sample(pt, ptq, event, &sample); | ||
1332 | |||
1333 | sample.id = ptq->pt->mwait_id; | ||
1334 | sample.stream_id = ptq->pt->mwait_id; | ||
1335 | |||
1336 | raw.reserved = 0; | ||
1337 | raw.payload = cpu_to_le64(ptq->state->mwait_payload); | ||
1338 | |||
1339 | sample.raw_size = perf_synth__raw_size(raw); | ||
1340 | sample.raw_data = perf_synth__raw_data(&raw); | ||
1341 | |||
1342 | return intel_pt_deliver_synth_event(pt, ptq, event, &sample, | ||
1343 | pt->pwr_events_sample_type); | ||
1344 | } | ||
1345 | |||
1346 | static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq) | ||
1347 | { | ||
1348 | struct intel_pt *pt = ptq->pt; | ||
1349 | union perf_event *event = ptq->event_buf; | ||
1350 | struct perf_sample sample = { .ip = 0, }; | ||
1351 | struct perf_synth_intel_pwre raw; | ||
1352 | |||
1353 | if (intel_pt_skip_event(pt)) | ||
1354 | return 0; | ||
1355 | |||
1356 | intel_pt_prep_p_sample(pt, ptq, event, &sample); | ||
1357 | |||
1358 | sample.id = ptq->pt->pwre_id; | ||
1359 | sample.stream_id = ptq->pt->pwre_id; | ||
1360 | |||
1361 | raw.reserved = 0; | ||
1362 | raw.payload = cpu_to_le64(ptq->state->pwre_payload); | ||
1363 | |||
1364 | sample.raw_size = perf_synth__raw_size(raw); | ||
1365 | sample.raw_data = perf_synth__raw_data(&raw); | ||
1366 | |||
1367 | return intel_pt_deliver_synth_event(pt, ptq, event, &sample, | ||
1368 | pt->pwr_events_sample_type); | ||
1369 | } | ||
1370 | |||
1371 | static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq) | ||
1372 | { | ||
1373 | struct intel_pt *pt = ptq->pt; | ||
1374 | union perf_event *event = ptq->event_buf; | ||
1375 | struct perf_sample sample = { .ip = 0, }; | ||
1376 | struct perf_synth_intel_exstop raw; | ||
1377 | |||
1378 | if (intel_pt_skip_event(pt)) | ||
1379 | return 0; | ||
1380 | |||
1381 | intel_pt_prep_p_sample(pt, ptq, event, &sample); | ||
1382 | |||
1383 | sample.id = ptq->pt->exstop_id; | ||
1384 | sample.stream_id = ptq->pt->exstop_id; | ||
1385 | |||
1386 | raw.flags = 0; | ||
1387 | raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP); | ||
1388 | |||
1389 | sample.raw_size = perf_synth__raw_size(raw); | ||
1390 | sample.raw_data = perf_synth__raw_data(&raw); | ||
1391 | |||
1392 | return intel_pt_deliver_synth_event(pt, ptq, event, &sample, | ||
1393 | pt->pwr_events_sample_type); | ||
1394 | } | ||
1395 | |||
1396 | static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq) | ||
1397 | { | ||
1398 | struct intel_pt *pt = ptq->pt; | ||
1399 | union perf_event *event = ptq->event_buf; | ||
1400 | struct perf_sample sample = { .ip = 0, }; | ||
1401 | struct perf_synth_intel_pwrx raw; | ||
1402 | |||
1403 | if (intel_pt_skip_event(pt)) | ||
1404 | return 0; | ||
1405 | |||
1406 | intel_pt_prep_p_sample(pt, ptq, event, &sample); | ||
1407 | |||
1408 | sample.id = ptq->pt->pwrx_id; | ||
1409 | sample.stream_id = ptq->pt->pwrx_id; | ||
1410 | |||
1411 | raw.reserved = 0; | ||
1412 | raw.payload = cpu_to_le64(ptq->state->pwrx_payload); | ||
1413 | |||
1414 | sample.raw_size = perf_synth__raw_size(raw); | ||
1415 | sample.raw_data = perf_synth__raw_data(&raw); | ||
1416 | |||
1417 | return intel_pt_deliver_synth_event(pt, ptq, event, &sample, | ||
1418 | pt->pwr_events_sample_type); | ||
1419 | } | ||
1420 | |||
1239 | static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, | 1421 | static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, |
1240 | pid_t pid, pid_t tid, u64 ip) | 1422 | pid_t pid, pid_t tid, u64 ip) |
1241 | { | 1423 | { |
@@ -1287,6 +1469,10 @@ static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip) | |||
1287 | PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT)); | 1469 | PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT)); |
1288 | } | 1470 | } |
1289 | 1471 | ||
1472 | #define INTEL_PT_PWR_EVT (INTEL_PT_MWAIT_OP | INTEL_PT_PWR_ENTRY | \ | ||
1473 | INTEL_PT_EX_STOP | INTEL_PT_PWR_EXIT | \ | ||
1474 | INTEL_PT_CBR_CHG) | ||
1475 | |||
1290 | static int intel_pt_sample(struct intel_pt_queue *ptq) | 1476 | static int intel_pt_sample(struct intel_pt_queue *ptq) |
1291 | { | 1477 | { |
1292 | const struct intel_pt_state *state = ptq->state; | 1478 | const struct intel_pt_state *state = ptq->state; |
@@ -1298,6 +1484,34 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) | |||
1298 | 1484 | ||
1299 | ptq->have_sample = false; | 1485 | ptq->have_sample = false; |
1300 | 1486 | ||
1487 | if (pt->sample_pwr_events && (state->type & INTEL_PT_PWR_EVT)) { | ||
1488 | if (state->type & INTEL_PT_CBR_CHG) { | ||
1489 | err = intel_pt_synth_cbr_sample(ptq); | ||
1490 | if (err) | ||
1491 | return err; | ||
1492 | } | ||
1493 | if (state->type & INTEL_PT_MWAIT_OP) { | ||
1494 | err = intel_pt_synth_mwait_sample(ptq); | ||
1495 | if (err) | ||
1496 | return err; | ||
1497 | } | ||
1498 | if (state->type & INTEL_PT_PWR_ENTRY) { | ||
1499 | err = intel_pt_synth_pwre_sample(ptq); | ||
1500 | if (err) | ||
1501 | return err; | ||
1502 | } | ||
1503 | if (state->type & INTEL_PT_EX_STOP) { | ||
1504 | err = intel_pt_synth_exstop_sample(ptq); | ||
1505 | if (err) | ||
1506 | return err; | ||
1507 | } | ||
1508 | if (state->type & INTEL_PT_PWR_EXIT) { | ||
1509 | err = intel_pt_synth_pwrx_sample(ptq); | ||
1510 | if (err) | ||
1511 | return err; | ||
1512 | } | ||
1513 | } | ||
1514 | |||
1301 | if (pt->sample_instructions && (state->type & INTEL_PT_INSTRUCTION)) { | 1515 | if (pt->sample_instructions && (state->type & INTEL_PT_INSTRUCTION)) { |
1302 | err = intel_pt_synth_instruction_sample(ptq); | 1516 | err = intel_pt_synth_instruction_sample(ptq); |
1303 | if (err) | 1517 | if (err) |
@@ -1310,6 +1524,12 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) | |||
1310 | return err; | 1524 | return err; |
1311 | } | 1525 | } |
1312 | 1526 | ||
1527 | if (pt->sample_ptwrites && (state->type & INTEL_PT_PTW)) { | ||
1528 | err = intel_pt_synth_ptwrite_sample(ptq); | ||
1529 | if (err) | ||
1530 | return err; | ||
1531 | } | ||
1532 | |||
1313 | if (!(state->type & INTEL_PT_BRANCH)) | 1533 | if (!(state->type & INTEL_PT_BRANCH)) |
1314 | return 0; | 1534 | return 0; |
1315 | 1535 | ||
@@ -2047,6 +2267,68 @@ static int intel_pt_synth_events(struct intel_pt *pt, | |||
2047 | id += 1; | 2267 | id += 1; |
2048 | } | 2268 | } |
2049 | 2269 | ||
2270 | attr.type = PERF_TYPE_SYNTH; | ||
2271 | attr.sample_type |= PERF_SAMPLE_RAW; | ||
2272 | |||
2273 | if (pt->synth_opts.ptwrites) { | ||
2274 | attr.config = PERF_SYNTH_INTEL_PTWRITE; | ||
2275 | err = intel_pt_synth_event(session, "ptwrite", &attr, id); | ||
2276 | if (err) | ||
2277 | return err; | ||
2278 | pt->sample_ptwrites = true; | ||
2279 | pt->ptwrites_sample_type = attr.sample_type; | ||
2280 | pt->ptwrites_id = id; | ||
2281 | intel_pt_set_event_name(evlist, id, "ptwrite"); | ||
2282 | id += 1; | ||
2283 | } | ||
2284 | |||
2285 | if (pt->synth_opts.pwr_events) { | ||
2286 | pt->sample_pwr_events = true; | ||
2287 | pt->pwr_events_sample_type = attr.sample_type; | ||
2288 | |||
2289 | attr.config = PERF_SYNTH_INTEL_CBR; | ||
2290 | err = intel_pt_synth_event(session, "cbr", &attr, id); | ||
2291 | if (err) | ||
2292 | return err; | ||
2293 | pt->cbr_id = id; | ||
2294 | intel_pt_set_event_name(evlist, id, "cbr"); | ||
2295 | id += 1; | ||
2296 | } | ||
2297 | |||
2298 | if (pt->synth_opts.pwr_events && (evsel->attr.config & 0x10)) { | ||
2299 | attr.config = PERF_SYNTH_INTEL_MWAIT; | ||
2300 | err = intel_pt_synth_event(session, "mwait", &attr, id); | ||
2301 | if (err) | ||
2302 | return err; | ||
2303 | pt->mwait_id = id; | ||
2304 | intel_pt_set_event_name(evlist, id, "mwait"); | ||
2305 | id += 1; | ||
2306 | |||
2307 | attr.config = PERF_SYNTH_INTEL_PWRE; | ||
2308 | err = intel_pt_synth_event(session, "pwre", &attr, id); | ||
2309 | if (err) | ||
2310 | return err; | ||
2311 | pt->pwre_id = id; | ||
2312 | intel_pt_set_event_name(evlist, id, "pwre"); | ||
2313 | id += 1; | ||
2314 | |||
2315 | attr.config = PERF_SYNTH_INTEL_EXSTOP; | ||
2316 | err = intel_pt_synth_event(session, "exstop", &attr, id); | ||
2317 | if (err) | ||
2318 | return err; | ||
2319 | pt->exstop_id = id; | ||
2320 | intel_pt_set_event_name(evlist, id, "exstop"); | ||
2321 | id += 1; | ||
2322 | |||
2323 | attr.config = PERF_SYNTH_INTEL_PWRX; | ||
2324 | err = intel_pt_synth_event(session, "pwrx", &attr, id); | ||
2325 | if (err) | ||
2326 | return err; | ||
2327 | pt->pwrx_id = id; | ||
2328 | intel_pt_set_event_name(evlist, id, "pwrx"); | ||
2329 | id += 1; | ||
2330 | } | ||
2331 | |||
2050 | pt->synth_needs_swap = evsel->needs_swap; | 2332 | pt->synth_needs_swap = evsel->needs_swap; |
2051 | 2333 | ||
2052 | return 0; | 2334 | return 0; |
@@ -2313,6 +2595,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event, | |||
2313 | intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq); | 2595 | intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq); |
2314 | intel_pt_log("Maximum non-turbo ratio %u\n", | 2596 | intel_pt_log("Maximum non-turbo ratio %u\n", |
2315 | pt->max_non_turbo_ratio); | 2597 | pt->max_non_turbo_ratio); |
2598 | pt->cbr2khz = tsc_freq / pt->max_non_turbo_ratio / 1000; | ||
2316 | } | 2599 | } |
2317 | 2600 | ||
2318 | if (pt->synth_opts.calls) | 2601 | if (pt->synth_opts.calls) |