aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorAdrian Hunter <adrian.hunter@intel.com>2017-06-30 04:36:45 -0400
committerArnaldo Carvalho de Melo <acme@redhat.com>2017-06-30 10:48:28 -0400
commit3797307576191d7fb4c974cd461188162ac36f33 (patch)
tree30e9304b9e1580d681981d8ddf1dff21aeb411bd /tools
parent4a9fd4e0effc94b9ec79250946a0054d4dd1a963 (diff)
perf intel-pt: Synthesize new power and "ptwrite" events
Synthesize new power and ptwrite events. Power events report changes to C-state, but I have also added support for the existing CBR (core-to-bus ratio) packet and included that when outputting power events. The PTWRITE packet is associated with the new "ptwrite" instruction, which is essentially just a way to stuff a 32- or 64-bit value into the PT trace. More details can be found in the patches that add documentation and in the Intel SDM. Signed-off-by: Adrian Hunter <adrian.hunter@intel.com> Cc: Andi Kleen <ak@linux.intel.com> Link: http://lkml.kernel.org/r/1498811805-2335-1-git-send-email-adrian.hunter@intel.com [ Copy the description of such packet from the patchkit cover message ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools')
-rw-r--r--tools/perf/util/intel-pt.c283
1 files changed, 283 insertions, 0 deletions
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index ace79a405f98..b58f9fd1e2ee 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -92,6 +92,18 @@ struct intel_pt {
92 u64 transactions_sample_type; 92 u64 transactions_sample_type;
93 u64 transactions_id; 93 u64 transactions_id;
94 94
95 bool sample_ptwrites;
96 u64 ptwrites_sample_type;
97 u64 ptwrites_id;
98
99 bool sample_pwr_events;
100 u64 pwr_events_sample_type;
101 u64 mwait_id;
102 u64 pwre_id;
103 u64 exstop_id;
104 u64 pwrx_id;
105 u64 cbr_id;
106
95 bool synth_needs_swap; 107 bool synth_needs_swap;
96 108
97 u64 tsc_bit; 109 u64 tsc_bit;
@@ -102,6 +114,7 @@ struct intel_pt {
102 u64 cyc_bit; 114 u64 cyc_bit;
103 u64 noretcomp_bit; 115 u64 noretcomp_bit;
104 unsigned max_non_turbo_ratio; 116 unsigned max_non_turbo_ratio;
117 unsigned cbr2khz;
105 118
106 unsigned long num_events; 119 unsigned long num_events;
107 120
@@ -1236,6 +1249,175 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
1236 pt->transactions_sample_type); 1249 pt->transactions_sample_type);
1237} 1250}
1238 1251
1252static void intel_pt_prep_p_sample(struct intel_pt *pt,
1253 struct intel_pt_queue *ptq,
1254 union perf_event *event,
1255 struct perf_sample *sample)
1256{
1257 intel_pt_prep_sample(pt, ptq, event, sample);
1258
1259 /*
1260 * Zero IP is used to mean "trace start" but that is not the case for
1261 * power or PTWRITE events with no IP, so clear the flags.
1262 */
1263 if (!sample->ip)
1264 sample->flags = 0;
1265}
1266
1267static int intel_pt_synth_ptwrite_sample(struct intel_pt_queue *ptq)
1268{
1269 struct intel_pt *pt = ptq->pt;
1270 union perf_event *event = ptq->event_buf;
1271 struct perf_sample sample = { .ip = 0, };
1272 struct perf_synth_intel_ptwrite raw;
1273
1274 if (intel_pt_skip_event(pt))
1275 return 0;
1276
1277 intel_pt_prep_p_sample(pt, ptq, event, &sample);
1278
1279 sample.id = ptq->pt->ptwrites_id;
1280 sample.stream_id = ptq->pt->ptwrites_id;
1281
1282 raw.flags = 0;
1283 raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);
1284 raw.payload = cpu_to_le64(ptq->state->ptw_payload);
1285
1286 sample.raw_size = perf_synth__raw_size(raw);
1287 sample.raw_data = perf_synth__raw_data(&raw);
1288
1289 return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1290 pt->ptwrites_sample_type);
1291}
1292
1293static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq)
1294{
1295 struct intel_pt *pt = ptq->pt;
1296 union perf_event *event = ptq->event_buf;
1297 struct perf_sample sample = { .ip = 0, };
1298 struct perf_synth_intel_cbr raw;
1299 u32 flags;
1300
1301 if (intel_pt_skip_event(pt))
1302 return 0;
1303
1304 intel_pt_prep_p_sample(pt, ptq, event, &sample);
1305
1306 sample.id = ptq->pt->cbr_id;
1307 sample.stream_id = ptq->pt->cbr_id;
1308
1309 flags = (u16)ptq->state->cbr_payload | (pt->max_non_turbo_ratio << 16);
1310 raw.flags = cpu_to_le32(flags);
1311 raw.freq = cpu_to_le32(raw.cbr * pt->cbr2khz);
1312 raw.reserved3 = 0;
1313
1314 sample.raw_size = perf_synth__raw_size(raw);
1315 sample.raw_data = perf_synth__raw_data(&raw);
1316
1317 return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1318 pt->pwr_events_sample_type);
1319}
1320
1321static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq)
1322{
1323 struct intel_pt *pt = ptq->pt;
1324 union perf_event *event = ptq->event_buf;
1325 struct perf_sample sample = { .ip = 0, };
1326 struct perf_synth_intel_mwait raw;
1327
1328 if (intel_pt_skip_event(pt))
1329 return 0;
1330
1331 intel_pt_prep_p_sample(pt, ptq, event, &sample);
1332
1333 sample.id = ptq->pt->mwait_id;
1334 sample.stream_id = ptq->pt->mwait_id;
1335
1336 raw.reserved = 0;
1337 raw.payload = cpu_to_le64(ptq->state->mwait_payload);
1338
1339 sample.raw_size = perf_synth__raw_size(raw);
1340 sample.raw_data = perf_synth__raw_data(&raw);
1341
1342 return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1343 pt->pwr_events_sample_type);
1344}
1345
1346static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq)
1347{
1348 struct intel_pt *pt = ptq->pt;
1349 union perf_event *event = ptq->event_buf;
1350 struct perf_sample sample = { .ip = 0, };
1351 struct perf_synth_intel_pwre raw;
1352
1353 if (intel_pt_skip_event(pt))
1354 return 0;
1355
1356 intel_pt_prep_p_sample(pt, ptq, event, &sample);
1357
1358 sample.id = ptq->pt->pwre_id;
1359 sample.stream_id = ptq->pt->pwre_id;
1360
1361 raw.reserved = 0;
1362 raw.payload = cpu_to_le64(ptq->state->pwre_payload);
1363
1364 sample.raw_size = perf_synth__raw_size(raw);
1365 sample.raw_data = perf_synth__raw_data(&raw);
1366
1367 return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1368 pt->pwr_events_sample_type);
1369}
1370
1371static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq)
1372{
1373 struct intel_pt *pt = ptq->pt;
1374 union perf_event *event = ptq->event_buf;
1375 struct perf_sample sample = { .ip = 0, };
1376 struct perf_synth_intel_exstop raw;
1377
1378 if (intel_pt_skip_event(pt))
1379 return 0;
1380
1381 intel_pt_prep_p_sample(pt, ptq, event, &sample);
1382
1383 sample.id = ptq->pt->exstop_id;
1384 sample.stream_id = ptq->pt->exstop_id;
1385
1386 raw.flags = 0;
1387 raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);
1388
1389 sample.raw_size = perf_synth__raw_size(raw);
1390 sample.raw_data = perf_synth__raw_data(&raw);
1391
1392 return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1393 pt->pwr_events_sample_type);
1394}
1395
1396static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
1397{
1398 struct intel_pt *pt = ptq->pt;
1399 union perf_event *event = ptq->event_buf;
1400 struct perf_sample sample = { .ip = 0, };
1401 struct perf_synth_intel_pwrx raw;
1402
1403 if (intel_pt_skip_event(pt))
1404 return 0;
1405
1406 intel_pt_prep_p_sample(pt, ptq, event, &sample);
1407
1408 sample.id = ptq->pt->pwrx_id;
1409 sample.stream_id = ptq->pt->pwrx_id;
1410
1411 raw.reserved = 0;
1412 raw.payload = cpu_to_le64(ptq->state->pwrx_payload);
1413
1414 sample.raw_size = perf_synth__raw_size(raw);
1415 sample.raw_data = perf_synth__raw_data(&raw);
1416
1417 return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
1418 pt->pwr_events_sample_type);
1419}
1420
1239static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, 1421static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
1240 pid_t pid, pid_t tid, u64 ip) 1422 pid_t pid, pid_t tid, u64 ip)
1241{ 1423{
@@ -1287,6 +1469,10 @@ static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip)
1287 PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT)); 1469 PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT));
1288} 1470}
1289 1471
/*
 * Mask of the decoder state types that intel_pt_sample() reports as power
 * events, listed in the order in which they are synthesized there.
 */
#define INTEL_PT_PWR_EVT (INTEL_PT_CBR_CHG | INTEL_PT_MWAIT_OP | \
			  INTEL_PT_PWR_ENTRY | INTEL_PT_EX_STOP | \
			  INTEL_PT_PWR_EXIT)
1475
1290static int intel_pt_sample(struct intel_pt_queue *ptq) 1476static int intel_pt_sample(struct intel_pt_queue *ptq)
1291{ 1477{
1292 const struct intel_pt_state *state = ptq->state; 1478 const struct intel_pt_state *state = ptq->state;
@@ -1298,6 +1484,34 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
1298 1484
1299 ptq->have_sample = false; 1485 ptq->have_sample = false;
1300 1486
1487 if (pt->sample_pwr_events && (state->type & INTEL_PT_PWR_EVT)) {
1488 if (state->type & INTEL_PT_CBR_CHG) {
1489 err = intel_pt_synth_cbr_sample(ptq);
1490 if (err)
1491 return err;
1492 }
1493 if (state->type & INTEL_PT_MWAIT_OP) {
1494 err = intel_pt_synth_mwait_sample(ptq);
1495 if (err)
1496 return err;
1497 }
1498 if (state->type & INTEL_PT_PWR_ENTRY) {
1499 err = intel_pt_synth_pwre_sample(ptq);
1500 if (err)
1501 return err;
1502 }
1503 if (state->type & INTEL_PT_EX_STOP) {
1504 err = intel_pt_synth_exstop_sample(ptq);
1505 if (err)
1506 return err;
1507 }
1508 if (state->type & INTEL_PT_PWR_EXIT) {
1509 err = intel_pt_synth_pwrx_sample(ptq);
1510 if (err)
1511 return err;
1512 }
1513 }
1514
1301 if (pt->sample_instructions && (state->type & INTEL_PT_INSTRUCTION)) { 1515 if (pt->sample_instructions && (state->type & INTEL_PT_INSTRUCTION)) {
1302 err = intel_pt_synth_instruction_sample(ptq); 1516 err = intel_pt_synth_instruction_sample(ptq);
1303 if (err) 1517 if (err)
@@ -1310,6 +1524,12 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
1310 return err; 1524 return err;
1311 } 1525 }
1312 1526
1527 if (pt->sample_ptwrites && (state->type & INTEL_PT_PTW)) {
1528 err = intel_pt_synth_ptwrite_sample(ptq);
1529 if (err)
1530 return err;
1531 }
1532
1313 if (!(state->type & INTEL_PT_BRANCH)) 1533 if (!(state->type & INTEL_PT_BRANCH))
1314 return 0; 1534 return 0;
1315 1535
@@ -2047,6 +2267,68 @@ static int intel_pt_synth_events(struct intel_pt *pt,
2047 id += 1; 2267 id += 1;
2048 } 2268 }
2049 2269
2270 attr.type = PERF_TYPE_SYNTH;
2271 attr.sample_type |= PERF_SAMPLE_RAW;
2272
2273 if (pt->synth_opts.ptwrites) {
2274 attr.config = PERF_SYNTH_INTEL_PTWRITE;
2275 err = intel_pt_synth_event(session, "ptwrite", &attr, id);
2276 if (err)
2277 return err;
2278 pt->sample_ptwrites = true;
2279 pt->ptwrites_sample_type = attr.sample_type;
2280 pt->ptwrites_id = id;
2281 intel_pt_set_event_name(evlist, id, "ptwrite");
2282 id += 1;
2283 }
2284
2285 if (pt->synth_opts.pwr_events) {
2286 pt->sample_pwr_events = true;
2287 pt->pwr_events_sample_type = attr.sample_type;
2288
2289 attr.config = PERF_SYNTH_INTEL_CBR;
2290 err = intel_pt_synth_event(session, "cbr", &attr, id);
2291 if (err)
2292 return err;
2293 pt->cbr_id = id;
2294 intel_pt_set_event_name(evlist, id, "cbr");
2295 id += 1;
2296 }
2297
2298 if (pt->synth_opts.pwr_events && (evsel->attr.config & 0x10)) {
2299 attr.config = PERF_SYNTH_INTEL_MWAIT;
2300 err = intel_pt_synth_event(session, "mwait", &attr, id);
2301 if (err)
2302 return err;
2303 pt->mwait_id = id;
2304 intel_pt_set_event_name(evlist, id, "mwait");
2305 id += 1;
2306
2307 attr.config = PERF_SYNTH_INTEL_PWRE;
2308 err = intel_pt_synth_event(session, "pwre", &attr, id);
2309 if (err)
2310 return err;
2311 pt->pwre_id = id;
2312 intel_pt_set_event_name(evlist, id, "pwre");
2313 id += 1;
2314
2315 attr.config = PERF_SYNTH_INTEL_EXSTOP;
2316 err = intel_pt_synth_event(session, "exstop", &attr, id);
2317 if (err)
2318 return err;
2319 pt->exstop_id = id;
2320 intel_pt_set_event_name(evlist, id, "exstop");
2321 id += 1;
2322
2323 attr.config = PERF_SYNTH_INTEL_PWRX;
2324 err = intel_pt_synth_event(session, "pwrx", &attr, id);
2325 if (err)
2326 return err;
2327 pt->pwrx_id = id;
2328 intel_pt_set_event_name(evlist, id, "pwrx");
2329 id += 1;
2330 }
2331
2050 pt->synth_needs_swap = evsel->needs_swap; 2332 pt->synth_needs_swap = evsel->needs_swap;
2051 2333
2052 return 0; 2334 return 0;
@@ -2313,6 +2595,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
2313 intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq); 2595 intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq);
2314 intel_pt_log("Maximum non-turbo ratio %u\n", 2596 intel_pt_log("Maximum non-turbo ratio %u\n",
2315 pt->max_non_turbo_ratio); 2597 pt->max_non_turbo_ratio);
2598 pt->cbr2khz = tsc_freq / pt->max_non_turbo_ratio / 1000;
2316 } 2599 }
2317 2600
2318 if (pt->synth_opts.calls) 2601 if (pt->synth_opts.calls)