aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRobert Walker <robert.walker@arm.com>2018-02-14 06:24:39 -0500
committerArnaldo Carvalho de Melo <acme@redhat.com>2018-02-16 12:55:44 -0500
commite573e978fb12e16094c0b39fad3dc4e6b4803c2c (patch)
tree89f2ee3bcdcbcff64b2add5aa27cbc9af2bd459d
parent7e99b1972263c2f611d7f2fb67d09f3384006593 (diff)
perf cs-etm: Inject capability for CoreSight traces
Added user space perf functionality to translate CoreSight traces into instruction events with branch stack. To invoke the new functionality, use the perf inject tool with --itrace=il. For example, to translate the ETM trace from perf.data into last branch records in a new inj.data file: $ perf inject --itrace=i100000il128 -i perf.data -o perf.data.new The 'i' parameter to itrace generates periodic instruction events. The period between instruction events can be specified as a number of instructions suffixed by i (default 100000). The parameter to 'l' specifies the number of entries in the branch stack attached to instruction events. The 'b' parameter to itrace generates events on taken branches. This patch also fixes the contents of the branch events used in perf report - previously branch events were generated for each contiguous range of instructions executed. These are fixed to generate branch events between the last address of a range ending in an executed branch instruction and the start address of the next range. Based on patches by Sebastian Pop <s.pop@samsung.com> with additional fixes and support for specifying the instruction period. Originally-by: Sebastian Pop <s.pop@samsung.com> Signed-off-by: Robert Walker <robert.walker@arm.com> Acked-by: Mathieu Poirier <mathieu.poirier@linaro.org> Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1518607481-4059-2-git-send-email-robert.walker@arm.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.c65
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.h1
-rw-r--r--tools/perf/util/cs-etm.c434
3 files changed, 436 insertions, 64 deletions
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index 1fb01849f1c7..8ff69dfd725a 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -78,6 +78,8 @@ int cs_etm_decoder__reset(struct cs_etm_decoder *decoder)
78{ 78{
79 ocsd_datapath_resp_t dp_ret; 79 ocsd_datapath_resp_t dp_ret;
80 80
81 decoder->prev_return = OCSD_RESP_CONT;
82
81 dp_ret = ocsd_dt_process_data(decoder->dcd_tree, OCSD_OP_RESET, 83 dp_ret = ocsd_dt_process_data(decoder->dcd_tree, OCSD_OP_RESET,
82 0, 0, NULL, NULL); 84 0, 0, NULL, NULL);
83 if (OCSD_DATA_RESP_IS_FATAL(dp_ret)) 85 if (OCSD_DATA_RESP_IS_FATAL(dp_ret))
@@ -253,16 +255,16 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder)
253 decoder->packet_count = 0; 255 decoder->packet_count = 0;
254 for (i = 0; i < MAX_BUFFER; i++) { 256 for (i = 0; i < MAX_BUFFER; i++) {
255 decoder->packet_buffer[i].start_addr = 0xdeadbeefdeadbeefUL; 257 decoder->packet_buffer[i].start_addr = 0xdeadbeefdeadbeefUL;
256 decoder->packet_buffer[i].end_addr = 0xdeadbeefdeadbeefUL; 258 decoder->packet_buffer[i].end_addr = 0xdeadbeefdeadbeefUL;
257 decoder->packet_buffer[i].exc = false; 259 decoder->packet_buffer[i].last_instr_taken_branch = false;
258 decoder->packet_buffer[i].exc_ret = false; 260 decoder->packet_buffer[i].exc = false;
259 decoder->packet_buffer[i].cpu = INT_MIN; 261 decoder->packet_buffer[i].exc_ret = false;
262 decoder->packet_buffer[i].cpu = INT_MIN;
260 } 263 }
261} 264}
262 265
263static ocsd_datapath_resp_t 266static ocsd_datapath_resp_t
264cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, 267cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder,
265 const ocsd_generic_trace_elem *elem,
266 const u8 trace_chan_id, 268 const u8 trace_chan_id,
267 enum cs_etm_sample_type sample_type) 269 enum cs_etm_sample_type sample_type)
268{ 270{
@@ -278,18 +280,16 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder,
278 return OCSD_RESP_FATAL_SYS_ERR; 280 return OCSD_RESP_FATAL_SYS_ERR;
279 281
280 et = decoder->tail; 282 et = decoder->tail;
283 et = (et + 1) & (MAX_BUFFER - 1);
284 decoder->tail = et;
285 decoder->packet_count++;
286
281 decoder->packet_buffer[et].sample_type = sample_type; 287 decoder->packet_buffer[et].sample_type = sample_type;
282 decoder->packet_buffer[et].start_addr = elem->st_addr;
283 decoder->packet_buffer[et].end_addr = elem->en_addr;
284 decoder->packet_buffer[et].exc = false; 288 decoder->packet_buffer[et].exc = false;
285 decoder->packet_buffer[et].exc_ret = false; 289 decoder->packet_buffer[et].exc_ret = false;
286 decoder->packet_buffer[et].cpu = *((int *)inode->priv); 290 decoder->packet_buffer[et].cpu = *((int *)inode->priv);
287 291 decoder->packet_buffer[et].start_addr = 0xdeadbeefdeadbeefUL;
288 /* Wrap around if need be */ 292 decoder->packet_buffer[et].end_addr = 0xdeadbeefdeadbeefUL;
289 et = (et + 1) & (MAX_BUFFER - 1);
290
291 decoder->tail = et;
292 decoder->packet_count++;
293 293
294 if (decoder->packet_count == MAX_BUFFER - 1) 294 if (decoder->packet_count == MAX_BUFFER - 1)
295 return OCSD_RESP_WAIT; 295 return OCSD_RESP_WAIT;
@@ -297,6 +297,40 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder,
297 return OCSD_RESP_CONT; 297 return OCSD_RESP_CONT;
298} 298}
299 299
300static ocsd_datapath_resp_t
301cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
302 const ocsd_generic_trace_elem *elem,
303 const uint8_t trace_chan_id)
304{
305 int ret = 0;
306 struct cs_etm_packet *packet;
307
308 ret = cs_etm_decoder__buffer_packet(decoder, trace_chan_id,
309 CS_ETM_RANGE);
310 if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT)
311 return ret;
312
313 packet = &decoder->packet_buffer[decoder->tail];
314
315 packet->start_addr = elem->st_addr;
316 packet->end_addr = elem->en_addr;
317 switch (elem->last_i_type) {
318 case OCSD_INSTR_BR:
319 case OCSD_INSTR_BR_INDIRECT:
320 packet->last_instr_taken_branch = elem->last_instr_exec;
321 break;
322 case OCSD_INSTR_ISB:
323 case OCSD_INSTR_DSB_DMB:
324 case OCSD_INSTR_OTHER:
325 default:
326 packet->last_instr_taken_branch = false;
327 break;
328 }
329
330 return ret;
331
332}
333
300static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( 334static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
301 const void *context, 335 const void *context,
302 const ocsd_trc_index_t indx __maybe_unused, 336 const ocsd_trc_index_t indx __maybe_unused,
@@ -316,9 +350,8 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
316 decoder->trace_on = true; 350 decoder->trace_on = true;
317 break; 351 break;
318 case OCSD_GEN_TRC_ELEM_INSTR_RANGE: 352 case OCSD_GEN_TRC_ELEM_INSTR_RANGE:
319 resp = cs_etm_decoder__buffer_packet(decoder, elem, 353 resp = cs_etm_decoder__buffer_range(decoder, elem,
320 trace_chan_id, 354 trace_chan_id);
321 CS_ETM_RANGE);
322 break; 355 break;
323 case OCSD_GEN_TRC_ELEM_EXCEPTION: 356 case OCSD_GEN_TRC_ELEM_EXCEPTION:
324 decoder->packet_buffer[decoder->tail].exc = true; 357 decoder->packet_buffer[decoder->tail].exc = true;
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
index 3d2e6205d186..a4fdd285b145 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
@@ -30,6 +30,7 @@ struct cs_etm_packet {
30 enum cs_etm_sample_type sample_type; 30 enum cs_etm_sample_type sample_type;
31 u64 start_addr; 31 u64 start_addr;
32 u64 end_addr; 32 u64 end_addr;
33 u8 last_instr_taken_branch;
33 u8 exc; 34 u8 exc;
34 u8 exc_ret; 35 u8 exc_ret;
35 int cpu; 36 int cpu;
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index f2c98774e665..6e595d96c04d 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -32,6 +32,14 @@
32 32
33#define MAX_TIMESTAMP (~0ULL) 33#define MAX_TIMESTAMP (~0ULL)
34 34
35/*
36 * A64 instructions are always 4 bytes
37 *
38 * Only A64 is supported, so can use this constant for converting between
39 * addresses and instruction counts, calculating offsets etc
40 */
41#define A64_INSTR_SIZE 4
42
35struct cs_etm_auxtrace { 43struct cs_etm_auxtrace {
36 struct auxtrace auxtrace; 44 struct auxtrace auxtrace;
37 struct auxtrace_queues queues; 45 struct auxtrace_queues queues;
@@ -45,11 +53,15 @@ struct cs_etm_auxtrace {
45 u8 snapshot_mode; 53 u8 snapshot_mode;
46 u8 data_queued; 54 u8 data_queued;
47 u8 sample_branches; 55 u8 sample_branches;
56 u8 sample_instructions;
48 57
49 int num_cpu; 58 int num_cpu;
50 u32 auxtrace_type; 59 u32 auxtrace_type;
51 u64 branches_sample_type; 60 u64 branches_sample_type;
52 u64 branches_id; 61 u64 branches_id;
62 u64 instructions_sample_type;
63 u64 instructions_sample_period;
64 u64 instructions_id;
53 u64 **metadata; 65 u64 **metadata;
54 u64 kernel_start; 66 u64 kernel_start;
55 unsigned int pmu_type; 67 unsigned int pmu_type;
@@ -68,6 +80,12 @@ struct cs_etm_queue {
68 u64 time; 80 u64 time;
69 u64 timestamp; 81 u64 timestamp;
70 u64 offset; 82 u64 offset;
83 u64 period_instructions;
84 struct branch_stack *last_branch;
85 struct branch_stack *last_branch_rb;
86 size_t last_branch_pos;
87 struct cs_etm_packet *prev_packet;
88 struct cs_etm_packet *packet;
71}; 89};
72 90
73static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); 91static int cs_etm__update_queues(struct cs_etm_auxtrace *etm);
@@ -180,6 +198,10 @@ static void cs_etm__free_queue(void *priv)
180 thread__zput(etmq->thread); 198 thread__zput(etmq->thread);
181 cs_etm_decoder__free(etmq->decoder); 199 cs_etm_decoder__free(etmq->decoder);
182 zfree(&etmq->event_buf); 200 zfree(&etmq->event_buf);
201 zfree(&etmq->last_branch);
202 zfree(&etmq->last_branch_rb);
203 zfree(&etmq->prev_packet);
204 zfree(&etmq->packet);
183 free(etmq); 205 free(etmq);
184} 206}
185 207
@@ -276,11 +298,35 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
276 struct cs_etm_decoder_params d_params; 298 struct cs_etm_decoder_params d_params;
277 struct cs_etm_trace_params *t_params; 299 struct cs_etm_trace_params *t_params;
278 struct cs_etm_queue *etmq; 300 struct cs_etm_queue *etmq;
301 size_t szp = sizeof(struct cs_etm_packet);
279 302
280 etmq = zalloc(sizeof(*etmq)); 303 etmq = zalloc(sizeof(*etmq));
281 if (!etmq) 304 if (!etmq)
282 return NULL; 305 return NULL;
283 306
307 etmq->packet = zalloc(szp);
308 if (!etmq->packet)
309 goto out_free;
310
311 if (etm->synth_opts.last_branch || etm->sample_branches) {
312 etmq->prev_packet = zalloc(szp);
313 if (!etmq->prev_packet)
314 goto out_free;
315 }
316
317 if (etm->synth_opts.last_branch) {
318 size_t sz = sizeof(struct branch_stack);
319
320 sz += etm->synth_opts.last_branch_sz *
321 sizeof(struct branch_entry);
322 etmq->last_branch = zalloc(sz);
323 if (!etmq->last_branch)
324 goto out_free;
325 etmq->last_branch_rb = zalloc(sz);
326 if (!etmq->last_branch_rb)
327 goto out_free;
328 }
329
284 etmq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); 330 etmq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
285 if (!etmq->event_buf) 331 if (!etmq->event_buf)
286 goto out_free; 332 goto out_free;
@@ -335,6 +381,7 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
335 goto out_free_decoder; 381 goto out_free_decoder;
336 382
337 etmq->offset = 0; 383 etmq->offset = 0;
384 etmq->period_instructions = 0;
338 385
339 return etmq; 386 return etmq;
340 387
@@ -342,6 +389,10 @@ out_free_decoder:
342 cs_etm_decoder__free(etmq->decoder); 389 cs_etm_decoder__free(etmq->decoder);
343out_free: 390out_free:
344 zfree(&etmq->event_buf); 391 zfree(&etmq->event_buf);
392 zfree(&etmq->last_branch);
393 zfree(&etmq->last_branch_rb);
394 zfree(&etmq->prev_packet);
395 zfree(&etmq->packet);
345 free(etmq); 396 free(etmq);
346 397
347 return NULL; 398 return NULL;
@@ -395,6 +446,129 @@ static int cs_etm__update_queues(struct cs_etm_auxtrace *etm)
395 return 0; 446 return 0;
396} 447}
397 448
449static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq)
450{
451 struct branch_stack *bs_src = etmq->last_branch_rb;
452 struct branch_stack *bs_dst = etmq->last_branch;
453 size_t nr = 0;
454
455 /*
456 * Set the number of records before early exit: ->nr is used to
457 * determine how many branches to copy from ->entries.
458 */
459 bs_dst->nr = bs_src->nr;
460
461 /*
462 * Early exit when there is nothing to copy.
463 */
464 if (!bs_src->nr)
465 return;
466
467 /*
468 * As bs_src->entries is a circular buffer, we need to copy from it in
469 * two steps. First, copy the branches from the most recently inserted
470 * branch ->last_branch_pos until the end of bs_src->entries buffer.
471 */
472 nr = etmq->etm->synth_opts.last_branch_sz - etmq->last_branch_pos;
473 memcpy(&bs_dst->entries[0],
474 &bs_src->entries[etmq->last_branch_pos],
475 sizeof(struct branch_entry) * nr);
476
477 /*
478 * If we wrapped around at least once, the branches from the beginning
479 * of the bs_src->entries buffer and until the ->last_branch_pos element
480 * are older valid branches: copy them over. The total number of
481 * branches copied over will be equal to the number of branches asked by
482 * the user in last_branch_sz.
483 */
484 if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
485 memcpy(&bs_dst->entries[nr],
486 &bs_src->entries[0],
487 sizeof(struct branch_entry) * etmq->last_branch_pos);
488 }
489}
490
491static inline void cs_etm__reset_last_branch_rb(struct cs_etm_queue *etmq)
492{
493 etmq->last_branch_pos = 0;
494 etmq->last_branch_rb->nr = 0;
495}
496
497static inline u64 cs_etm__last_executed_instr(struct cs_etm_packet *packet)
498{
499 /*
500 * The packet records the execution range with an exclusive end address
501 *
502 * A64 instructions are constant size, so the last executed
503 * instruction is A64_INSTR_SIZE before the end address
504 * Will need to do instruction level decode for T32 instructions as
505 * they can be variable size (not yet supported).
506 */
507 return packet->end_addr - A64_INSTR_SIZE;
508}
509
510static inline u64 cs_etm__instr_count(const struct cs_etm_packet *packet)
511{
512 /*
513 * Only A64 instructions are currently supported, so can get
514 * instruction count by dividing.
515 * Will need to do instruction level decode for T32 instructions as
516 * they can be variable size (not yet supported).
517 */
518 return (packet->end_addr - packet->start_addr) / A64_INSTR_SIZE;
519}
520
521static inline u64 cs_etm__instr_addr(const struct cs_etm_packet *packet,
522 u64 offset)
523{
524 /*
525 * Only A64 instructions are currently supported, so can get
526 * instruction address by multiplying.
527 * Will need to do instruction level decode for T32 instructions as
528 * they can be variable size (not yet supported).
529 */
530 return packet->start_addr + offset * A64_INSTR_SIZE;
531}
532
533static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq)
534{
535 struct branch_stack *bs = etmq->last_branch_rb;
536 struct branch_entry *be;
537
538 /*
539 * The branches are recorded in a circular buffer in reverse
540 * chronological order: we start recording from the last element of the
541 * buffer down. After writing the first element of the stack, move the
542 * insert position back to the end of the buffer.
543 */
544 if (!etmq->last_branch_pos)
545 etmq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;
546
547 etmq->last_branch_pos -= 1;
548
549 be = &bs->entries[etmq->last_branch_pos];
550 be->from = cs_etm__last_executed_instr(etmq->prev_packet);
551 be->to = etmq->packet->start_addr;
552 /* No support for mispredict */
553 be->flags.mispred = 0;
554 be->flags.predicted = 1;
555
556 /*
557 * Increment bs->nr until reaching the number of last branches asked by
558 * the user on the command line.
559 */
560 if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
561 bs->nr += 1;
562}
563
564static int cs_etm__inject_event(union perf_event *event,
565 struct perf_sample *sample, u64 type)
566{
567 event->header.size = perf_event__sample_event_size(sample, type, 0);
568 return perf_event__synthesize_sample(event, type, 0, sample);
569}
570
571
398static int 572static int
399cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq) 573cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq)
400{ 574{
@@ -459,35 +633,105 @@ static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm,
459 } 633 }
460} 634}
461 635
636static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
637 u64 addr, u64 period)
638{
639 int ret = 0;
640 struct cs_etm_auxtrace *etm = etmq->etm;
641 union perf_event *event = etmq->event_buf;
642 struct perf_sample sample = {.ip = 0,};
643
644 event->sample.header.type = PERF_RECORD_SAMPLE;
645 event->sample.header.misc = PERF_RECORD_MISC_USER;
646 event->sample.header.size = sizeof(struct perf_event_header);
647
648 sample.ip = addr;
649 sample.pid = etmq->pid;
650 sample.tid = etmq->tid;
651 sample.id = etmq->etm->instructions_id;
652 sample.stream_id = etmq->etm->instructions_id;
653 sample.period = period;
654 sample.cpu = etmq->packet->cpu;
655 sample.flags = 0;
656 sample.insn_len = 1;
657 sample.cpumode = event->header.misc;
658
659 if (etm->synth_opts.last_branch) {
660 cs_etm__copy_last_branch_rb(etmq);
661 sample.branch_stack = etmq->last_branch;
662 }
663
664 if (etm->synth_opts.inject) {
665 ret = cs_etm__inject_event(event, &sample,
666 etm->instructions_sample_type);
667 if (ret)
668 return ret;
669 }
670
671 ret = perf_session__deliver_synth_event(etm->session, event, &sample);
672
673 if (ret)
674 pr_err(
675 "CS ETM Trace: failed to deliver instruction event, error %d\n",
676 ret);
677
678 if (etm->synth_opts.last_branch)
679 cs_etm__reset_last_branch_rb(etmq);
680
681 return ret;
682}
683
462/* 684/*
463 * The cs etm packet encodes an instruction range between a branch target 685 * The cs etm packet encodes an instruction range between a branch target
464 * and the next taken branch. Generate sample accordingly. 686 * and the next taken branch. Generate sample accordingly.
465 */ 687 */
466static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, 688static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq)
467 struct cs_etm_packet *packet)
468{ 689{
469 int ret = 0; 690 int ret = 0;
470 struct cs_etm_auxtrace *etm = etmq->etm; 691 struct cs_etm_auxtrace *etm = etmq->etm;
471 struct perf_sample sample = {.ip = 0,}; 692 struct perf_sample sample = {.ip = 0,};
472 union perf_event *event = etmq->event_buf; 693 union perf_event *event = etmq->event_buf;
473 u64 start_addr = packet->start_addr; 694 struct dummy_branch_stack {
474 u64 end_addr = packet->end_addr; 695 u64 nr;
696 struct branch_entry entries;
697 } dummy_bs;
475 698
476 event->sample.header.type = PERF_RECORD_SAMPLE; 699 event->sample.header.type = PERF_RECORD_SAMPLE;
477 event->sample.header.misc = PERF_RECORD_MISC_USER; 700 event->sample.header.misc = PERF_RECORD_MISC_USER;
478 event->sample.header.size = sizeof(struct perf_event_header); 701 event->sample.header.size = sizeof(struct perf_event_header);
479 702
480 sample.ip = start_addr; 703 sample.ip = cs_etm__last_executed_instr(etmq->prev_packet);
481 sample.pid = etmq->pid; 704 sample.pid = etmq->pid;
482 sample.tid = etmq->tid; 705 sample.tid = etmq->tid;
483 sample.addr = end_addr; 706 sample.addr = etmq->packet->start_addr;
484 sample.id = etmq->etm->branches_id; 707 sample.id = etmq->etm->branches_id;
485 sample.stream_id = etmq->etm->branches_id; 708 sample.stream_id = etmq->etm->branches_id;
486 sample.period = 1; 709 sample.period = 1;
487 sample.cpu = packet->cpu; 710 sample.cpu = etmq->packet->cpu;
488 sample.flags = 0; 711 sample.flags = 0;
489 sample.cpumode = PERF_RECORD_MISC_USER; 712 sample.cpumode = PERF_RECORD_MISC_USER;
490 713
714 /*
715 * perf report cannot handle events without a branch stack
716 */
717 if (etm->synth_opts.last_branch) {
718 dummy_bs = (struct dummy_branch_stack){
719 .nr = 1,
720 .entries = {
721 .from = sample.ip,
722 .to = sample.addr,
723 },
724 };
725 sample.branch_stack = (struct branch_stack *)&dummy_bs;
726 }
727
728 if (etm->synth_opts.inject) {
729 ret = cs_etm__inject_event(event, &sample,
730 etm->branches_sample_type);
731 if (ret)
732 return ret;
733 }
734
491 ret = perf_session__deliver_synth_event(etm->session, event, &sample); 735 ret = perf_session__deliver_synth_event(etm->session, event, &sample);
492 736
493 if (ret) 737 if (ret)
@@ -584,6 +828,24 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
584 etm->sample_branches = true; 828 etm->sample_branches = true;
585 etm->branches_sample_type = attr.sample_type; 829 etm->branches_sample_type = attr.sample_type;
586 etm->branches_id = id; 830 etm->branches_id = id;
831 id += 1;
832 attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
833 }
834
835 if (etm->synth_opts.last_branch)
836 attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
837
838 if (etm->synth_opts.instructions) {
839 attr.config = PERF_COUNT_HW_INSTRUCTIONS;
840 attr.sample_period = etm->synth_opts.period;
841 etm->instructions_sample_period = attr.sample_period;
842 err = cs_etm__synth_event(session, &attr, id);
843 if (err)
844 return err;
845 etm->sample_instructions = true;
846 etm->instructions_sample_type = attr.sample_type;
847 etm->instructions_id = id;
848 id += 1;
587 } 849 }
588 850
589 return 0; 851 return 0;
@@ -591,20 +853,68 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
591 853
592static int cs_etm__sample(struct cs_etm_queue *etmq) 854static int cs_etm__sample(struct cs_etm_queue *etmq)
593{ 855{
856 struct cs_etm_auxtrace *etm = etmq->etm;
857 struct cs_etm_packet *tmp;
594 int ret; 858 int ret;
595 struct cs_etm_packet packet; 859 u64 instrs_executed;
596 860
597 while (1) { 861 instrs_executed = cs_etm__instr_count(etmq->packet);
598 ret = cs_etm_decoder__get_packet(etmq->decoder, &packet); 862 etmq->period_instructions += instrs_executed;
599 if (ret <= 0) 863
864 /*
865 * Record a branch when the last instruction in
866 * PREV_PACKET is a branch.
867 */
868 if (etm->synth_opts.last_branch &&
869 etmq->prev_packet &&
870 etmq->prev_packet->last_instr_taken_branch)
871 cs_etm__update_last_branch_rb(etmq);
872
873 if (etm->sample_instructions &&
874 etmq->period_instructions >= etm->instructions_sample_period) {
875 /*
876 * Emit instruction sample periodically
877 * TODO: allow period to be defined in cycles and clock time
878 */
879
880 /* Get number of instructions executed after the sample point */
881 u64 instrs_over = etmq->period_instructions -
882 etm->instructions_sample_period;
883
884 /*
885 * Calculate the address of the sampled instruction (-1 as
886 * sample is reported as though instruction has just been
887 * executed, but PC has not advanced to next instruction)
888 */
889 u64 offset = (instrs_executed - instrs_over - 1);
890 u64 addr = cs_etm__instr_addr(etmq->packet, offset);
891
892 ret = cs_etm__synth_instruction_sample(
893 etmq, addr, etm->instructions_sample_period);
894 if (ret)
895 return ret;
896
897 /* Carry remaining instructions into next sample period */
898 etmq->period_instructions = instrs_over;
899 }
900
901 if (etm->sample_branches &&
902 etmq->prev_packet &&
903 etmq->prev_packet->sample_type == CS_ETM_RANGE &&
904 etmq->prev_packet->last_instr_taken_branch) {
905 ret = cs_etm__synth_branch_sample(etmq);
906 if (ret)
600 return ret; 907 return ret;
908 }
601 909
910 if (etm->sample_branches || etm->synth_opts.last_branch) {
602 /* 911 /*
603 * If the packet contains an instruction range, generate an 912 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
604 * instruction sequence event. 913 * the next incoming packet.
605 */ 914 */
606 if (packet.sample_type & CS_ETM_RANGE) 915 tmp = etmq->packet;
607 cs_etm__synth_branch_sample(etmq, &packet); 916 etmq->packet = etmq->prev_packet;
917 etmq->prev_packet = tmp;
608 } 918 }
609 919
610 return 0; 920 return 0;
@@ -621,45 +931,73 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq)
621 etm->kernel_start = machine__kernel_start(etm->machine); 931 etm->kernel_start = machine__kernel_start(etm->machine);
622 932
623 /* Go through each buffer in the queue and decode them one by one */ 933 /* Go through each buffer in the queue and decode them one by one */
624more: 934 while (1) {
625 buffer_used = 0; 935 buffer_used = 0;
626 memset(&buffer, 0, sizeof(buffer)); 936 memset(&buffer, 0, sizeof(buffer));
627 err = cs_etm__get_trace(&buffer, etmq); 937 err = cs_etm__get_trace(&buffer, etmq);
628 if (err <= 0) 938 if (err <= 0)
629 return err; 939 return err;
630 /* 940 /*
631 * We cannot assume consecutive blocks in the data file are contiguous, 941 * We cannot assume consecutive blocks in the data file are
632 * reset the decoder to force re-sync. 942 * contiguous, reset the decoder to force re-sync.
633 */ 943 */
634 err = cs_etm_decoder__reset(etmq->decoder); 944 err = cs_etm_decoder__reset(etmq->decoder);
635 if (err != 0) 945 if (err != 0)
636 return err;
637
638 /* Run trace decoder until buffer consumed or end of trace */
639 do {
640 processed = 0;
641
642 err = cs_etm_decoder__process_data_block(
643 etmq->decoder,
644 etmq->offset,
645 &buffer.buf[buffer_used],
646 buffer.len - buffer_used,
647 &processed);
648
649 if (err)
650 return err; 946 return err;
651 947
652 etmq->offset += processed; 948 /* Run trace decoder until buffer consumed or end of trace */
653 buffer_used += processed; 949 do {
950 processed = 0;
951 err = cs_etm_decoder__process_data_block(
952 etmq->decoder,
953 etmq->offset,
954 &buffer.buf[buffer_used],
955 buffer.len - buffer_used,
956 &processed);
957 if (err)
958 return err;
959
960 etmq->offset += processed;
961 buffer_used += processed;
962
963 /* Process each packet in this chunk */
964 while (1) {
965 err = cs_etm_decoder__get_packet(etmq->decoder,
966 etmq->packet);
967 if (err <= 0)
968 /*
969 * Stop processing this chunk on
970 * end of data or error
971 */
972 break;
973
974 /*
975 * If the packet contains an instruction
976 * range, generate instruction sequence
977 * events.
978 */
979 if (etmq->packet->sample_type & CS_ETM_RANGE)
980 err = cs_etm__sample(etmq);
981 }
982 } while (buffer.len > buffer_used);
654 983
655 /* 984 /*
656 * Nothing to do with an error condition, let's hope the next 985 * Generate a last branch event for the branches left in
657 * chunk will be better. 986 * the circular buffer at the end of the trace.
658 */ 987 */
659 err = cs_etm__sample(etmq); 988 if (etm->sample_instructions &&
660 } while (buffer.len > buffer_used); 989 etmq->etm->synth_opts.last_branch) {
990 struct branch_stack *bs = etmq->last_branch_rb;
991 struct branch_entry *be =
992 &bs->entries[etmq->last_branch_pos];
993
994 err = cs_etm__synth_instruction_sample(
995 etmq, be->to, etmq->period_instructions);
996 if (err)
997 return err;
998 }
661 999
662goto more; 1000 }
663 1001
664 return err; 1002 return err;
665} 1003}