diff options
author | Stephane Eranian <eranian@google.com> | 2013-01-24 10:10:31 -0500 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2013-04-01 11:15:59 -0400 |
commit | d6be9ad6c960f43800a6f118932bc8a5a4eadcd1 (patch) | |
tree | 4b3a888665a729d2ccfd06c9bab532aaa7955e44 | |
parent | c3feedf2aaf9ac8bad6f19f5d21e4ee0b4b87e9c (diff) |
perf: Add generic memory sampling interface
This patch adds PERF_SAMPLE_DATA_SRC.
PERF_SAMPLE_DATA_SRC collects the data source, i.e., where
the data associated with the sampled instruction came
from. The information is stored in a perf_mem_data_src
union. It contains opcode, memory level, TLB, snoop, and
lock information, subject to availability in hardware.
Signed-off-by: Stephane Eranian <eranian@google.com>
Cc: peterz@infradead.org
Cc: ak@linux.intel.com
Cc: acme@redhat.com
Cc: jolsa@redhat.com
Cc: namhyung.kim@lge.com
Link: http://lkml.kernel.org/r/1359040242-8269-8-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r-- | include/linux/perf_event.h | 2 | ||||
-rw-r--r-- | include/uapi/linux/perf_event.h | 68 | ||||
-rw-r--r-- | kernel/events/core.c | 6 |
3 files changed, 74 insertions, 2 deletions
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 7ce0b37b155b..42a6daaf4e0a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h | |||
@@ -568,6 +568,7 @@ struct perf_sample_data { | |||
568 | u32 reserved; | 568 | u32 reserved; |
569 | } cpu_entry; | 569 | } cpu_entry; |
570 | u64 period; | 570 | u64 period; |
571 | union perf_mem_data_src data_src; | ||
571 | struct perf_callchain_entry *callchain; | 572 | struct perf_callchain_entry *callchain; |
572 | struct perf_raw_record *raw; | 573 | struct perf_raw_record *raw; |
573 | struct perf_branch_stack *br_stack; | 574 | struct perf_branch_stack *br_stack; |
@@ -588,6 +589,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data, | |||
588 | data->regs_user.regs = NULL; | 589 | data->regs_user.regs = NULL; |
589 | data->stack_user_size = 0; | 590 | data->stack_user_size = 0; |
590 | data->weight = 0; | 591 | data->weight = 0; |
592 | data->data_src.val = 0; | ||
591 | } | 593 | } |
592 | 594 | ||
593 | extern void perf_output_sample(struct perf_output_handle *handle, | 595 | extern void perf_output_sample(struct perf_output_handle *handle, |
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index cdc255da02e2..5b5762006855 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h | |||
@@ -133,9 +133,9 @@ enum perf_event_sample_format { | |||
133 | PERF_SAMPLE_REGS_USER = 1U << 12, | 133 | PERF_SAMPLE_REGS_USER = 1U << 12, |
134 | PERF_SAMPLE_STACK_USER = 1U << 13, | 134 | PERF_SAMPLE_STACK_USER = 1U << 13, |
135 | PERF_SAMPLE_WEIGHT = 1U << 14, | 135 | PERF_SAMPLE_WEIGHT = 1U << 14, |
136 | PERF_SAMPLE_DATA_SRC = 1U << 15, | ||
136 | 137 | ||
137 | PERF_SAMPLE_MAX = 1U << 15, /* non-ABI */ | 138 | PERF_SAMPLE_MAX = 1U << 16, /* non-ABI */ |
138 | |||
139 | }; | 139 | }; |
140 | 140 | ||
141 | /* | 141 | /* |
@@ -592,6 +592,7 @@ enum perf_event_type { | |||
592 | * u64 dyn_size; } && PERF_SAMPLE_STACK_USER | 592 | * u64 dyn_size; } && PERF_SAMPLE_STACK_USER |
593 | * | 593 | * |
594 | * { u64 weight; } && PERF_SAMPLE_WEIGHT | 594 | * { u64 weight; } && PERF_SAMPLE_WEIGHT |
595 | * { u64 data_src; } && PERF_SAMPLE_DATA_SRC | ||
595 | * }; | 596 | * }; |
596 | */ | 597 | */ |
597 | PERF_RECORD_SAMPLE = 9, | 598 | PERF_RECORD_SAMPLE = 9, |
@@ -617,4 +618,67 @@ enum perf_callchain_context { | |||
617 | #define PERF_FLAG_FD_OUTPUT (1U << 1) | 618 | #define PERF_FLAG_FD_OUTPUT (1U << 1) |
618 | #define PERF_FLAG_PID_CGROUP (1U << 2) /* pid=cgroup id, per-cpu mode only */ | 619 | #define PERF_FLAG_PID_CGROUP (1U << 2) /* pid=cgroup id, per-cpu mode only */ |
619 | 620 | ||
621 | union perf_mem_data_src { | ||
622 | __u64 val; | ||
623 | struct { | ||
624 | __u64 mem_op:5, /* type of opcode */ | ||
625 | mem_lvl:14, /* memory hierarchy level */ | ||
626 | mem_snoop:5, /* snoop mode */ | ||
627 | mem_lock:2, /* lock instr */ | ||
628 | mem_dtlb:7, /* tlb access */ | ||
629 | mem_rsvd:31; | ||
630 | }; | ||
631 | }; | ||
632 | |||
633 | /* type of opcode (load/store/prefetch,code) */ | ||
634 | #define PERF_MEM_OP_NA 0x01 /* not available */ | ||
635 | #define PERF_MEM_OP_LOAD 0x02 /* load instruction */ | ||
636 | #define PERF_MEM_OP_STORE 0x04 /* store instruction */ | ||
637 | #define PERF_MEM_OP_PFETCH 0x08 /* prefetch */ | ||
638 | #define PERF_MEM_OP_EXEC 0x10 /* code (execution) */ | ||
639 | #define PERF_MEM_OP_SHIFT 0 | ||
640 | |||
641 | /* memory hierarchy (memory level, hit or miss) */ | ||
642 | #define PERF_MEM_LVL_NA 0x01 /* not available */ | ||
643 | #define PERF_MEM_LVL_HIT 0x02 /* hit level */ | ||
644 | #define PERF_MEM_LVL_MISS 0x04 /* miss level */ | ||
645 | #define PERF_MEM_LVL_L1 0x08 /* L1 */ | ||
646 | #define PERF_MEM_LVL_LFB 0x10 /* Line Fill Buffer */ | ||
647 | #define PERF_MEM_LVL_L2 0x20 /* L2 hit */ | ||
648 | #define PERF_MEM_LVL_L3 0x40 /* L3 hit */ | ||
649 | #define PERF_MEM_LVL_LOC_RAM 0x80 /* Local DRAM */ | ||
650 | #define PERF_MEM_LVL_REM_RAM1 0x100 /* Remote DRAM (1 hop) */ | ||
651 | #define PERF_MEM_LVL_REM_RAM2 0x200 /* Remote DRAM (2 hops) */ | ||
652 | #define PERF_MEM_LVL_REM_CCE1 0x400 /* Remote Cache (1 hop) */ | ||
653 | #define PERF_MEM_LVL_REM_CCE2 0x800 /* Remote Cache (2 hops) */ | ||
654 | #define PERF_MEM_LVL_IO 0x1000 /* I/O memory */ | ||
655 | #define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */ | ||
656 | #define PERF_MEM_LVL_SHIFT 5 | ||
657 | |||
658 | /* snoop mode */ | ||
659 | #define PERF_MEM_SNOOP_NA 0x01 /* not available */ | ||
660 | #define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */ | ||
661 | #define PERF_MEM_SNOOP_HIT 0x04 /* snoop hit */ | ||
662 | #define PERF_MEM_SNOOP_MISS 0x08 /* snoop miss */ | ||
663 | #define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */ | ||
664 | #define PERF_MEM_SNOOP_SHIFT 19 | ||
665 | |||
666 | /* locked instruction */ | ||
667 | #define PERF_MEM_LOCK_NA 0x01 /* not available */ | ||
668 | #define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */ | ||
669 | #define PERF_MEM_LOCK_SHIFT 24 | ||
670 | |||
671 | /* TLB access */ | ||
672 | #define PERF_MEM_TLB_NA 0x01 /* not available */ | ||
673 | #define PERF_MEM_TLB_HIT 0x02 /* hit level */ | ||
674 | #define PERF_MEM_TLB_MISS 0x04 /* miss level */ | ||
675 | #define PERF_MEM_TLB_L1 0x08 /* L1 */ | ||
676 | #define PERF_MEM_TLB_L2 0x10 /* L2 */ | ||
677 | #define PERF_MEM_TLB_WK 0x20 /* Hardware Walker*/ | ||
678 | #define PERF_MEM_TLB_OS 0x40 /* OS fault handler */ | ||
679 | #define PERF_MEM_TLB_SHIFT 26 | ||
680 | |||
681 | #define PERF_MEM_S(a, s) \ | ||
682 | (((u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) | ||
683 | |||
620 | #endif /* _UAPI_LINUX_PERF_EVENT_H */ | 684 | #endif /* _UAPI_LINUX_PERF_EVENT_H */ |
diff --git a/kernel/events/core.c b/kernel/events/core.c index 9e3edb272b3e..77c96d18c23a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -982,6 +982,9 @@ static void perf_event__header_size(struct perf_event *event) | |||
982 | if (sample_type & PERF_SAMPLE_READ) | 982 | if (sample_type & PERF_SAMPLE_READ) |
983 | size += event->read_size; | 983 | size += event->read_size; |
984 | 984 | ||
985 | if (sample_type & PERF_SAMPLE_DATA_SRC) | ||
986 | size += sizeof(data->data_src.val); | ||
987 | |||
985 | event->header_size = size; | 988 | event->header_size = size; |
986 | } | 989 | } |
987 | 990 | ||
@@ -4199,6 +4202,9 @@ void perf_output_sample(struct perf_output_handle *handle, | |||
4199 | 4202 | ||
4200 | if (sample_type & PERF_SAMPLE_WEIGHT) | 4203 | if (sample_type & PERF_SAMPLE_WEIGHT) |
4201 | perf_output_put(handle, data->weight); | 4204 | perf_output_put(handle, data->weight); |
4205 | |||
4206 | if (sample_type & PERF_SAMPLE_DATA_SRC) | ||
4207 | perf_output_put(handle, data->data_src.val); | ||
4202 | } | 4208 | } |
4203 | 4209 | ||
4204 | void perf_prepare_sample(struct perf_event_header *header, | 4210 | void perf_prepare_sample(struct perf_event_header *header, |