diff options
| author | Ingo Molnar <mingo@elte.hu> | 2012-03-12 15:46:35 -0400 |
|---|---|---|
| committer | Ingo Molnar <mingo@elte.hu> | 2012-03-12 15:47:05 -0400 |
| commit | bea95c152dee1791dd02cbc708afbb115bb00f9a (patch) | |
| tree | af9994c42c5fdd81ba3dadd7b812e2fa85273353 /include/linux | |
| parent | f9b4eeb809c6d031cc9561cc34dd691701cb2c2a (diff) | |
| parent | 24bff2dc0f77b1f186b7bdf30060caf3df191a68 (diff) | |
Merge branch 'perf/hw-branch-sampling' into perf/core
Merge reason: The 'perf record -b' hardware branch sampling feature is ready for upstream.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'include/linux')
| -rw-r--r-- | include/linux/perf_event.h | 82 |
1 files changed, 77 insertions, 5 deletions
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 64426b71381f..bd9f55a5958d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h | |||
| @@ -129,11 +129,40 @@ enum perf_event_sample_format { | |||
| 129 | PERF_SAMPLE_PERIOD = 1U << 8, | 129 | PERF_SAMPLE_PERIOD = 1U << 8, |
| 130 | PERF_SAMPLE_STREAM_ID = 1U << 9, | 130 | PERF_SAMPLE_STREAM_ID = 1U << 9, |
| 131 | PERF_SAMPLE_RAW = 1U << 10, | 131 | PERF_SAMPLE_RAW = 1U << 10, |
| 132 | PERF_SAMPLE_BRANCH_STACK = 1U << 11, | ||
| 132 | 133 | ||
| 133 | PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */ | 134 | PERF_SAMPLE_MAX = 1U << 12, /* non-ABI */ |
| 134 | }; | 135 | }; |
| 135 | 136 | ||
| 136 | /* | 137 | /* |
| 138 | * values to program into branch_sample_type when PERF_SAMPLE_BRANCH_STACK is set | ||
| 139 | * | ||
| 140 | * If the user does not pass priv level information via branch_sample_type, | ||
| 141 | * the kernel uses the event's priv level. Branch and event priv levels do | ||
| 142 | * not have to match. Branch priv level is checked for permissions. | ||
| 143 | * | ||
| 144 | * The branch types can be combined, however BRANCH_ANY covers all types | ||
| 145 | * of branches and therefore it supersedes all the other types. | ||
| 146 | */ | ||
| 147 | enum perf_branch_sample_type { | ||
| 148 | PERF_SAMPLE_BRANCH_USER = 1U << 0, /* user branches */ | ||
| 149 | PERF_SAMPLE_BRANCH_KERNEL = 1U << 1, /* kernel branches */ | ||
| 150 | PERF_SAMPLE_BRANCH_HV = 1U << 2, /* hypervisor branches */ | ||
| 151 | |||
| 152 | PERF_SAMPLE_BRANCH_ANY = 1U << 3, /* any branch types */ | ||
| 153 | PERF_SAMPLE_BRANCH_ANY_CALL = 1U << 4, /* any call branch */ | ||
| 154 | PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << 5, /* any return branch */ | ||
| 155 | PERF_SAMPLE_BRANCH_IND_CALL = 1U << 6, /* indirect calls */ | ||
| 156 | |||
| 157 | PERF_SAMPLE_BRANCH_MAX = 1U << 7, /* non-ABI */ | ||
| 158 | }; | ||
| 159 | |||
| 160 | #define PERF_SAMPLE_BRANCH_PLM_ALL \ | ||
| 161 | (PERF_SAMPLE_BRANCH_USER|\ | ||
| 162 | PERF_SAMPLE_BRANCH_KERNEL|\ | ||
| 163 | PERF_SAMPLE_BRANCH_HV) | ||
| 164 | |||
| 165 | /* | ||
| 137 | * The format of the data returned by read() on a perf event fd, | 166 | * The format of the data returned by read() on a perf event fd, |
| 138 | * as specified by attr.read_format: | 167 | * as specified by attr.read_format: |
| 139 | * | 168 | * |
| @@ -163,6 +192,8 @@ enum perf_event_read_format { | |||
| 163 | }; | 192 | }; |
| 164 | 193 | ||
| 165 | #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ | 194 | #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ |
| 195 | #define PERF_ATTR_SIZE_VER1 72 /* add: config2 */ | ||
| 196 | #define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */ | ||
| 166 | 197 | ||
| 167 | /* | 198 | /* |
| 168 | * Hardware event_id to monitor via a performance monitoring event: | 199 | * Hardware event_id to monitor via a performance monitoring event: |
| @@ -240,6 +271,7 @@ struct perf_event_attr { | |||
| 240 | __u64 bp_len; | 271 | __u64 bp_len; |
| 241 | __u64 config2; /* extension of config1 */ | 272 | __u64 config2; /* extension of config1 */ |
| 242 | }; | 273 | }; |
| 274 | __u64 branch_sample_type; /* enum perf_branch_sample_type */ | ||
| 243 | }; | 275 | }; |
| 244 | 276 | ||
| 245 | /* | 277 | /* |
| @@ -458,6 +490,8 @@ enum perf_event_type { | |||
| 458 | * | 490 | * |
| 459 | * { u32 size; | 491 | * { u32 size; |
| 460 | * char data[size];}&& PERF_SAMPLE_RAW | 492 | * char data[size];}&& PERF_SAMPLE_RAW |
| 493 | * | ||
| 494 | * { u64 nr; { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK | ||
| 461 | * }; | 495 | * }; |
| 462 | */ | 496 | */ |
| 463 | PERF_RECORD_SAMPLE = 9, | 497 | PERF_RECORD_SAMPLE = 9, |
| @@ -530,12 +564,34 @@ struct perf_raw_record { | |||
| 530 | void *data; | 564 | void *data; |
| 531 | }; | 565 | }; |
| 532 | 566 | ||
| 567 | /* | ||
| 568 | * single taken branch record layout: | ||
| 569 | * | ||
| 570 | * from: source instruction (may not always be a branch insn) | ||
| 571 | * to: branch target | ||
| 572 | * mispred: branch target was mispredicted | ||
| 573 | * predicted: branch target was predicted | ||
| 574 | * | ||
| 575 | * support for mispred, predicted is optional. In case it | ||
| 576 | * is not supported mispred = predicted = 0. | ||
| 577 | */ | ||
| 533 | struct perf_branch_entry { | 578 | struct perf_branch_entry { |
| 534 | __u64 from; | 579 | __u64 from; |
| 535 | __u64 to; | 580 | __u64 to; |
| 536 | __u64 flags; | 581 | __u64 mispred:1, /* target mispredicted */ |
| 582 | predicted:1,/* target predicted */ | ||
| 583 | reserved:62; | ||
| 537 | }; | 584 | }; |
| 538 | 585 | ||
| 586 | /* | ||
| 587 | * branch stack layout: | ||
| 588 | * nr: number of taken branches stored in entries[] | ||
| 589 | * | ||
| 590 | * Note that nr can vary from sample to sample. | ||
| 591 | * Branches (from, to) are stored from most recent | ||
| 592 | * to least recent, i.e., entries[0] contains the most | ||
| 593 | * recent branch. | ||
| 594 | */ | ||
| 539 | struct perf_branch_stack { | 595 | struct perf_branch_stack { |
| 540 | __u64 nr; | 596 | __u64 nr; |
| 541 | struct perf_branch_entry entries[0]; | 597 | struct perf_branch_entry entries[0]; |
| @@ -566,7 +622,9 @@ struct hw_perf_event { | |||
| 566 | unsigned long event_base; | 622 | unsigned long event_base; |
| 567 | int idx; | 623 | int idx; |
| 568 | int last_cpu; | 624 | int last_cpu; |
| 625 | |||
| 569 | struct hw_perf_event_extra extra_reg; | 626 | struct hw_perf_event_extra extra_reg; |
| 627 | struct hw_perf_event_extra branch_reg; | ||
| 570 | }; | 628 | }; |
| 571 | struct { /* software */ | 629 | struct { /* software */ |
| 572 | struct hrtimer hrtimer; | 630 | struct hrtimer hrtimer; |
| @@ -690,6 +748,11 @@ struct pmu { | |||
| 690 | * if no implementation is provided it will default to: event->hw.idx + 1. | 748 | * if no implementation is provided it will default to: event->hw.idx + 1. |
| 691 | */ | 749 | */ |
| 692 | int (*event_idx) (struct perf_event *event); /*optional */ | 750 | int (*event_idx) (struct perf_event *event); /*optional */ |
| 751 | |||
| 752 | /* | ||
| 753 | * flush branch stack on context-switches (needed in cpu-wide mode) | ||
| 754 | */ | ||
| 755 | void (*flush_branch_stack) (void); | ||
| 693 | }; | 756 | }; |
| 694 | 757 | ||
| 695 | /** | 758 | /** |
| @@ -923,7 +986,8 @@ struct perf_event_context { | |||
| 923 | u64 parent_gen; | 986 | u64 parent_gen; |
| 924 | u64 generation; | 987 | u64 generation; |
| 925 | int pin_count; | 988 | int pin_count; |
| 926 | int nr_cgroups; /* cgroup events present */ | 989 | int nr_cgroups; /* cgroup evts */ |
| 990 | int nr_branch_stack; /* branch_stack evt */ | ||
| 927 | struct rcu_head rcu_head; | 991 | struct rcu_head rcu_head; |
| 928 | }; | 992 | }; |
| 929 | 993 | ||
| @@ -988,6 +1052,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, | |||
| 988 | extern u64 perf_event_read_value(struct perf_event *event, | 1052 | extern u64 perf_event_read_value(struct perf_event *event, |
| 989 | u64 *enabled, u64 *running); | 1053 | u64 *enabled, u64 *running); |
| 990 | 1054 | ||
| 1055 | |||
| 991 | struct perf_sample_data { | 1056 | struct perf_sample_data { |
| 992 | u64 type; | 1057 | u64 type; |
| 993 | 1058 | ||
| @@ -1007,12 +1072,14 @@ struct perf_sample_data { | |||
| 1007 | u64 period; | 1072 | u64 period; |
| 1008 | struct perf_callchain_entry *callchain; | 1073 | struct perf_callchain_entry *callchain; |
| 1009 | struct perf_raw_record *raw; | 1074 | struct perf_raw_record *raw; |
| 1075 | struct perf_branch_stack *br_stack; | ||
| 1010 | }; | 1076 | }; |
| 1011 | 1077 | ||
| 1012 | static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr) | 1078 | static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr) |
| 1013 | { | 1079 | { |
| 1014 | data->addr = addr; | 1080 | data->addr = addr; |
| 1015 | data->raw = NULL; | 1081 | data->raw = NULL; |
| 1082 | data->br_stack = NULL; | ||
| 1016 | } | 1083 | } |
| 1017 | 1084 | ||
| 1018 | extern void perf_output_sample(struct perf_output_handle *handle, | 1085 | extern void perf_output_sample(struct perf_output_handle *handle, |
| @@ -1151,6 +1218,11 @@ extern void perf_bp_event(struct perf_event *event, void *data); | |||
| 1151 | # define perf_instruction_pointer(regs) instruction_pointer(regs) | 1218 | # define perf_instruction_pointer(regs) instruction_pointer(regs) |
| 1152 | #endif | 1219 | #endif |
| 1153 | 1220 | ||
| 1221 | static inline bool has_branch_stack(struct perf_event *event) | ||
| 1222 | { | ||
| 1223 | return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK; | ||
| 1224 | } | ||
| 1225 | |||
| 1154 | extern int perf_output_begin(struct perf_output_handle *handle, | 1226 | extern int perf_output_begin(struct perf_output_handle *handle, |
| 1155 | struct perf_event *event, unsigned int size); | 1227 | struct perf_event *event, unsigned int size); |
| 1156 | extern void perf_output_end(struct perf_output_handle *handle); | 1228 | extern void perf_output_end(struct perf_output_handle *handle); |
