aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorStephane Eranian <eranian@google.com>2012-02-09 17:20:51 -0500
committerIngo Molnar <mingo@elte.hu>2012-03-05 08:55:39 -0500
commitbce38cd53e5ddba9cb6d708c4ef3d04a4016ec7e (patch)
tree8b42880d3ff9f250e246eb31bd80e6c36c99d8e1
parent737f24bda723fdf89ecaacb99fa2bf5683c32799 (diff)
perf: Add generic taken branch sampling support
This patch adds the ability to sample taken branches to the perf_event interface. The ability to capture taken branches is very useful for all sorts of analysis. For instance, basic block profiling, call counts, statistical call graph. This new capability requires hardware assist and as such may not be available on all HW platforms. On Intel x86 it is implemented on top of the Last Branch Record (LBR) facility. To enable taken branches sampling, the PERF_SAMPLE_BRANCH_STACK bit must be set in attr->sample_type. Sampled taken branches may be filtered by type and/or priv levels. The patch adds a new field, called branch_sample_type, to the perf_event_attr structure. It contains a bitmask of filters to apply to the sampled taken branches. Filters may be implemented in HW. If the HW filter does not exist or is not good enough, some arch may also implement a SW filter. The following generic filters are currently defined: - PERF_SAMPLE_BRANCH_USER only branches whose targets are at the user level - PERF_SAMPLE_BRANCH_KERNEL only branches whose targets are at the kernel level - PERF_SAMPLE_BRANCH_HV only branches whose targets are at the hypervisor level - PERF_SAMPLE_BRANCH_ANY any type of branches (subject to priv level filters) - PERF_SAMPLE_BRANCH_ANY_CALL any call branches (may incl. syscall on some arch) - PERF_SAMPLE_BRANCH_ANY_RETURN any return branches (may incl. syscall returns on some arch) - PERF_SAMPLE_BRANCH_IND_CALL indirect call branches Obviously filters may be combined. The priv level bits are optional. If not provided, the priv level of the associated event is used. It is possible to collect branches at a priv level different from that of the associated event. Use of the kernel and hv priv levels is subject to permissions and availability (hv). The number of taken branch records present in each sample may vary based on HW, the type of sampled branches, and the executed code. Therefore each sample carries the number of taken branch records it contains. 
Signed-off-by: Stephane Eranian <eranian@google.com> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/1328826068-11713-2-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_lbr.c21
-rw-r--r--include/linux/perf_event.h71
-rw-r--r--kernel/events/core.c68
3 files changed, 148 insertions, 12 deletions
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 47a7e63bfe54..309d0cc69163 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -142,9 +142,11 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
142 142
143 rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr); 143 rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
144 144
145 cpuc->lbr_entries[i].from = msr_lastbranch.from; 145 cpuc->lbr_entries[i].from = msr_lastbranch.from;
146 cpuc->lbr_entries[i].to = msr_lastbranch.to; 146 cpuc->lbr_entries[i].to = msr_lastbranch.to;
147 cpuc->lbr_entries[i].flags = 0; 147 cpuc->lbr_entries[i].mispred = 0;
148 cpuc->lbr_entries[i].predicted = 0;
149 cpuc->lbr_entries[i].reserved = 0;
148 } 150 }
149 cpuc->lbr_stack.nr = i; 151 cpuc->lbr_stack.nr = i;
150} 152}
@@ -165,19 +167,22 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
165 167
166 for (i = 0; i < x86_pmu.lbr_nr; i++) { 168 for (i = 0; i < x86_pmu.lbr_nr; i++) {
167 unsigned long lbr_idx = (tos - i) & mask; 169 unsigned long lbr_idx = (tos - i) & mask;
168 u64 from, to, flags = 0; 170 u64 from, to, mis = 0, pred = 0;
169 171
170 rdmsrl(x86_pmu.lbr_from + lbr_idx, from); 172 rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
171 rdmsrl(x86_pmu.lbr_to + lbr_idx, to); 173 rdmsrl(x86_pmu.lbr_to + lbr_idx, to);
172 174
173 if (lbr_format == LBR_FORMAT_EIP_FLAGS) { 175 if (lbr_format == LBR_FORMAT_EIP_FLAGS) {
174 flags = !!(from & LBR_FROM_FLAG_MISPRED); 176 mis = !!(from & LBR_FROM_FLAG_MISPRED);
177 pred = !mis;
175 from = (u64)((((s64)from) << 1) >> 1); 178 from = (u64)((((s64)from) << 1) >> 1);
176 } 179 }
177 180
178 cpuc->lbr_entries[i].from = from; 181 cpuc->lbr_entries[i].from = from;
179 cpuc->lbr_entries[i].to = to; 182 cpuc->lbr_entries[i].to = to;
180 cpuc->lbr_entries[i].flags = flags; 183 cpuc->lbr_entries[i].mispred = mis;
184 cpuc->lbr_entries[i].predicted = pred;
185 cpuc->lbr_entries[i].reserved = 0;
181 } 186 }
182 cpuc->lbr_stack.nr = i; 187 cpuc->lbr_stack.nr = i;
183} 188}
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 64426b71381f..5fc494f4a094 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -129,11 +129,40 @@ enum perf_event_sample_format {
129 PERF_SAMPLE_PERIOD = 1U << 8, 129 PERF_SAMPLE_PERIOD = 1U << 8,
130 PERF_SAMPLE_STREAM_ID = 1U << 9, 130 PERF_SAMPLE_STREAM_ID = 1U << 9,
131 PERF_SAMPLE_RAW = 1U << 10, 131 PERF_SAMPLE_RAW = 1U << 10,
132 PERF_SAMPLE_BRANCH_STACK = 1U << 11,
132 133
133 PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */ 134 PERF_SAMPLE_MAX = 1U << 12, /* non-ABI */
134}; 135};
135 136
136/* 137/*
138 * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set
139 *
140 * If the user does not pass priv level information via branch_sample_type,
141 * the kernel uses the event's priv level. Branch and event priv levels do
142 * not have to match. Branch priv level is checked for permissions.
143 *
144 * The branch types can be combined, however BRANCH_ANY covers all types
145 * of branches and therefore it supersedes all the other types.
146 */
147enum perf_branch_sample_type {
148 PERF_SAMPLE_BRANCH_USER = 1U << 0, /* user branches */
149 PERF_SAMPLE_BRANCH_KERNEL = 1U << 1, /* kernel branches */
150 PERF_SAMPLE_BRANCH_HV = 1U << 2, /* hypervisor branches */
151
152 PERF_SAMPLE_BRANCH_ANY = 1U << 3, /* any branch types */
153 PERF_SAMPLE_BRANCH_ANY_CALL = 1U << 4, /* any call branch */
154 PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << 5, /* any return branch */
155 PERF_SAMPLE_BRANCH_IND_CALL = 1U << 6, /* indirect calls */
156
157 PERF_SAMPLE_BRANCH_MAX = 1U << 7, /* non-ABI */
158};
159
160#define PERF_SAMPLE_BRANCH_PLM_ALL \
161 (PERF_SAMPLE_BRANCH_USER|\
162 PERF_SAMPLE_BRANCH_KERNEL|\
163 PERF_SAMPLE_BRANCH_HV)
164
165/*
137 * The format of the data returned by read() on a perf event fd, 166 * The format of the data returned by read() on a perf event fd,
138 * as specified by attr.read_format: 167 * as specified by attr.read_format:
139 * 168 *
@@ -240,6 +269,7 @@ struct perf_event_attr {
240 __u64 bp_len; 269 __u64 bp_len;
241 __u64 config2; /* extension of config1 */ 270 __u64 config2; /* extension of config1 */
242 }; 271 };
272 __u64 branch_sample_type; /* enum branch_sample_type */
243}; 273};
244 274
245/* 275/*
@@ -458,6 +488,8 @@ enum perf_event_type {
458 * 488 *
459 * { u32 size; 489 * { u32 size;
460 * char data[size];}&& PERF_SAMPLE_RAW 490 * char data[size];}&& PERF_SAMPLE_RAW
491 *
492 * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK
461 * }; 493 * };
462 */ 494 */
463 PERF_RECORD_SAMPLE = 9, 495 PERF_RECORD_SAMPLE = 9,
@@ -530,12 +562,34 @@ struct perf_raw_record {
530 void *data; 562 void *data;
531}; 563};
532 564
565/*
566 * single taken branch record layout:
567 *
568 * from: source instruction (may not always be a branch insn)
569 * to: branch target
570 * mispred: branch target was mispredicted
571 * predicted: branch target was predicted
572 *
573 * support for mispred, predicted is optional. In case it
574 * is not supported mispred = predicted = 0.
575 */
533struct perf_branch_entry { 576struct perf_branch_entry {
534 __u64 from; 577 __u64 from;
535 __u64 to; 578 __u64 to;
536 __u64 flags; 579 __u64 mispred:1, /* target mispredicted */
580 predicted:1,/* target predicted */
581 reserved:62;
537}; 582};
538 583
584/*
585 * branch stack layout:
586 * nr: number of taken branches stored in entries[]
587 *
588 * Note that nr can vary from sample to sample
589 * branches (to, from) are stored from most recent
590 * to least recent, i.e., entries[0] contains the most
591 * recent branch.
592 */
539struct perf_branch_stack { 593struct perf_branch_stack {
540 __u64 nr; 594 __u64 nr;
541 struct perf_branch_entry entries[0]; 595 struct perf_branch_entry entries[0];
@@ -566,7 +620,9 @@ struct hw_perf_event {
566 unsigned long event_base; 620 unsigned long event_base;
567 int idx; 621 int idx;
568 int last_cpu; 622 int last_cpu;
623
569 struct hw_perf_event_extra extra_reg; 624 struct hw_perf_event_extra extra_reg;
625 struct hw_perf_event_extra branch_reg;
570 }; 626 };
571 struct { /* software */ 627 struct { /* software */
572 struct hrtimer hrtimer; 628 struct hrtimer hrtimer;
@@ -1007,12 +1063,14 @@ struct perf_sample_data {
1007 u64 period; 1063 u64 period;
1008 struct perf_callchain_entry *callchain; 1064 struct perf_callchain_entry *callchain;
1009 struct perf_raw_record *raw; 1065 struct perf_raw_record *raw;
1066 struct perf_branch_stack *br_stack;
1010}; 1067};
1011 1068
1012static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr) 1069static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr)
1013{ 1070{
1014 data->addr = addr; 1071 data->addr = addr;
1015 data->raw = NULL; 1072 data->raw = NULL;
1073 data->br_stack = NULL;
1016} 1074}
1017 1075
1018extern void perf_output_sample(struct perf_output_handle *handle, 1076extern void perf_output_sample(struct perf_output_handle *handle,
@@ -1151,6 +1209,11 @@ extern void perf_bp_event(struct perf_event *event, void *data);
1151# define perf_instruction_pointer(regs) instruction_pointer(regs) 1209# define perf_instruction_pointer(regs) instruction_pointer(regs)
1152#endif 1210#endif
1153 1211
1212static inline bool has_branch_stack(struct perf_event *event)
1213{
1214 return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;
1215}
1216
1154extern int perf_output_begin(struct perf_output_handle *handle, 1217extern int perf_output_begin(struct perf_output_handle *handle,
1155 struct perf_event *event, unsigned int size); 1218 struct perf_event *event, unsigned int size);
1156extern void perf_output_end(struct perf_output_handle *handle); 1219extern void perf_output_end(struct perf_output_handle *handle);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index e8b32ac75ce3..5820efdf47cd 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -118,6 +118,13 @@ static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
118 PERF_FLAG_FD_OUTPUT |\ 118 PERF_FLAG_FD_OUTPUT |\
119 PERF_FLAG_PID_CGROUP) 119 PERF_FLAG_PID_CGROUP)
120 120
121/*
122 * branch priv levels that need permission checks
123 */
124#define PERF_SAMPLE_BRANCH_PERM_PLM \
125 (PERF_SAMPLE_BRANCH_KERNEL |\
126 PERF_SAMPLE_BRANCH_HV)
127
121enum event_type_t { 128enum event_type_t {
122 EVENT_FLEXIBLE = 0x1, 129 EVENT_FLEXIBLE = 0x1,
123 EVENT_PINNED = 0x2, 130 EVENT_PINNED = 0x2,
@@ -3907,6 +3914,24 @@ void perf_output_sample(struct perf_output_handle *handle,
3907 } 3914 }
3908 } 3915 }
3909 } 3916 }
3917
3918 if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
3919 if (data->br_stack) {
3920 size_t size;
3921
3922 size = data->br_stack->nr
3923 * sizeof(struct perf_branch_entry);
3924
3925 perf_output_put(handle, data->br_stack->nr);
3926 perf_output_copy(handle, data->br_stack->entries, size);
3927 } else {
3928 /*
3929 * we always store at least the value of nr
3930 */
3931 u64 nr = 0;
3932 perf_output_put(handle, nr);
3933 }
3934 }
3910} 3935}
3911 3936
3912void perf_prepare_sample(struct perf_event_header *header, 3937void perf_prepare_sample(struct perf_event_header *header,
@@ -3949,6 +3974,15 @@ void perf_prepare_sample(struct perf_event_header *header,
3949 WARN_ON_ONCE(size & (sizeof(u64)-1)); 3974 WARN_ON_ONCE(size & (sizeof(u64)-1));
3950 header->size += size; 3975 header->size += size;
3951 } 3976 }
3977
3978 if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
3979 int size = sizeof(u64); /* nr */
3980 if (data->br_stack) {
3981 size += data->br_stack->nr
3982 * sizeof(struct perf_branch_entry);
3983 }
3984 header->size += size;
3985 }
3952} 3986}
3953 3987
3954static void perf_event_output(struct perf_event *event, 3988static void perf_event_output(struct perf_event *event,
@@ -5935,6 +5969,40 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
5935 if (attr->read_format & ~(PERF_FORMAT_MAX-1)) 5969 if (attr->read_format & ~(PERF_FORMAT_MAX-1))
5936 return -EINVAL; 5970 return -EINVAL;
5937 5971
5972 if (attr->sample_type & PERF_SAMPLE_BRANCH_STACK) {
5973 u64 mask = attr->branch_sample_type;
5974
5975 /* only using defined bits */
5976 if (mask & ~(PERF_SAMPLE_BRANCH_MAX-1))
5977 return -EINVAL;
5978
5979 /* at least one branch bit must be set */
5980 if (!(mask & ~PERF_SAMPLE_BRANCH_PLM_ALL))
5981 return -EINVAL;
5982
5983 /* kernel level capture: check permissions */
5984 if ((mask & PERF_SAMPLE_BRANCH_PERM_PLM)
5985 && perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
5986 return -EACCES;
5987
5988 /* propagate priv level, when not set for branch */
5989 if (!(mask & PERF_SAMPLE_BRANCH_PLM_ALL)) {
5990
5991 /* exclude_kernel checked on syscall entry */
5992 if (!attr->exclude_kernel)
5993 mask |= PERF_SAMPLE_BRANCH_KERNEL;
5994
5995 if (!attr->exclude_user)
5996 mask |= PERF_SAMPLE_BRANCH_USER;
5997
5998 if (!attr->exclude_hv)
5999 mask |= PERF_SAMPLE_BRANCH_HV;
6000 /*
6001 * adjust user setting (for HW filter setup)
6002 */
6003 attr->branch_sample_type = mask;
6004 }
6005 }
5938out: 6006out:
5939 return ret; 6007 return ret;
5940 6008