aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/perf.h
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-03-20 13:29:15 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-03-20 13:29:15 -0400
commit9c2b957db1772ebf942ae7a9346b14eba6c8ca66 (patch)
tree0dbb83e57260ea7fc0dc421f214d5f1b26262005 /tools/perf/perf.h
parent0bbfcaff9b2a69c71a95e6902253487ab30cb498 (diff)
parentbea95c152dee1791dd02cbc708afbb115bb00f9a (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf events changes for v3.4 from Ingo Molnar: - New "hardware based branch profiling" feature both on the kernel and the tooling side, on CPUs that support it. (modern x86 Intel CPUs with the 'LBR' hardware feature currently.) This new feature is basically a sophisticated 'magnifying glass' for branch execution - something that is pretty difficult to extract from regular, function histogram centric profiles. The simplest mode is activated via 'perf record -b', and the result looks like this in perf report: $ perf record -b any_call,u -e cycles:u branchy $ perf report -b --sort=symbol 52.34% [.] main [.] f1 24.04% [.] f1 [.] f3 23.60% [.] f1 [.] f2 0.01% [k] _IO_new_file_xsputn [k] _IO_file_overflow 0.01% [k] _IO_vfprintf_internal [k] _IO_new_file_xsputn 0.01% [k] _IO_vfprintf_internal [k] strchrnul 0.01% [k] __printf [k] _IO_vfprintf_internal 0.01% [k] main [k] __printf This output shows from/to branch columns and shows the highest percentage (from,to) jump combinations - i.e. the most likely taken branches in the system. "branches" can also include function calls and any other synchronous and asynchronous transitions of the instruction pointer that are not 'next instruction' - such as system calls, traps, interrupts, etc. This feature comes with (hopefully intuitive) flat ascii and TUI support in perf report. - Various 'perf annotate' visual improvements for us assembly junkies. It will now recognize function calls in the TUI and by hitting enter you can follow the call (recursively) and back, amongst other improvements. - Multiple threads/processes recording support in perf record, perf stat, perf top - which is activated via a comma-list of PIDs: perf top -p 21483,21485 perf stat -p 21483,21485 -ddd perf record -p 21483,21485 - Support for per UID views, via the --uid paramter to perf top, perf report, etc. For example 'perf top --uid mingo' will only show the tasks that I am running, excluding other users, root, etc. - Jump label restructurings and improvements - this includes the factoring out of the (hopefully much clearer) include/linux/static_key.h generic facility: struct static_key key = STATIC_KEY_INIT_FALSE; ... if (static_key_false(&key)) do unlikely code else do likely code ... static_key_slow_inc(); ... static_key_slow_inc(); ... The static_key_false() branch will be generated into the code with as little impact to the likely code path as possible. the static_key_slow_*() APIs flip the branch via live kernel code patching. This facility can now be used more widely within the kernel to micro-optimize hot branches whose likelihood matches the static-key usage and fast/slow cost patterns. - SW function tracer improvements: perf support and filtering support. - Various hardenings of the perf.data ABI, to make older perf.data's smoother on newer tool versions, to make new features integrate more smoothly, to support cross-endian recording/analyzing workflows better, etc. - Restructuring of the kprobes code, the splitting out of 'optprobes', and a corner case bugfix. - Allow the tracing of kernel console output (printk). - Improvements/fixes to user-space RDPMC support, allowing user-space self-profiling code to extract PMU counts without performing any system calls, while playing nice with the kernel side. - 'perf bench' improvements - ... and lots of internal restructurings, cleanups and fixes that made these features possible. And, as usual this list is incomplete as there were also lots of other improvements * 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (120 commits) perf report: Fix annotate double quit issue in branch view mode perf report: Remove duplicate annotate choice in branch view mode perf/x86: Prettify pmu config literals perf report: Enable TUI in branch view mode perf report: Auto-detect branch stack sampling mode perf record: Add HEADER_BRANCH_STACK tag perf record: Provide default branch stack sampling mode option perf tools: Make perf able to read files from older ABIs perf tools: Fix ABI compatibility bug in print_event_desc() perf tools: Enable reading of perf.data files from different ABI rev perf: Add ABI reference sizes perf report: Add support for taken branch sampling perf record: Add support for sampling taken branch perf tools: Add code to support PERF_SAMPLE_BRANCH_STACK x86/kprobes: Split out optprobe related code to kprobes-opt.c x86/kprobes: Fix a bug which can modify kernel code permanently x86/kprobes: Fix instruction recovery on optimized path perf: Add callback to flush branch_stack on context switch perf: Disable PERF_SAMPLE_BRANCH_* when not supported perf/x86: Add LBR software filter support for Intel CPUs ...
Diffstat (limited to 'tools/perf/perf.h')
-rw-r--r--tools/perf/perf.h26
1 files changed, 22 insertions, 4 deletions
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 3afa39ac1d40..89e3355ab173 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -173,7 +173,6 @@ sys_perf_event_open(struct perf_event_attr *attr,
173 pid_t pid, int cpu, int group_fd, 173 pid_t pid, int cpu, int group_fd,
174 unsigned long flags) 174 unsigned long flags)
175{ 175{
176 attr->size = sizeof(*attr);
177 return syscall(__NR_perf_event_open, attr, pid, cpu, 176 return syscall(__NR_perf_event_open, attr, pid, cpu,
178 group_fd, flags); 177 group_fd, flags);
179} 178}
@@ -186,14 +185,32 @@ struct ip_callchain {
186 u64 ips[0]; 185 u64 ips[0];
187}; 186};
188 187
188struct branch_flags {
189 u64 mispred:1;
190 u64 predicted:1;
191 u64 reserved:62;
192};
193
194struct branch_entry {
195 u64 from;
196 u64 to;
197 struct branch_flags flags;
198};
199
200struct branch_stack {
201 u64 nr;
202 struct branch_entry entries[0];
203};
204
189extern bool perf_host, perf_guest; 205extern bool perf_host, perf_guest;
190extern const char perf_version_string[]; 206extern const char perf_version_string[];
191 207
192void pthread__unblock_sigwinch(void); 208void pthread__unblock_sigwinch(void);
193 209
194struct perf_record_opts { 210struct perf_record_opts {
195 pid_t target_pid; 211 const char *target_pid;
196 pid_t target_tid; 212 const char *target_tid;
213 uid_t uid;
197 bool call_graph; 214 bool call_graph;
198 bool group; 215 bool group;
199 bool inherit_stat; 216 bool inherit_stat;
@@ -204,13 +221,14 @@ struct perf_record_opts {
204 bool raw_samples; 221 bool raw_samples;
205 bool sample_address; 222 bool sample_address;
206 bool sample_time; 223 bool sample_time;
207 bool sample_id_all_avail; 224 bool sample_id_all_missing;
208 bool exclude_guest_missing; 225 bool exclude_guest_missing;
209 bool system_wide; 226 bool system_wide;
210 bool period; 227 bool period;
211 unsigned int freq; 228 unsigned int freq;
212 unsigned int mmap_pages; 229 unsigned int mmap_pages;
213 unsigned int user_freq; 230 unsigned int user_freq;
231 int branch_stack;
214 u64 default_interval; 232 u64 default_interval;
215 u64 user_interval; 233 u64 user_interval;
216 const char *cpu_list; 234 const char *cpu_list;