diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-01-30 14:15:14 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-01-30 14:15:14 -0500 |
commit | d8b91dde38f4c43bd0bbbf17a90f735b16aaff2c (patch) | |
tree | bd72dabf6e4b23e060fce429c87e60504f69de54 /tools/perf/util/thread_map.c | |
parent | 5e7481a25e90b661d1dbbba18be3fd3dfe12ec6f (diff) | |
parent | e4c1091cb495d9cbec8956d642644a71a1689958 (diff) |
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar:
"Kernel side changes:
- Clean up the x86 instruction decoder (Masami Hiramatsu)
- Add new uprobes optimization for PUSH instructions on x86 (Yonghong
Song)
- Add MSR_IA32_THERM_STATUS to the MSR events (Stephane Eranian)
- Fix misc bugs, update documentation, plus various cleanups (Jiri
Olsa)
There's a large number of tooling side improvements:
- Intel-PT/BTS improvements (Adrian Hunter)
- Numerous 'perf trace' improvements (Arnaldo Carvalho de Melo)
- Introduce an errno code to string facility (Hendrik Brueckner)
- Various build system improvements (Jiri Olsa)
- Add support for CoreSight trace decoding by making the perf tools
use the external openCSD (Mathieu Poirier, Tor Jeremiassen)
- Add ARM Statistical Profiling Extensions (SPE) support (Kim
Phillips)
- libtraceevent updates (Steven Rostedt)
- Intel vendor event JSON updates (Andi Kleen)
- Introduce 'perf report --mmaps' and 'perf report --tasks' to show
info present in 'perf.data' (Jiri Olsa, Arnaldo Carvalho de Melo)
- Add infrastructure to record the first and last sample time in the
perf.data file header. This is done when all samples in a 'perf record'
session are processed anyway, such as when doing build-id processing, or
when recording that info is specifically requested. 'perf report --time'
uses that info, and it also gained support for percent slices in
addition to absolute ones.
I.e. now it is possible to ask for the samples in the 10%-20% time
slice of a perf.data file (Jin Yao)
- Allow system wide 'perf stat --per-thread', sorting the result (Jin
Yao)
E.g.:
[root@jouet ~]# perf stat --per-thread --metrics IPC
^C
Performance counter stats for 'system wide':
make-22229 23,012,094,032 inst_retired.any # 0.8 IPC
cc1-22419 692,027,497 inst_retired.any # 0.8 IPC
gcc-22418 328,231,855 inst_retired.any # 0.9 IPC
cc1-22509 220,853,647 inst_retired.any # 0.8 IPC
gcc-22486 199,874,810 inst_retired.any # 1.0 IPC
as-22466 177,896,365 inst_retired.any # 0.9 IPC
cc1-22465 150,732,374 inst_retired.any # 0.8 IPC
gcc-22508 112,555,593 inst_retired.any # 0.9 IPC
cc1-22487 108,964,079 inst_retired.any # 0.7 IPC
qemu-system-x86-2697 21,330,550 inst_retired.any # 0.3 IPC
systemd-journal-551 20,642,951 inst_retired.any # 0.4 IPC
docker-containe-17651 9,552,892 inst_retired.any # 0.5 IPC
dockerd-current-9809 7,528,586 inst_retired.any # 0.5 IPC
make-22153 12,504,194,380 inst_retired.any # 0.8 IPC
python2-22429 12,081,290,954 inst_retired.any # 0.8 IPC
<SNIP>
python2-22429 15,026,328,103 cpu_clk_unhalted.thread
cc1-22419 826,660,193 cpu_clk_unhalted.thread
gcc-22418 365,321,295 cpu_clk_unhalted.thread
cc1-22509 279,169,362 cpu_clk_unhalted.thread
gcc-22486 210,156,950 cpu_clk_unhalted.thread
<SNIP>
5.638075538 seconds time elapsed
[root@jouet ~]#
- Improve shell auto-completion of perf events (Jin Yao)
- 'perf probe' improvements (Masami Hiramatsu)
- Improve PMU infrastructure to support arm64's ThunderX2
implementation-defined core events (Ganapatrao Kulkarni)
- Various annotation related improvements and fixes (Thomas Richter)
- Clarify usage of 'overwrite' and 'backward' in the evlist/mmap
code, removing the 'overwrite' parameter from several functions as
it was always used as 'false' (Wang Nan)
- Fix/improve 'perf record' reverse recording support (Wang Nan)
- Improve command line options documentation (Sihyeon Jang)
- Optimize sample parsing for ordering events, where we don't need to
parse all the PERF_SAMPLE_ bits, just the ones leading to the
timestamp needed to reorder events (Jiri Olsa)
- Generalize the annotation code to support other source information
besides objdump/DWARF obtained ones, starting with python scripts,
which are slated to be merged soon (Jiri Olsa)
- ... and a lot more that I failed to list, see the shortlog and
changelog for details"
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (262 commits)
perf trace beauty flock: Move to separate object file
perf evlist: Remove fcntl.h from evlist.h
perf trace beauty futex: Beautify FUTEX_BITSET_MATCH_ANY
perf trace: Do not print from time delta for interrupted syscall lines
perf trace: Add --print-sample
perf bpf: Remove misplaced __maybe_unused attribute
MAINTAINERS: Adding entry for CoreSight trace decoding
perf tools: Add mechanic to synthesise CoreSight trace packets
perf tools: Add full support for CoreSight trace decoding
pert tools: Add queue management functionality
perf tools: Add functionality to communicate with the openCSD decoder
perf tools: Add support for decoding CoreSight trace data
perf tools: Add decoder mechanic to support dumping trace data
perf tools: Add processing of coresight metadata
perf tools: Add initial entry point for decoder CoreSight traces
perf tools: Integrating the CoreSight decoding library
perf vendor events intel: Update IvyTown files to V20
perf vendor events intel: Update IvyBridge files to V20
perf vendor events intel: Update BroadwellDE events to V7
perf vendor events intel: Update SkylakeX events to V1.06
...
Diffstat (limited to 'tools/perf/util/thread_map.c')
-rw-r--r-- | tools/perf/util/thread_map.c | 27 |
1 files changed, 20 insertions, 7 deletions
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index be0d5a736dea..3e1038f6491c 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c | |||
@@ -92,7 +92,7 @@ struct thread_map *thread_map__new_by_tid(pid_t tid) | |||
92 | return threads; | 92 | return threads; |
93 | } | 93 | } |
94 | 94 | ||
95 | struct thread_map *thread_map__new_by_uid(uid_t uid) | 95 | static struct thread_map *__thread_map__new_all_cpus(uid_t uid) |
96 | { | 96 | { |
97 | DIR *proc; | 97 | DIR *proc; |
98 | int max_threads = 32, items, i; | 98 | int max_threads = 32, items, i; |
@@ -113,7 +113,6 @@ struct thread_map *thread_map__new_by_uid(uid_t uid) | |||
113 | while ((dirent = readdir(proc)) != NULL) { | 113 | while ((dirent = readdir(proc)) != NULL) { |
114 | char *end; | 114 | char *end; |
115 | bool grow = false; | 115 | bool grow = false; |
116 | struct stat st; | ||
117 | pid_t pid = strtol(dirent->d_name, &end, 10); | 116 | pid_t pid = strtol(dirent->d_name, &end, 10); |
118 | 117 | ||
119 | if (*end) /* only interested in proper numerical dirents */ | 118 | if (*end) /* only interested in proper numerical dirents */ |
@@ -121,11 +120,12 @@ struct thread_map *thread_map__new_by_uid(uid_t uid) | |||
121 | 120 | ||
122 | snprintf(path, sizeof(path), "/proc/%s", dirent->d_name); | 121 | snprintf(path, sizeof(path), "/proc/%s", dirent->d_name); |
123 | 122 | ||
124 | if (stat(path, &st) != 0) | 123 | if (uid != UINT_MAX) { |
125 | continue; | 124 | struct stat st; |
126 | 125 | ||
127 | if (st.st_uid != uid) | 126 | if (stat(path, &st) != 0 || st.st_uid != uid) |
128 | continue; | 127 | continue; |
128 | } | ||
129 | 129 | ||
130 | snprintf(path, sizeof(path), "/proc/%d/task", pid); | 130 | snprintf(path, sizeof(path), "/proc/%d/task", pid); |
131 | items = scandir(path, &namelist, filter, NULL); | 131 | items = scandir(path, &namelist, filter, NULL); |
@@ -178,6 +178,16 @@ out_free_closedir: | |||
178 | goto out_closedir; | 178 | goto out_closedir; |
179 | } | 179 | } |
180 | 180 | ||
181 | struct thread_map *thread_map__new_all_cpus(void) | ||
182 | { | ||
183 | return __thread_map__new_all_cpus(UINT_MAX); | ||
184 | } | ||
185 | |||
186 | struct thread_map *thread_map__new_by_uid(uid_t uid) | ||
187 | { | ||
188 | return __thread_map__new_all_cpus(uid); | ||
189 | } | ||
190 | |||
181 | struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid) | 191 | struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid) |
182 | { | 192 | { |
183 | if (pid != -1) | 193 | if (pid != -1) |
@@ -313,7 +323,7 @@ out_free_threads: | |||
313 | } | 323 | } |
314 | 324 | ||
315 | struct thread_map *thread_map__new_str(const char *pid, const char *tid, | 325 | struct thread_map *thread_map__new_str(const char *pid, const char *tid, |
316 | uid_t uid) | 326 | uid_t uid, bool per_thread) |
317 | { | 327 | { |
318 | if (pid) | 328 | if (pid) |
319 | return thread_map__new_by_pid_str(pid); | 329 | return thread_map__new_by_pid_str(pid); |
@@ -321,6 +331,9 @@ struct thread_map *thread_map__new_str(const char *pid, const char *tid, | |||
321 | if (!tid && uid != UINT_MAX) | 331 | if (!tid && uid != UINT_MAX) |
322 | return thread_map__new_by_uid(uid); | 332 | return thread_map__new_by_uid(uid); |
323 | 333 | ||
334 | if (per_thread) | ||
335 | return thread_map__new_all_cpus(); | ||
336 | |||
324 | return thread_map__new_by_tid_str(tid); | 337 | return thread_map__new_by_tid_str(tid); |
325 | } | 338 | } |
326 | 339 | ||