diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-01-30 14:15:14 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-01-30 14:15:14 -0500 |
commit | d8b91dde38f4c43bd0bbbf17a90f735b16aaff2c (patch) | |
tree | bd72dabf6e4b23e060fce429c87e60504f69de54 /arch/x86/tools/insn_decoder_test.c | |
parent | 5e7481a25e90b661d1dbbba18be3fd3dfe12ec6f (diff) | |
parent | e4c1091cb495d9cbec8956d642644a71a1689958 (diff) |
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar:
"Kernel side changes:
- Clean up the x86 instruction decoder (Masami Hiramatsu)
- Add new uprobes optimization for PUSH instructions on x86 (Yonghong
Song)
- Add MSR_IA32_THERM_STATUS to the MSR events (Stephane Eranian)
- Fix misc bugs, update documentation, plus various cleanups (Jiri
Olsa)
There's a large number of tooling side improvements:
- Intel-PT/BTS improvements (Adrian Hunter)
- Numerous 'perf trace' improvements (Arnaldo Carvalho de Melo)
- Introduce an errno code to string facility (Hendrik Brueckner)
- Various build system improvements (Jiri Olsa)
- Add support for CoreSight trace decoding by making the perf tools
use the external openCSD (Mathieu Poirier, Tor Jeremiassen)
- Add ARM Statistical Profiling Extensions (SPE) support (Kim
Phillips)
- libtraceevent updates (Steven Rostedt)
- Intel vendor event JSON updates (Andi Kleen)
- Introduce 'perf report --mmaps' and 'perf report --tasks' to show
info present in 'perf.data' (Jiri Olsa, Arnaldo Carvalho de Melo)
- Add infrastructure to record first and last sample time to the
perf.data file header, so that when processing all samples in a
'perf record' session, such as when doing build-id processing, or
when specifically requesting that that info be recorded, use that
in 'perf report --time', that also got support for percent slices
in addition to absolute ones.
I.e. now it is possible to ask for the samples in the 10%-20% time
slice of a perf.data file (Jin Yao)
- Allow system wide 'perf stat --per-thread', sorting the result (Jin
Yao)
E.g.:
[root@jouet ~]# perf stat --per-thread --metrics IPC
^C
Performance counter stats for 'system wide':
make-22229 23,012,094,032 inst_retired.any # 0.8 IPC
cc1-22419 692,027,497 inst_retired.any # 0.8 IPC
gcc-22418 328,231,855 inst_retired.any # 0.9 IPC
cc1-22509 220,853,647 inst_retired.any # 0.8 IPC
gcc-22486 199,874,810 inst_retired.any # 1.0 IPC
as-22466 177,896,365 inst_retired.any # 0.9 IPC
cc1-22465 150,732,374 inst_retired.any # 0.8 IPC
gcc-22508 112,555,593 inst_retired.any # 0.9 IPC
cc1-22487 108,964,079 inst_retired.any # 0.7 IPC
qemu-system-x86-2697 21,330,550 inst_retired.any # 0.3 IPC
systemd-journal-551 20,642,951 inst_retired.any # 0.4 IPC
docker-containe-17651 9,552,892 inst_retired.any # 0.5 IPC
dockerd-current-9809 7,528,586 inst_retired.any # 0.5 IPC
make-22153 12,504,194,380 inst_retired.any # 0.8 IPC
python2-22429 12,081,290,954 inst_retired.any # 0.8 IPC
<SNIP>
python2-22429 15,026,328,103 cpu_clk_unhalted.thread
cc1-22419 826,660,193 cpu_clk_unhalted.thread
gcc-22418 365,321,295 cpu_clk_unhalted.thread
cc1-22509 279,169,362 cpu_clk_unhalted.thread
gcc-22486 210,156,950 cpu_clk_unhalted.thread
<SNIP>
5.638075538 seconds time elapsed
[root@jouet ~]#
- Improve shell auto-completion of perf events (Jin Yao)
- 'perf probe' improvements (Masami Hiramatsu)
- Improve PMU infrastructure to support arm64's ThunderX2
implementation defined core events (Ganapatrao Kulkarni)
- Various annotation related improvements and fixes (Thomas Richter)
- Clarify usage of 'overwrite' and 'backward' in the evlist/mmap
code, removing the 'overwrite' parameter from several functions as
it was always used as 'false' (Wang Nan)
- Fix/improve 'perf record' reverse recording support (Wang Nan)
- Improve command line options documentation (Sihyeon Jang)
- Optimize sample parsing for ordering events, where we don't need to
parse all the PERF_SAMPLE_ bits, just the ones leading to the
timestamp needed to reorder events (Jiri Olsa)
- Generalize the annotation code to support other source information
besides objdump/DWARF obtained ones, starting with python scripts,
support for which is slated to be merged soon (Jiri Olsa)
- ... and a lot more that I failed to list, see the shortlog and
changelog for details"
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (262 commits)
perf trace beauty flock: Move to separate object file
perf evlist: Remove fcntl.h from evlist.h
perf trace beauty futex: Beautify FUTEX_BITSET_MATCH_ANY
perf trace: Do not print from time delta for interrupted syscall lines
perf trace: Add --print-sample
perf bpf: Remove misplaced __maybe_unused attribute
MAINTAINERS: Adding entry for CoreSight trace decoding
perf tools: Add mechanic to synthesise CoreSight trace packets
perf tools: Add full support for CoreSight trace decoding
pert tools: Add queue management functionality
perf tools: Add functionality to communicate with the openCSD decoder
perf tools: Add support for decoding CoreSight trace data
perf tools: Add decoder mechanic to support dumping trace data
perf tools: Add processing of coresight metadata
perf tools: Add initial entry point for decoder CoreSight traces
perf tools: Integrating the CoreSight decoding library
perf vendor events intel: Update IvyTown files to V20
perf vendor events intel: Update IvyBridge files to V20
perf vendor events intel: Update BroadwellDE events to V7
perf vendor events intel: Update SkylakeX events to V1.06
...
Diffstat (limited to 'arch/x86/tools/insn_decoder_test.c')
-rw-r--r-- | arch/x86/tools/insn_decoder_test.c | 180 |
1 files changed, 180 insertions, 0 deletions
diff --git a/arch/x86/tools/insn_decoder_test.c b/arch/x86/tools/insn_decoder_test.c new file mode 100644 index 000000000000..a3b4fd954931 --- /dev/null +++ b/arch/x86/tools/insn_decoder_test.c | |||
@@ -0,0 +1,180 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License as published by | ||
4 | * the Free Software Foundation; either version 2 of the License, or | ||
5 | * (at your option) any later version. | ||
6 | * | ||
7 | * This program is distributed in the hope that it will be useful, | ||
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
10 | * GNU General Public License for more details. | ||
11 | * | ||
12 | * Copyright (C) IBM Corporation, 2009 | ||
13 | */ | ||
14 | |||
15 | #include <stdlib.h> | ||
16 | #include <stdio.h> | ||
17 | #include <string.h> | ||
18 | #include <assert.h> | ||
19 | #include <unistd.h> | ||
20 | #include <stdarg.h> | ||
21 | |||
22 | #define unlikely(cond) (cond) | ||
23 | |||
24 | #include <asm/insn.h> | ||
25 | #include <inat.c> | ||
26 | #include <insn.c> | ||
27 | |||
28 | /* | ||
29 | * Test of instruction analysis in general and insn_get_length() in | ||
30 | * particular. See if insn_get_length() and the disassembler agree | ||
31 | * on the length of each instruction in an elf disassembly. | ||
32 | * | ||
33 | * Usage: objdump -d a.out | awk -f objdump_reformat.awk | ./insn_decoder_test | ||
34 | */ | ||
35 | |||
/* Program name (argv[0]); prefixes every diagnostic. Set by parse_args(). */
const char *prog;
/* Non-zero once -v was given: dump the decoded insn on a length mismatch. */
static int verbose;
/* Decoder mode handed to insn_init(): 1 after -y, 0 after -n (the default). */
static int x86_64;

/*
 * Print the command-line synopsis and the option summary to stderr,
 * then terminate the program with exit status 1.  Never returns.
 */
static void usage(void)
{
	static const char option_help[] =
		"\t-y 64bit mode\n"
		"\t-n 32bit mode\n"
		"\t-v verbose mode\n";

	fprintf(stderr,
		"Usage: objdump -d a.out | awk -f objdump_reformat.awk"
		" | %s [-y|-n] [-v]\n",
		prog);
	fputs(option_help, stderr);
	exit(1);
}
49 | |||
50 | static void malformed_line(const char *line, int line_nr) | ||
51 | { | ||
52 | fprintf(stderr, "%s: error: malformed line %d:\n%s", | ||
53 | prog, line_nr, line); | ||
54 | exit(3); | ||
55 | } | ||
56 | |||
57 | static void pr_warn(const char *fmt, ...) | ||
58 | { | ||
59 | va_list ap; | ||
60 | |||
61 | fprintf(stderr, "%s: warning: ", prog); | ||
62 | va_start(ap, fmt); | ||
63 | vfprintf(stderr, fmt, ap); | ||
64 | va_end(ap); | ||
65 | } | ||
66 | |||
67 | static void dump_field(FILE *fp, const char *name, const char *indent, | ||
68 | struct insn_field *field) | ||
69 | { | ||
70 | fprintf(fp, "%s.%s = {\n", indent, name); | ||
71 | fprintf(fp, "%s\t.value = %d, bytes[] = {%x, %x, %x, %x},\n", | ||
72 | indent, field->value, field->bytes[0], field->bytes[1], | ||
73 | field->bytes[2], field->bytes[3]); | ||
74 | fprintf(fp, "%s\t.got = %d, .nbytes = %d},\n", indent, | ||
75 | field->got, field->nbytes); | ||
76 | } | ||
77 | |||
78 | static void dump_insn(FILE *fp, struct insn *insn) | ||
79 | { | ||
80 | fprintf(fp, "Instruction = {\n"); | ||
81 | dump_field(fp, "prefixes", "\t", &insn->prefixes); | ||
82 | dump_field(fp, "rex_prefix", "\t", &insn->rex_prefix); | ||
83 | dump_field(fp, "vex_prefix", "\t", &insn->vex_prefix); | ||
84 | dump_field(fp, "opcode", "\t", &insn->opcode); | ||
85 | dump_field(fp, "modrm", "\t", &insn->modrm); | ||
86 | dump_field(fp, "sib", "\t", &insn->sib); | ||
87 | dump_field(fp, "displacement", "\t", &insn->displacement); | ||
88 | dump_field(fp, "immediate1", "\t", &insn->immediate1); | ||
89 | dump_field(fp, "immediate2", "\t", &insn->immediate2); | ||
90 | fprintf(fp, "\t.attr = %x, .opnd_bytes = %d, .addr_bytes = %d,\n", | ||
91 | insn->attr, insn->opnd_bytes, insn->addr_bytes); | ||
92 | fprintf(fp, "\t.length = %d, .x86_64 = %d, .kaddr = %p}\n", | ||
93 | insn->length, insn->x86_64, insn->kaddr); | ||
94 | } | ||
95 | |||
96 | static void parse_args(int argc, char **argv) | ||
97 | { | ||
98 | int c; | ||
99 | prog = argv[0]; | ||
100 | while ((c = getopt(argc, argv, "ynv")) != -1) { | ||
101 | switch (c) { | ||
102 | case 'y': | ||
103 | x86_64 = 1; | ||
104 | break; | ||
105 | case 'n': | ||
106 | x86_64 = 0; | ||
107 | break; | ||
108 | case 'v': | ||
109 | verbose = 1; | ||
110 | break; | ||
111 | default: | ||
112 | usage(); | ||
113 | } | ||
114 | } | ||
115 | } | ||
116 | |||
117 | #define BUFSIZE 256 | ||
118 | |||
119 | int main(int argc, char **argv) | ||
120 | { | ||
121 | char line[BUFSIZE], sym[BUFSIZE] = "<unknown>"; | ||
122 | unsigned char insn_buf[16]; | ||
123 | struct insn insn; | ||
124 | int insns = 0; | ||
125 | int warnings = 0; | ||
126 | |||
127 | parse_args(argc, argv); | ||
128 | |||
129 | while (fgets(line, BUFSIZE, stdin)) { | ||
130 | char copy[BUFSIZE], *s, *tab1, *tab2; | ||
131 | int nb = 0; | ||
132 | unsigned int b; | ||
133 | |||
134 | if (line[0] == '<') { | ||
135 | /* Symbol line */ | ||
136 | strcpy(sym, line); | ||
137 | continue; | ||
138 | } | ||
139 | |||
140 | insns++; | ||
141 | memset(insn_buf, 0, 16); | ||
142 | strcpy(copy, line); | ||
143 | tab1 = strchr(copy, '\t'); | ||
144 | if (!tab1) | ||
145 | malformed_line(line, insns); | ||
146 | s = tab1 + 1; | ||
147 | s += strspn(s, " "); | ||
148 | tab2 = strchr(s, '\t'); | ||
149 | if (!tab2) | ||
150 | malformed_line(line, insns); | ||
151 | *tab2 = '\0'; /* Characters beyond tab2 aren't examined */ | ||
152 | while (s < tab2) { | ||
153 | if (sscanf(s, "%x", &b) == 1) { | ||
154 | insn_buf[nb++] = (unsigned char) b; | ||
155 | s += 3; | ||
156 | } else | ||
157 | break; | ||
158 | } | ||
159 | /* Decode an instruction */ | ||
160 | insn_init(&insn, insn_buf, sizeof(insn_buf), x86_64); | ||
161 | insn_get_length(&insn); | ||
162 | if (insn.length != nb) { | ||
163 | warnings++; | ||
164 | pr_warn("Found an x86 instruction decoder bug, " | ||
165 | "please report this.\n", sym); | ||
166 | pr_warn("%s", line); | ||
167 | pr_warn("objdump says %d bytes, but insn_get_length() " | ||
168 | "says %d\n", nb, insn.length); | ||
169 | if (verbose) | ||
170 | dump_insn(stderr, &insn); | ||
171 | } | ||
172 | } | ||
173 | if (warnings) | ||
174 | pr_warn("Decoded and checked %d instructions with %d " | ||
175 | "failures\n", insns, warnings); | ||
176 | else | ||
177 | fprintf(stdout, "%s: success: Decoded and checked %d" | ||
178 | " instructions\n", prog, insns); | ||
179 | return 0; | ||
180 | } | ||