aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/lib/insn.c
diff options
context:
space:
mode:
authorMasami Hiramatsu <mhiramat@redhat.com>2009-08-13 16:34:13 -0400
committerFrederic Weisbecker <fweisbec@gmail.com>2009-08-26 18:35:56 -0400
commiteb13296cfaf6c699566473669a96a38a90562384 (patch)
tree466c44bf0a747effaf85ec13dbf75ae857449bfd /arch/x86/lib/insn.c
parent35dce1a99d010f3d738af4ce1b9b77302fdfe69c (diff)
x86: Instruction decoder API
Add x86 instruction decoder to arch-specific libraries. This decoder can decode x86 instructions used in kernel into prefix, opcode, modrm, sib, displacement and immediates. This can also show the length of instructions. This version introduces instruction attributes for decoding instructions. The instruction attribute tables are generated from the opcode map file (x86-opcode-map.txt) by the generator script(gen-insn-attr-x86.awk). Currently, the opcode maps are based on opcode maps in Intel(R) 64 and IA-32 Architectures Software Developers Manual Vol.2: Appendix.A, and consist of below two types of opcode tables. 1-byte/2-bytes/3-bytes opcodes, which has 256 elements, are written as below; Table: table-name Referrer: escaped-name opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] (or) opcode: escape # escaped-name EndTable Group opcodes, which has 8 elements, are written as below; GrpTable: GrpXXX reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] EndTable These opcode maps include a few SSE and FP opcodes (for setup), because those opcodes are used in the kernel. Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com> Signed-off-by: Jim Keniston <jkenisto@us.ibm.com> Acked-by: H. Peter Anvin <hpa@zytor.com> Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com> Cc: Avi Kivity <avi@redhat.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Christoph Hellwig <hch@infradead.org> Cc: Frank Ch. Eigler <fche@redhat.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Jason Baron <jbaron@redhat.com> Cc: K.Prasad <prasad@linux.vnet.ibm.com> Cc: Lai Jiangshan <laijs@cn.fujitsu.com> Cc: Li Zefan <lizf@cn.fujitsu.com> Cc: Przemysław Pawełczyk <przemyslaw@pawelczyk.it> Cc: Roland McGrath <roland@redhat.com> Cc: Sam Ravnborg <sam@ravnborg.org> Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Tom Zanussi <tzanussi@gmail.com> Cc: Vegard Nossum <vegard.nossum@gmail.com> LKML-Reference: <20090813203413.31965.49709.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Diffstat (limited to 'arch/x86/lib/insn.c')
-rw-r--r--arch/x86/lib/insn.c464
1 files changed, 464 insertions, 0 deletions
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
new file mode 100644
index 000000000000..dfd56a30053f
--- /dev/null
+++ b/arch/x86/lib/insn.c
@@ -0,0 +1,464 @@
1/*
2 * x86 instruction analysis
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) IBM Corporation, 2002, 2004, 2009
19 */
20
21#include <linux/string.h>
22#include <asm/inat.h>
23#include <asm/insn.h>
24
25#define get_next(t, insn) \
26 ({t r; r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; })
27
28#define peek_next(t, insn) \
29 ({t r; r = *(t*)insn->next_byte; r; })
30
31/**
32 * insn_init() - initialize struct insn
33 * @insn: &struct insn to be initialized
34 * @kaddr: address (in kernel memory) of instruction (or copy thereof)
35 * @x86_64: !0 for 64-bit kernel or 64-bit app
36 */
37void insn_init(struct insn *insn, const void *kaddr, int x86_64)
38{
39 memset(insn, 0, sizeof(*insn));
40 insn->kaddr = kaddr;
41 insn->next_byte = kaddr;
42 insn->x86_64 = x86_64 ? 1 : 0;
43 insn->opnd_bytes = 4;
44 if (x86_64)
45 insn->addr_bytes = 8;
46 else
47 insn->addr_bytes = 4;
48}
49
50/**
51 * insn_get_prefixes - scan x86 instruction prefix bytes
52 * @insn: &struct insn containing instruction
53 *
54 * Populates the @insn->prefixes bitmap, and updates @insn->next_byte
55 * to point to the (first) opcode. No effect if @insn->prefixes.got
56 * is already set.
57 */
58void insn_get_prefixes(struct insn *insn)
59{
60 struct insn_field *prefixes = &insn->prefixes;
61 insn_attr_t attr;
62 insn_byte_t b, lb;
63 int i, nb;
64
65 if (prefixes->got)
66 return;
67
68 nb = 0;
69 lb = 0;
70 b = peek_next(insn_byte_t, insn);
71 attr = inat_get_opcode_attribute(b);
72 while (inat_is_prefix(attr)) {
73 /* Skip if same prefix */
74 for (i = 0; i < nb; i++)
75 if (prefixes->bytes[i] == b)
76 goto found;
77 if (nb == 4)
78 /* Invalid instruction */
79 break;
80 prefixes->bytes[nb++] = b;
81 if (inat_is_address_size_prefix(attr)) {
82 /* address size switches 2/4 or 4/8 */
83 if (insn->x86_64)
84 insn->addr_bytes ^= 12;
85 else
86 insn->addr_bytes ^= 6;
87 } else if (inat_is_operand_size_prefix(attr)) {
88 /* oprand size switches 2/4 */
89 insn->opnd_bytes ^= 6;
90 }
91found:
92 prefixes->nbytes++;
93 insn->next_byte++;
94 lb = b;
95 b = peek_next(insn_byte_t, insn);
96 attr = inat_get_opcode_attribute(b);
97 }
98 /* Set the last prefix */
99 if (lb && lb != insn->prefixes.bytes[3]) {
100 if (unlikely(insn->prefixes.bytes[3])) {
101 /* Swap the last prefix */
102 b = insn->prefixes.bytes[3];
103 for (i = 0; i < nb; i++)
104 if (prefixes->bytes[i] == lb)
105 prefixes->bytes[i] = b;
106 }
107 insn->prefixes.bytes[3] = lb;
108 }
109
110 if (insn->x86_64) {
111 b = peek_next(insn_byte_t, insn);
112 attr = inat_get_opcode_attribute(b);
113 if (inat_is_rex_prefix(attr)) {
114 insn->rex_prefix.value = b;
115 insn->rex_prefix.nbytes = 1;
116 insn->next_byte++;
117 if (X86_REX_W(b))
118 /* REX.W overrides opnd_size */
119 insn->opnd_bytes = 8;
120 }
121 }
122 insn->rex_prefix.got = 1;
123 prefixes->got = 1;
124 return;
125}
126
127/**
128 * insn_get_opcode - collect opcode(s)
129 * @insn: &struct insn containing instruction
130 *
131 * Populates @insn->opcode, updates @insn->next_byte to point past the
132 * opcode byte(s), and set @insn->attr (except for groups).
133 * If necessary, first collects any preceding (prefix) bytes.
134 * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got
135 * is already 1.
136 */
137void insn_get_opcode(struct insn *insn)
138{
139 struct insn_field *opcode = &insn->opcode;
140 insn_byte_t op, pfx;
141 if (opcode->got)
142 return;
143 if (!insn->prefixes.got)
144 insn_get_prefixes(insn);
145
146 /* Get first opcode */
147 op = get_next(insn_byte_t, insn);
148 opcode->bytes[0] = op;
149 opcode->nbytes = 1;
150 insn->attr = inat_get_opcode_attribute(op);
151 while (inat_is_escape(insn->attr)) {
152 /* Get escaped opcode */
153 op = get_next(insn_byte_t, insn);
154 opcode->bytes[opcode->nbytes++] = op;
155 pfx = insn_last_prefix(insn);
156 insn->attr = inat_get_escape_attribute(op, pfx, insn->attr);
157 }
158 opcode->got = 1;
159}
160
161/**
162 * insn_get_modrm - collect ModRM byte, if any
163 * @insn: &struct insn containing instruction
164 *
165 * Populates @insn->modrm and updates @insn->next_byte to point past the
166 * ModRM byte, if any. If necessary, first collects the preceding bytes
167 * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1.
168 */
169void insn_get_modrm(struct insn *insn)
170{
171 struct insn_field *modrm = &insn->modrm;
172 insn_byte_t pfx, mod;
173 if (modrm->got)
174 return;
175 if (!insn->opcode.got)
176 insn_get_opcode(insn);
177
178 if (inat_has_modrm(insn->attr)) {
179 mod = get_next(insn_byte_t, insn);
180 modrm->value = mod;
181 modrm->nbytes = 1;
182 if (inat_is_group(insn->attr)) {
183 pfx = insn_last_prefix(insn);
184 insn->attr = inat_get_group_attribute(mod, pfx,
185 insn->attr);
186 }
187 }
188
189 if (insn->x86_64 && inat_is_force64(insn->attr))
190 insn->opnd_bytes = 8;
191 modrm->got = 1;
192}
193
194
195/**
196 * insn_rip_relative() - Does instruction use RIP-relative addressing mode?
197 * @insn: &struct insn containing instruction
198 *
199 * If necessary, first collects the instruction up to and including the
200 * ModRM byte. No effect if @insn->x86_64 is 0.
201 */
202int insn_rip_relative(struct insn *insn)
203{
204 struct insn_field *modrm = &insn->modrm;
205
206 if (!insn->x86_64)
207 return 0;
208 if (!modrm->got)
209 insn_get_modrm(insn);
210 /*
211 * For rip-relative instructions, the mod field (top 2 bits)
212 * is zero and the r/m field (bottom 3 bits) is 0x5.
213 */
214 return (modrm->nbytes && (modrm->value & 0xc7) == 0x5);
215}
216
217/**
218 * insn_get_sib() - Get the SIB byte of instruction
219 * @insn: &struct insn containing instruction
220 *
221 * If necessary, first collects the instruction up to and including the
222 * ModRM byte.
223 */
224void insn_get_sib(struct insn *insn)
225{
226 insn_byte_t modrm;
227
228 if (insn->sib.got)
229 return;
230 if (!insn->modrm.got)
231 insn_get_modrm(insn);
232 if (insn->modrm.nbytes) {
233 modrm = (insn_byte_t)insn->modrm.value;
234 if (insn->addr_bytes != 2 &&
235 X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) {
236 insn->sib.value = get_next(insn_byte_t, insn);
237 insn->sib.nbytes = 1;
238 }
239 }
240 insn->sib.got = 1;
241}
242
243
244/**
245 * insn_get_displacement() - Get the displacement of instruction
246 * @insn: &struct insn containing instruction
247 *
248 * If necessary, first collects the instruction up to and including the
249 * SIB byte.
250 * Displacement value is sign-expanded.
251 */
252void insn_get_displacement(struct insn *insn)
253{
254 insn_byte_t mod, rm, base;
255
256 if (insn->displacement.got)
257 return;
258 if (!insn->sib.got)
259 insn_get_sib(insn);
260 if (insn->modrm.nbytes) {
261 /*
262 * Interpreting the modrm byte:
263 * mod = 00 - no displacement fields (exceptions below)
264 * mod = 01 - 1-byte displacement field
265 * mod = 10 - displacement field is 4 bytes, or 2 bytes if
266 * address size = 2 (0x67 prefix in 32-bit mode)
267 * mod = 11 - no memory operand
268 *
269 * If address size = 2...
270 * mod = 00, r/m = 110 - displacement field is 2 bytes
271 *
272 * If address size != 2...
273 * mod != 11, r/m = 100 - SIB byte exists
274 * mod = 00, SIB base = 101 - displacement field is 4 bytes
275 * mod = 00, r/m = 101 - rip-relative addressing, displacement
276 * field is 4 bytes
277 */
278 mod = X86_MODRM_MOD(insn->modrm.value);
279 rm = X86_MODRM_RM(insn->modrm.value);
280 base = X86_SIB_BASE(insn->sib.value);
281 if (mod == 3)
282 goto out;
283 if (mod == 1) {
284 insn->displacement.value = get_next(char, insn);
285 insn->displacement.nbytes = 1;
286 } else if (insn->addr_bytes == 2) {
287 if ((mod == 0 && rm == 6) || mod == 2) {
288 insn->displacement.value =
289 get_next(short, insn);
290 insn->displacement.nbytes = 2;
291 }
292 } else {
293 if ((mod == 0 && rm == 5) || mod == 2 ||
294 (mod == 0 && base == 5)) {
295 insn->displacement.value = get_next(int, insn);
296 insn->displacement.nbytes = 4;
297 }
298 }
299 }
300out:
301 insn->displacement.got = 1;
302}
303
304/* Decode moffset16/32/64 */
305static void __get_moffset(struct insn *insn)
306{
307 switch (insn->addr_bytes) {
308 case 2:
309 insn->moffset1.value = get_next(short, insn);
310 insn->moffset1.nbytes = 2;
311 break;
312 case 4:
313 insn->moffset1.value = get_next(int, insn);
314 insn->moffset1.nbytes = 4;
315 break;
316 case 8:
317 insn->moffset1.value = get_next(int, insn);
318 insn->moffset1.nbytes = 4;
319 insn->moffset2.value = get_next(int, insn);
320 insn->moffset2.nbytes = 4;
321 break;
322 }
323 insn->moffset1.got = insn->moffset2.got = 1;
324}
325
326/* Decode imm v32(Iz) */
327static void __get_immv32(struct insn *insn)
328{
329 switch (insn->opnd_bytes) {
330 case 2:
331 insn->immediate.value = get_next(short, insn);
332 insn->immediate.nbytes = 2;
333 break;
334 case 4:
335 case 8:
336 insn->immediate.value = get_next(int, insn);
337 insn->immediate.nbytes = 4;
338 break;
339 }
340}
341
342/* Decode imm v64(Iv/Ov) */
343static void __get_immv(struct insn *insn)
344{
345 switch (insn->opnd_bytes) {
346 case 2:
347 insn->immediate1.value = get_next(short, insn);
348 insn->immediate1.nbytes = 2;
349 break;
350 case 4:
351 insn->immediate1.value = get_next(int, insn);
352 insn->immediate1.nbytes = 4;
353 break;
354 case 8:
355 insn->immediate1.value = get_next(int, insn);
356 insn->immediate1.nbytes = 4;
357 insn->immediate2.value = get_next(int, insn);
358 insn->immediate2.nbytes = 4;
359 break;
360 }
361 insn->immediate1.got = insn->immediate2.got = 1;
362}
363
364/* Decode ptr16:16/32(Ap) */
365static void __get_immptr(struct insn *insn)
366{
367 switch (insn->opnd_bytes) {
368 case 2:
369 insn->immediate1.value = get_next(short, insn);
370 insn->immediate1.nbytes = 2;
371 break;
372 case 4:
373 insn->immediate1.value = get_next(int, insn);
374 insn->immediate1.nbytes = 4;
375 break;
376 case 8:
377 /* ptr16:64 is not exist (no segment) */
378 return;
379 }
380 insn->immediate2.value = get_next(unsigned short, insn);
381 insn->immediate2.nbytes = 2;
382 insn->immediate1.got = insn->immediate2.got = 1;
383}
384
385/**
386 * insn_get_immediate() - Get the immediates of instruction
387 * @insn: &struct insn containing instruction
388 *
389 * If necessary, first collects the instruction up to and including the
390 * displacement bytes.
391 * Basically, most of immediates are sign-expanded. Unsigned-value can be
392 * get by bit masking with ((1 << (nbytes * 8)) - 1)
393 */
394void insn_get_immediate(struct insn *insn)
395{
396 if (insn->immediate.got)
397 return;
398 if (!insn->displacement.got)
399 insn_get_displacement(insn);
400
401 if (inat_has_moffset(insn->attr)) {
402 __get_moffset(insn);
403 goto done;
404 }
405
406 if (!inat_has_immediate(insn->attr))
407 /* no immediates */
408 goto done;
409
410 switch (inat_immediate_size(insn->attr)) {
411 case INAT_IMM_BYTE:
412 insn->immediate.value = get_next(char, insn);
413 insn->immediate.nbytes = 1;
414 break;
415 case INAT_IMM_WORD:
416 insn->immediate.value = get_next(short, insn);
417 insn->immediate.nbytes = 2;
418 break;
419 case INAT_IMM_DWORD:
420 insn->immediate.value = get_next(int, insn);
421 insn->immediate.nbytes = 4;
422 break;
423 case INAT_IMM_QWORD:
424 insn->immediate1.value = get_next(int, insn);
425 insn->immediate1.nbytes = 4;
426 insn->immediate2.value = get_next(int, insn);
427 insn->immediate2.nbytes = 4;
428 break;
429 case INAT_IMM_PTR:
430 __get_immptr(insn);
431 break;
432 case INAT_IMM_VWORD32:
433 __get_immv32(insn);
434 break;
435 case INAT_IMM_VWORD:
436 __get_immv(insn);
437 break;
438 default:
439 break;
440 }
441 if (inat_has_second_immediate(insn->attr)) {
442 insn->immediate2.value = get_next(char, insn);
443 insn->immediate2.nbytes = 1;
444 }
445done:
446 insn->immediate.got = 1;
447}
448
449/**
450 * insn_get_length() - Get the length of instruction
451 * @insn: &struct insn containing instruction
452 *
453 * If necessary, first collects the instruction up to and including the
454 * immediates bytes.
455 */
456void insn_get_length(struct insn *insn)
457{
458 if (insn->length)
459 return;
460 if (!insn->immediate.got)
461 insn_get_immediate(insn);
462 insn->length = (unsigned char)((unsigned long)insn->next_byte
463 - (unsigned long)insn->kaddr);
464}