diff options
Diffstat (limited to 'arch/x86/lib/insn.c')
-rw-r--r-- | arch/x86/lib/insn.c | 516 |
1 files changed, 516 insertions, 0 deletions
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c new file mode 100644 index 000000000000..9f33b984d0ef --- /dev/null +++ b/arch/x86/lib/insn.c | |||
@@ -0,0 +1,516 @@ | |||
1 | /* | ||
2 | * x86 instruction analysis | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * Copyright (C) IBM Corporation, 2002, 2004, 2009 | ||
19 | */ | ||
20 | |||
21 | #include <linux/string.h> | ||
22 | #include <asm/inat.h> | ||
23 | #include <asm/insn.h> | ||
24 | |||
25 | #define get_next(t, insn) \ | ||
26 | ({t r; r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) | ||
27 | |||
28 | #define peek_next(t, insn) \ | ||
29 | ({t r; r = *(t*)insn->next_byte; r; }) | ||
30 | |||
31 | #define peek_nbyte_next(t, insn, n) \ | ||
32 | ({t r; r = *(t*)((insn)->next_byte + n); r; }) | ||
33 | |||
34 | /** | ||
35 | * insn_init() - initialize struct insn | ||
36 | * @insn: &struct insn to be initialized | ||
37 | * @kaddr: address (in kernel memory) of instruction (or copy thereof) | ||
38 | * @x86_64: !0 for 64-bit kernel or 64-bit app | ||
39 | */ | ||
40 | void insn_init(struct insn *insn, const void *kaddr, int x86_64) | ||
41 | { | ||
42 | memset(insn, 0, sizeof(*insn)); | ||
43 | insn->kaddr = kaddr; | ||
44 | insn->next_byte = kaddr; | ||
45 | insn->x86_64 = x86_64 ? 1 : 0; | ||
46 | insn->opnd_bytes = 4; | ||
47 | if (x86_64) | ||
48 | insn->addr_bytes = 8; | ||
49 | else | ||
50 | insn->addr_bytes = 4; | ||
51 | } | ||
52 | |||
53 | /** | ||
54 | * insn_get_prefixes - scan x86 instruction prefix bytes | ||
55 | * @insn: &struct insn containing instruction | ||
56 | * | ||
57 | * Populates the @insn->prefixes bitmap, and updates @insn->next_byte | ||
58 | * to point to the (first) opcode. No effect if @insn->prefixes.got | ||
59 | * is already set. | ||
60 | */ | ||
61 | void insn_get_prefixes(struct insn *insn) | ||
62 | { | ||
63 | struct insn_field *prefixes = &insn->prefixes; | ||
64 | insn_attr_t attr; | ||
65 | insn_byte_t b, lb; | ||
66 | int i, nb; | ||
67 | |||
68 | if (prefixes->got) | ||
69 | return; | ||
70 | |||
71 | nb = 0; | ||
72 | lb = 0; | ||
73 | b = peek_next(insn_byte_t, insn); | ||
74 | attr = inat_get_opcode_attribute(b); | ||
75 | while (inat_is_legacy_prefix(attr)) { | ||
76 | /* Skip if same prefix */ | ||
77 | for (i = 0; i < nb; i++) | ||
78 | if (prefixes->bytes[i] == b) | ||
79 | goto found; | ||
80 | if (nb == 4) | ||
81 | /* Invalid instruction */ | ||
82 | break; | ||
83 | prefixes->bytes[nb++] = b; | ||
84 | if (inat_is_address_size_prefix(attr)) { | ||
85 | /* address size switches 2/4 or 4/8 */ | ||
86 | if (insn->x86_64) | ||
87 | insn->addr_bytes ^= 12; | ||
88 | else | ||
89 | insn->addr_bytes ^= 6; | ||
90 | } else if (inat_is_operand_size_prefix(attr)) { | ||
91 | /* oprand size switches 2/4 */ | ||
92 | insn->opnd_bytes ^= 6; | ||
93 | } | ||
94 | found: | ||
95 | prefixes->nbytes++; | ||
96 | insn->next_byte++; | ||
97 | lb = b; | ||
98 | b = peek_next(insn_byte_t, insn); | ||
99 | attr = inat_get_opcode_attribute(b); | ||
100 | } | ||
101 | /* Set the last prefix */ | ||
102 | if (lb && lb != insn->prefixes.bytes[3]) { | ||
103 | if (unlikely(insn->prefixes.bytes[3])) { | ||
104 | /* Swap the last prefix */ | ||
105 | b = insn->prefixes.bytes[3]; | ||
106 | for (i = 0; i < nb; i++) | ||
107 | if (prefixes->bytes[i] == lb) | ||
108 | prefixes->bytes[i] = b; | ||
109 | } | ||
110 | insn->prefixes.bytes[3] = lb; | ||
111 | } | ||
112 | |||
113 | /* Decode REX prefix */ | ||
114 | if (insn->x86_64) { | ||
115 | b = peek_next(insn_byte_t, insn); | ||
116 | attr = inat_get_opcode_attribute(b); | ||
117 | if (inat_is_rex_prefix(attr)) { | ||
118 | insn->rex_prefix.value = b; | ||
119 | insn->rex_prefix.nbytes = 1; | ||
120 | insn->next_byte++; | ||
121 | if (X86_REX_W(b)) | ||
122 | /* REX.W overrides opnd_size */ | ||
123 | insn->opnd_bytes = 8; | ||
124 | } | ||
125 | } | ||
126 | insn->rex_prefix.got = 1; | ||
127 | |||
128 | /* Decode VEX prefix */ | ||
129 | b = peek_next(insn_byte_t, insn); | ||
130 | attr = inat_get_opcode_attribute(b); | ||
131 | if (inat_is_vex_prefix(attr)) { | ||
132 | insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1); | ||
133 | if (!insn->x86_64) { | ||
134 | /* | ||
135 | * In 32-bits mode, if the [7:6] bits (mod bits of | ||
136 | * ModRM) on the second byte are not 11b, it is | ||
137 | * LDS or LES. | ||
138 | */ | ||
139 | if (X86_MODRM_MOD(b2) != 3) | ||
140 | goto vex_end; | ||
141 | } | ||
142 | insn->vex_prefix.bytes[0] = b; | ||
143 | insn->vex_prefix.bytes[1] = b2; | ||
144 | if (inat_is_vex3_prefix(attr)) { | ||
145 | b2 = peek_nbyte_next(insn_byte_t, insn, 2); | ||
146 | insn->vex_prefix.bytes[2] = b2; | ||
147 | insn->vex_prefix.nbytes = 3; | ||
148 | insn->next_byte += 3; | ||
149 | if (insn->x86_64 && X86_VEX_W(b2)) | ||
150 | /* VEX.W overrides opnd_size */ | ||
151 | insn->opnd_bytes = 8; | ||
152 | } else { | ||
153 | insn->vex_prefix.nbytes = 2; | ||
154 | insn->next_byte += 2; | ||
155 | } | ||
156 | } | ||
157 | vex_end: | ||
158 | insn->vex_prefix.got = 1; | ||
159 | |||
160 | prefixes->got = 1; | ||
161 | return; | ||
162 | } | ||
163 | |||
164 | /** | ||
165 | * insn_get_opcode - collect opcode(s) | ||
166 | * @insn: &struct insn containing instruction | ||
167 | * | ||
168 | * Populates @insn->opcode, updates @insn->next_byte to point past the | ||
169 | * opcode byte(s), and set @insn->attr (except for groups). | ||
170 | * If necessary, first collects any preceding (prefix) bytes. | ||
171 | * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got | ||
172 | * is already 1. | ||
173 | */ | ||
174 | void insn_get_opcode(struct insn *insn) | ||
175 | { | ||
176 | struct insn_field *opcode = &insn->opcode; | ||
177 | insn_byte_t op, pfx; | ||
178 | if (opcode->got) | ||
179 | return; | ||
180 | if (!insn->prefixes.got) | ||
181 | insn_get_prefixes(insn); | ||
182 | |||
183 | /* Get first opcode */ | ||
184 | op = get_next(insn_byte_t, insn); | ||
185 | opcode->bytes[0] = op; | ||
186 | opcode->nbytes = 1; | ||
187 | |||
188 | /* Check if there is VEX prefix or not */ | ||
189 | if (insn_is_avx(insn)) { | ||
190 | insn_byte_t m, p; | ||
191 | m = insn_vex_m_bits(insn); | ||
192 | p = insn_vex_p_bits(insn); | ||
193 | insn->attr = inat_get_avx_attribute(op, m, p); | ||
194 | if (!inat_accept_vex(insn->attr)) | ||
195 | insn->attr = 0; /* This instruction is bad */ | ||
196 | goto end; /* VEX has only 1 byte for opcode */ | ||
197 | } | ||
198 | |||
199 | insn->attr = inat_get_opcode_attribute(op); | ||
200 | while (inat_is_escape(insn->attr)) { | ||
201 | /* Get escaped opcode */ | ||
202 | op = get_next(insn_byte_t, insn); | ||
203 | opcode->bytes[opcode->nbytes++] = op; | ||
204 | pfx = insn_last_prefix(insn); | ||
205 | insn->attr = inat_get_escape_attribute(op, pfx, insn->attr); | ||
206 | } | ||
207 | if (inat_must_vex(insn->attr)) | ||
208 | insn->attr = 0; /* This instruction is bad */ | ||
209 | end: | ||
210 | opcode->got = 1; | ||
211 | } | ||
212 | |||
213 | /** | ||
214 | * insn_get_modrm - collect ModRM byte, if any | ||
215 | * @insn: &struct insn containing instruction | ||
216 | * | ||
217 | * Populates @insn->modrm and updates @insn->next_byte to point past the | ||
218 | * ModRM byte, if any. If necessary, first collects the preceding bytes | ||
219 | * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1. | ||
220 | */ | ||
221 | void insn_get_modrm(struct insn *insn) | ||
222 | { | ||
223 | struct insn_field *modrm = &insn->modrm; | ||
224 | insn_byte_t pfx, mod; | ||
225 | if (modrm->got) | ||
226 | return; | ||
227 | if (!insn->opcode.got) | ||
228 | insn_get_opcode(insn); | ||
229 | |||
230 | if (inat_has_modrm(insn->attr)) { | ||
231 | mod = get_next(insn_byte_t, insn); | ||
232 | modrm->value = mod; | ||
233 | modrm->nbytes = 1; | ||
234 | if (inat_is_group(insn->attr)) { | ||
235 | pfx = insn_last_prefix(insn); | ||
236 | insn->attr = inat_get_group_attribute(mod, pfx, | ||
237 | insn->attr); | ||
238 | } | ||
239 | } | ||
240 | |||
241 | if (insn->x86_64 && inat_is_force64(insn->attr)) | ||
242 | insn->opnd_bytes = 8; | ||
243 | modrm->got = 1; | ||
244 | } | ||
245 | |||
246 | |||
247 | /** | ||
248 | * insn_rip_relative() - Does instruction use RIP-relative addressing mode? | ||
249 | * @insn: &struct insn containing instruction | ||
250 | * | ||
251 | * If necessary, first collects the instruction up to and including the | ||
252 | * ModRM byte. No effect if @insn->x86_64 is 0. | ||
253 | */ | ||
254 | int insn_rip_relative(struct insn *insn) | ||
255 | { | ||
256 | struct insn_field *modrm = &insn->modrm; | ||
257 | |||
258 | if (!insn->x86_64) | ||
259 | return 0; | ||
260 | if (!modrm->got) | ||
261 | insn_get_modrm(insn); | ||
262 | /* | ||
263 | * For rip-relative instructions, the mod field (top 2 bits) | ||
264 | * is zero and the r/m field (bottom 3 bits) is 0x5. | ||
265 | */ | ||
266 | return (modrm->nbytes && (modrm->value & 0xc7) == 0x5); | ||
267 | } | ||
268 | |||
269 | /** | ||
270 | * insn_get_sib() - Get the SIB byte of instruction | ||
271 | * @insn: &struct insn containing instruction | ||
272 | * | ||
273 | * If necessary, first collects the instruction up to and including the | ||
274 | * ModRM byte. | ||
275 | */ | ||
276 | void insn_get_sib(struct insn *insn) | ||
277 | { | ||
278 | insn_byte_t modrm; | ||
279 | |||
280 | if (insn->sib.got) | ||
281 | return; | ||
282 | if (!insn->modrm.got) | ||
283 | insn_get_modrm(insn); | ||
284 | if (insn->modrm.nbytes) { | ||
285 | modrm = (insn_byte_t)insn->modrm.value; | ||
286 | if (insn->addr_bytes != 2 && | ||
287 | X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) { | ||
288 | insn->sib.value = get_next(insn_byte_t, insn); | ||
289 | insn->sib.nbytes = 1; | ||
290 | } | ||
291 | } | ||
292 | insn->sib.got = 1; | ||
293 | } | ||
294 | |||
295 | |||
296 | /** | ||
297 | * insn_get_displacement() - Get the displacement of instruction | ||
298 | * @insn: &struct insn containing instruction | ||
299 | * | ||
300 | * If necessary, first collects the instruction up to and including the | ||
301 | * SIB byte. | ||
302 | * Displacement value is sign-expanded. | ||
303 | */ | ||
304 | void insn_get_displacement(struct insn *insn) | ||
305 | { | ||
306 | insn_byte_t mod, rm, base; | ||
307 | |||
308 | if (insn->displacement.got) | ||
309 | return; | ||
310 | if (!insn->sib.got) | ||
311 | insn_get_sib(insn); | ||
312 | if (insn->modrm.nbytes) { | ||
313 | /* | ||
314 | * Interpreting the modrm byte: | ||
315 | * mod = 00 - no displacement fields (exceptions below) | ||
316 | * mod = 01 - 1-byte displacement field | ||
317 | * mod = 10 - displacement field is 4 bytes, or 2 bytes if | ||
318 | * address size = 2 (0x67 prefix in 32-bit mode) | ||
319 | * mod = 11 - no memory operand | ||
320 | * | ||
321 | * If address size = 2... | ||
322 | * mod = 00, r/m = 110 - displacement field is 2 bytes | ||
323 | * | ||
324 | * If address size != 2... | ||
325 | * mod != 11, r/m = 100 - SIB byte exists | ||
326 | * mod = 00, SIB base = 101 - displacement field is 4 bytes | ||
327 | * mod = 00, r/m = 101 - rip-relative addressing, displacement | ||
328 | * field is 4 bytes | ||
329 | */ | ||
330 | mod = X86_MODRM_MOD(insn->modrm.value); | ||
331 | rm = X86_MODRM_RM(insn->modrm.value); | ||
332 | base = X86_SIB_BASE(insn->sib.value); | ||
333 | if (mod == 3) | ||
334 | goto out; | ||
335 | if (mod == 1) { | ||
336 | insn->displacement.value = get_next(char, insn); | ||
337 | insn->displacement.nbytes = 1; | ||
338 | } else if (insn->addr_bytes == 2) { | ||
339 | if ((mod == 0 && rm == 6) || mod == 2) { | ||
340 | insn->displacement.value = | ||
341 | get_next(short, insn); | ||
342 | insn->displacement.nbytes = 2; | ||
343 | } | ||
344 | } else { | ||
345 | if ((mod == 0 && rm == 5) || mod == 2 || | ||
346 | (mod == 0 && base == 5)) { | ||
347 | insn->displacement.value = get_next(int, insn); | ||
348 | insn->displacement.nbytes = 4; | ||
349 | } | ||
350 | } | ||
351 | } | ||
352 | out: | ||
353 | insn->displacement.got = 1; | ||
354 | } | ||
355 | |||
356 | /* Decode moffset16/32/64 */ | ||
357 | static void __get_moffset(struct insn *insn) | ||
358 | { | ||
359 | switch (insn->addr_bytes) { | ||
360 | case 2: | ||
361 | insn->moffset1.value = get_next(short, insn); | ||
362 | insn->moffset1.nbytes = 2; | ||
363 | break; | ||
364 | case 4: | ||
365 | insn->moffset1.value = get_next(int, insn); | ||
366 | insn->moffset1.nbytes = 4; | ||
367 | break; | ||
368 | case 8: | ||
369 | insn->moffset1.value = get_next(int, insn); | ||
370 | insn->moffset1.nbytes = 4; | ||
371 | insn->moffset2.value = get_next(int, insn); | ||
372 | insn->moffset2.nbytes = 4; | ||
373 | break; | ||
374 | } | ||
375 | insn->moffset1.got = insn->moffset2.got = 1; | ||
376 | } | ||
377 | |||
378 | /* Decode imm v32(Iz) */ | ||
379 | static void __get_immv32(struct insn *insn) | ||
380 | { | ||
381 | switch (insn->opnd_bytes) { | ||
382 | case 2: | ||
383 | insn->immediate.value = get_next(short, insn); | ||
384 | insn->immediate.nbytes = 2; | ||
385 | break; | ||
386 | case 4: | ||
387 | case 8: | ||
388 | insn->immediate.value = get_next(int, insn); | ||
389 | insn->immediate.nbytes = 4; | ||
390 | break; | ||
391 | } | ||
392 | } | ||
393 | |||
394 | /* Decode imm v64(Iv/Ov) */ | ||
395 | static void __get_immv(struct insn *insn) | ||
396 | { | ||
397 | switch (insn->opnd_bytes) { | ||
398 | case 2: | ||
399 | insn->immediate1.value = get_next(short, insn); | ||
400 | insn->immediate1.nbytes = 2; | ||
401 | break; | ||
402 | case 4: | ||
403 | insn->immediate1.value = get_next(int, insn); | ||
404 | insn->immediate1.nbytes = 4; | ||
405 | break; | ||
406 | case 8: | ||
407 | insn->immediate1.value = get_next(int, insn); | ||
408 | insn->immediate1.nbytes = 4; | ||
409 | insn->immediate2.value = get_next(int, insn); | ||
410 | insn->immediate2.nbytes = 4; | ||
411 | break; | ||
412 | } | ||
413 | insn->immediate1.got = insn->immediate2.got = 1; | ||
414 | } | ||
415 | |||
416 | /* Decode ptr16:16/32(Ap) */ | ||
417 | static void __get_immptr(struct insn *insn) | ||
418 | { | ||
419 | switch (insn->opnd_bytes) { | ||
420 | case 2: | ||
421 | insn->immediate1.value = get_next(short, insn); | ||
422 | insn->immediate1.nbytes = 2; | ||
423 | break; | ||
424 | case 4: | ||
425 | insn->immediate1.value = get_next(int, insn); | ||
426 | insn->immediate1.nbytes = 4; | ||
427 | break; | ||
428 | case 8: | ||
429 | /* ptr16:64 is not exist (no segment) */ | ||
430 | return; | ||
431 | } | ||
432 | insn->immediate2.value = get_next(unsigned short, insn); | ||
433 | insn->immediate2.nbytes = 2; | ||
434 | insn->immediate1.got = insn->immediate2.got = 1; | ||
435 | } | ||
436 | |||
437 | /** | ||
438 | * insn_get_immediate() - Get the immediates of instruction | ||
439 | * @insn: &struct insn containing instruction | ||
440 | * | ||
441 | * If necessary, first collects the instruction up to and including the | ||
442 | * displacement bytes. | ||
443 | * Basically, most of immediates are sign-expanded. Unsigned-value can be | ||
444 | * get by bit masking with ((1 << (nbytes * 8)) - 1) | ||
445 | */ | ||
446 | void insn_get_immediate(struct insn *insn) | ||
447 | { | ||
448 | if (insn->immediate.got) | ||
449 | return; | ||
450 | if (!insn->displacement.got) | ||
451 | insn_get_displacement(insn); | ||
452 | |||
453 | if (inat_has_moffset(insn->attr)) { | ||
454 | __get_moffset(insn); | ||
455 | goto done; | ||
456 | } | ||
457 | |||
458 | if (!inat_has_immediate(insn->attr)) | ||
459 | /* no immediates */ | ||
460 | goto done; | ||
461 | |||
462 | switch (inat_immediate_size(insn->attr)) { | ||
463 | case INAT_IMM_BYTE: | ||
464 | insn->immediate.value = get_next(char, insn); | ||
465 | insn->immediate.nbytes = 1; | ||
466 | break; | ||
467 | case INAT_IMM_WORD: | ||
468 | insn->immediate.value = get_next(short, insn); | ||
469 | insn->immediate.nbytes = 2; | ||
470 | break; | ||
471 | case INAT_IMM_DWORD: | ||
472 | insn->immediate.value = get_next(int, insn); | ||
473 | insn->immediate.nbytes = 4; | ||
474 | break; | ||
475 | case INAT_IMM_QWORD: | ||
476 | insn->immediate1.value = get_next(int, insn); | ||
477 | insn->immediate1.nbytes = 4; | ||
478 | insn->immediate2.value = get_next(int, insn); | ||
479 | insn->immediate2.nbytes = 4; | ||
480 | break; | ||
481 | case INAT_IMM_PTR: | ||
482 | __get_immptr(insn); | ||
483 | break; | ||
484 | case INAT_IMM_VWORD32: | ||
485 | __get_immv32(insn); | ||
486 | break; | ||
487 | case INAT_IMM_VWORD: | ||
488 | __get_immv(insn); | ||
489 | break; | ||
490 | default: | ||
491 | break; | ||
492 | } | ||
493 | if (inat_has_second_immediate(insn->attr)) { | ||
494 | insn->immediate2.value = get_next(char, insn); | ||
495 | insn->immediate2.nbytes = 1; | ||
496 | } | ||
497 | done: | ||
498 | insn->immediate.got = 1; | ||
499 | } | ||
500 | |||
501 | /** | ||
502 | * insn_get_length() - Get the length of instruction | ||
503 | * @insn: &struct insn containing instruction | ||
504 | * | ||
505 | * If necessary, first collects the instruction up to and including the | ||
506 | * immediates bytes. | ||
507 | */ | ||
508 | void insn_get_length(struct insn *insn) | ||
509 | { | ||
510 | if (insn->length) | ||
511 | return; | ||
512 | if (!insn->immediate.got) | ||
513 | insn_get_immediate(insn); | ||
514 | insn->length = (unsigned char)((unsigned long)insn->next_byte | ||
515 | - (unsigned long)insn->kaddr); | ||
516 | } | ||