diff options
Diffstat (limited to 'arch/x86/lib/insn.c')
-rw-r--r-- | arch/x86/lib/insn.c | 464 |
1 files changed, 464 insertions, 0 deletions
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c new file mode 100644 index 000000000000..dfd56a30053f --- /dev/null +++ b/arch/x86/lib/insn.c | |||
@@ -0,0 +1,464 @@ | |||
1 | /* | ||
2 | * x86 instruction analysis | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * Copyright (C) IBM Corporation, 2002, 2004, 2009 | ||
19 | */ | ||
20 | |||
21 | #include <linux/string.h> | ||
22 | #include <asm/inat.h> | ||
23 | #include <asm/insn.h> | ||
24 | |||
/*
 * get_next(t, insn)  - read a value of type @t at @insn->next_byte and
 *                      advance @insn->next_byte by sizeof(t).
 * peek_next(t, insn) - read a value of type @t at @insn->next_byte without
 *                      advancing.
 *
 * The instruction stream is byte-granular, so multi-byte values may sit at
 * any alignment.  The value is therefore copied out with memcpy() instead
 * of dereferencing a casted pointer.
 *
 * @insn is parenthesized so any pointer expression (e.g. &local) may be
 * passed.  Note that get_next() evaluates @insn more than once.
 */
#define get_next(t, insn)	\
	({ t r; memcpy(&r, (insn)->next_byte, sizeof(t)); \
	   (insn)->next_byte += sizeof(t); r; })

#define peek_next(t, insn)	\
	({ t r; memcpy(&r, (insn)->next_byte, sizeof(t)); r; })
30 | |||
31 | /** | ||
32 | * insn_init() - initialize struct insn | ||
33 | * @insn: &struct insn to be initialized | ||
34 | * @kaddr: address (in kernel memory) of instruction (or copy thereof) | ||
35 | * @x86_64: !0 for 64-bit kernel or 64-bit app | ||
36 | */ | ||
37 | void insn_init(struct insn *insn, const void *kaddr, int x86_64) | ||
38 | { | ||
39 | memset(insn, 0, sizeof(*insn)); | ||
40 | insn->kaddr = kaddr; | ||
41 | insn->next_byte = kaddr; | ||
42 | insn->x86_64 = x86_64 ? 1 : 0; | ||
43 | insn->opnd_bytes = 4; | ||
44 | if (x86_64) | ||
45 | insn->addr_bytes = 8; | ||
46 | else | ||
47 | insn->addr_bytes = 4; | ||
48 | } | ||
49 | |||
50 | /** | ||
51 | * insn_get_prefixes - scan x86 instruction prefix bytes | ||
52 | * @insn: &struct insn containing instruction | ||
53 | * | ||
54 | * Populates the @insn->prefixes bitmap, and updates @insn->next_byte | ||
55 | * to point to the (first) opcode. No effect if @insn->prefixes.got | ||
56 | * is already set. | ||
57 | */ | ||
58 | void insn_get_prefixes(struct insn *insn) | ||
59 | { | ||
60 | struct insn_field *prefixes = &insn->prefixes; | ||
61 | insn_attr_t attr; | ||
62 | insn_byte_t b, lb; | ||
63 | int i, nb; | ||
64 | |||
65 | if (prefixes->got) | ||
66 | return; | ||
67 | |||
68 | nb = 0; | ||
69 | lb = 0; | ||
70 | b = peek_next(insn_byte_t, insn); | ||
71 | attr = inat_get_opcode_attribute(b); | ||
72 | while (inat_is_prefix(attr)) { | ||
73 | /* Skip if same prefix */ | ||
74 | for (i = 0; i < nb; i++) | ||
75 | if (prefixes->bytes[i] == b) | ||
76 | goto found; | ||
77 | if (nb == 4) | ||
78 | /* Invalid instruction */ | ||
79 | break; | ||
80 | prefixes->bytes[nb++] = b; | ||
81 | if (inat_is_address_size_prefix(attr)) { | ||
82 | /* address size switches 2/4 or 4/8 */ | ||
83 | if (insn->x86_64) | ||
84 | insn->addr_bytes ^= 12; | ||
85 | else | ||
86 | insn->addr_bytes ^= 6; | ||
87 | } else if (inat_is_operand_size_prefix(attr)) { | ||
88 | /* oprand size switches 2/4 */ | ||
89 | insn->opnd_bytes ^= 6; | ||
90 | } | ||
91 | found: | ||
92 | prefixes->nbytes++; | ||
93 | insn->next_byte++; | ||
94 | lb = b; | ||
95 | b = peek_next(insn_byte_t, insn); | ||
96 | attr = inat_get_opcode_attribute(b); | ||
97 | } | ||
98 | /* Set the last prefix */ | ||
99 | if (lb && lb != insn->prefixes.bytes[3]) { | ||
100 | if (unlikely(insn->prefixes.bytes[3])) { | ||
101 | /* Swap the last prefix */ | ||
102 | b = insn->prefixes.bytes[3]; | ||
103 | for (i = 0; i < nb; i++) | ||
104 | if (prefixes->bytes[i] == lb) | ||
105 | prefixes->bytes[i] = b; | ||
106 | } | ||
107 | insn->prefixes.bytes[3] = lb; | ||
108 | } | ||
109 | |||
110 | if (insn->x86_64) { | ||
111 | b = peek_next(insn_byte_t, insn); | ||
112 | attr = inat_get_opcode_attribute(b); | ||
113 | if (inat_is_rex_prefix(attr)) { | ||
114 | insn->rex_prefix.value = b; | ||
115 | insn->rex_prefix.nbytes = 1; | ||
116 | insn->next_byte++; | ||
117 | if (X86_REX_W(b)) | ||
118 | /* REX.W overrides opnd_size */ | ||
119 | insn->opnd_bytes = 8; | ||
120 | } | ||
121 | } | ||
122 | insn->rex_prefix.got = 1; | ||
123 | prefixes->got = 1; | ||
124 | return; | ||
125 | } | ||
126 | |||
127 | /** | ||
128 | * insn_get_opcode - collect opcode(s) | ||
129 | * @insn: &struct insn containing instruction | ||
130 | * | ||
131 | * Populates @insn->opcode, updates @insn->next_byte to point past the | ||
132 | * opcode byte(s), and set @insn->attr (except for groups). | ||
133 | * If necessary, first collects any preceding (prefix) bytes. | ||
134 | * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got | ||
135 | * is already 1. | ||
136 | */ | ||
137 | void insn_get_opcode(struct insn *insn) | ||
138 | { | ||
139 | struct insn_field *opcode = &insn->opcode; | ||
140 | insn_byte_t op, pfx; | ||
141 | if (opcode->got) | ||
142 | return; | ||
143 | if (!insn->prefixes.got) | ||
144 | insn_get_prefixes(insn); | ||
145 | |||
146 | /* Get first opcode */ | ||
147 | op = get_next(insn_byte_t, insn); | ||
148 | opcode->bytes[0] = op; | ||
149 | opcode->nbytes = 1; | ||
150 | insn->attr = inat_get_opcode_attribute(op); | ||
151 | while (inat_is_escape(insn->attr)) { | ||
152 | /* Get escaped opcode */ | ||
153 | op = get_next(insn_byte_t, insn); | ||
154 | opcode->bytes[opcode->nbytes++] = op; | ||
155 | pfx = insn_last_prefix(insn); | ||
156 | insn->attr = inat_get_escape_attribute(op, pfx, insn->attr); | ||
157 | } | ||
158 | opcode->got = 1; | ||
159 | } | ||
160 | |||
161 | /** | ||
162 | * insn_get_modrm - collect ModRM byte, if any | ||
163 | * @insn: &struct insn containing instruction | ||
164 | * | ||
165 | * Populates @insn->modrm and updates @insn->next_byte to point past the | ||
166 | * ModRM byte, if any. If necessary, first collects the preceding bytes | ||
167 | * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1. | ||
168 | */ | ||
169 | void insn_get_modrm(struct insn *insn) | ||
170 | { | ||
171 | struct insn_field *modrm = &insn->modrm; | ||
172 | insn_byte_t pfx, mod; | ||
173 | if (modrm->got) | ||
174 | return; | ||
175 | if (!insn->opcode.got) | ||
176 | insn_get_opcode(insn); | ||
177 | |||
178 | if (inat_has_modrm(insn->attr)) { | ||
179 | mod = get_next(insn_byte_t, insn); | ||
180 | modrm->value = mod; | ||
181 | modrm->nbytes = 1; | ||
182 | if (inat_is_group(insn->attr)) { | ||
183 | pfx = insn_last_prefix(insn); | ||
184 | insn->attr = inat_get_group_attribute(mod, pfx, | ||
185 | insn->attr); | ||
186 | } | ||
187 | } | ||
188 | |||
189 | if (insn->x86_64 && inat_is_force64(insn->attr)) | ||
190 | insn->opnd_bytes = 8; | ||
191 | modrm->got = 1; | ||
192 | } | ||
193 | |||
194 | |||
195 | /** | ||
196 | * insn_rip_relative() - Does instruction use RIP-relative addressing mode? | ||
197 | * @insn: &struct insn containing instruction | ||
198 | * | ||
199 | * If necessary, first collects the instruction up to and including the | ||
200 | * ModRM byte. No effect if @insn->x86_64 is 0. | ||
201 | */ | ||
202 | int insn_rip_relative(struct insn *insn) | ||
203 | { | ||
204 | struct insn_field *modrm = &insn->modrm; | ||
205 | |||
206 | if (!insn->x86_64) | ||
207 | return 0; | ||
208 | if (!modrm->got) | ||
209 | insn_get_modrm(insn); | ||
210 | /* | ||
211 | * For rip-relative instructions, the mod field (top 2 bits) | ||
212 | * is zero and the r/m field (bottom 3 bits) is 0x5. | ||
213 | */ | ||
214 | return (modrm->nbytes && (modrm->value & 0xc7) == 0x5); | ||
215 | } | ||
216 | |||
217 | /** | ||
218 | * insn_get_sib() - Get the SIB byte of instruction | ||
219 | * @insn: &struct insn containing instruction | ||
220 | * | ||
221 | * If necessary, first collects the instruction up to and including the | ||
222 | * ModRM byte. | ||
223 | */ | ||
224 | void insn_get_sib(struct insn *insn) | ||
225 | { | ||
226 | insn_byte_t modrm; | ||
227 | |||
228 | if (insn->sib.got) | ||
229 | return; | ||
230 | if (!insn->modrm.got) | ||
231 | insn_get_modrm(insn); | ||
232 | if (insn->modrm.nbytes) { | ||
233 | modrm = (insn_byte_t)insn->modrm.value; | ||
234 | if (insn->addr_bytes != 2 && | ||
235 | X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) { | ||
236 | insn->sib.value = get_next(insn_byte_t, insn); | ||
237 | insn->sib.nbytes = 1; | ||
238 | } | ||
239 | } | ||
240 | insn->sib.got = 1; | ||
241 | } | ||
242 | |||
243 | |||
244 | /** | ||
245 | * insn_get_displacement() - Get the displacement of instruction | ||
246 | * @insn: &struct insn containing instruction | ||
247 | * | ||
248 | * If necessary, first collects the instruction up to and including the | ||
249 | * SIB byte. | ||
250 | * Displacement value is sign-expanded. | ||
251 | */ | ||
252 | void insn_get_displacement(struct insn *insn) | ||
253 | { | ||
254 | insn_byte_t mod, rm, base; | ||
255 | |||
256 | if (insn->displacement.got) | ||
257 | return; | ||
258 | if (!insn->sib.got) | ||
259 | insn_get_sib(insn); | ||
260 | if (insn->modrm.nbytes) { | ||
261 | /* | ||
262 | * Interpreting the modrm byte: | ||
263 | * mod = 00 - no displacement fields (exceptions below) | ||
264 | * mod = 01 - 1-byte displacement field | ||
265 | * mod = 10 - displacement field is 4 bytes, or 2 bytes if | ||
266 | * address size = 2 (0x67 prefix in 32-bit mode) | ||
267 | * mod = 11 - no memory operand | ||
268 | * | ||
269 | * If address size = 2... | ||
270 | * mod = 00, r/m = 110 - displacement field is 2 bytes | ||
271 | * | ||
272 | * If address size != 2... | ||
273 | * mod != 11, r/m = 100 - SIB byte exists | ||
274 | * mod = 00, SIB base = 101 - displacement field is 4 bytes | ||
275 | * mod = 00, r/m = 101 - rip-relative addressing, displacement | ||
276 | * field is 4 bytes | ||
277 | */ | ||
278 | mod = X86_MODRM_MOD(insn->modrm.value); | ||
279 | rm = X86_MODRM_RM(insn->modrm.value); | ||
280 | base = X86_SIB_BASE(insn->sib.value); | ||
281 | if (mod == 3) | ||
282 | goto out; | ||
283 | if (mod == 1) { | ||
284 | insn->displacement.value = get_next(char, insn); | ||
285 | insn->displacement.nbytes = 1; | ||
286 | } else if (insn->addr_bytes == 2) { | ||
287 | if ((mod == 0 && rm == 6) || mod == 2) { | ||
288 | insn->displacement.value = | ||
289 | get_next(short, insn); | ||
290 | insn->displacement.nbytes = 2; | ||
291 | } | ||
292 | } else { | ||
293 | if ((mod == 0 && rm == 5) || mod == 2 || | ||
294 | (mod == 0 && base == 5)) { | ||
295 | insn->displacement.value = get_next(int, insn); | ||
296 | insn->displacement.nbytes = 4; | ||
297 | } | ||
298 | } | ||
299 | } | ||
300 | out: | ||
301 | insn->displacement.got = 1; | ||
302 | } | ||
303 | |||
304 | /* Decode moffset16/32/64 */ | ||
305 | static void __get_moffset(struct insn *insn) | ||
306 | { | ||
307 | switch (insn->addr_bytes) { | ||
308 | case 2: | ||
309 | insn->moffset1.value = get_next(short, insn); | ||
310 | insn->moffset1.nbytes = 2; | ||
311 | break; | ||
312 | case 4: | ||
313 | insn->moffset1.value = get_next(int, insn); | ||
314 | insn->moffset1.nbytes = 4; | ||
315 | break; | ||
316 | case 8: | ||
317 | insn->moffset1.value = get_next(int, insn); | ||
318 | insn->moffset1.nbytes = 4; | ||
319 | insn->moffset2.value = get_next(int, insn); | ||
320 | insn->moffset2.nbytes = 4; | ||
321 | break; | ||
322 | } | ||
323 | insn->moffset1.got = insn->moffset2.got = 1; | ||
324 | } | ||
325 | |||
326 | /* Decode imm v32(Iz) */ | ||
327 | static void __get_immv32(struct insn *insn) | ||
328 | { | ||
329 | switch (insn->opnd_bytes) { | ||
330 | case 2: | ||
331 | insn->immediate.value = get_next(short, insn); | ||
332 | insn->immediate.nbytes = 2; | ||
333 | break; | ||
334 | case 4: | ||
335 | case 8: | ||
336 | insn->immediate.value = get_next(int, insn); | ||
337 | insn->immediate.nbytes = 4; | ||
338 | break; | ||
339 | } | ||
340 | } | ||
341 | |||
342 | /* Decode imm v64(Iv/Ov) */ | ||
343 | static void __get_immv(struct insn *insn) | ||
344 | { | ||
345 | switch (insn->opnd_bytes) { | ||
346 | case 2: | ||
347 | insn->immediate1.value = get_next(short, insn); | ||
348 | insn->immediate1.nbytes = 2; | ||
349 | break; | ||
350 | case 4: | ||
351 | insn->immediate1.value = get_next(int, insn); | ||
352 | insn->immediate1.nbytes = 4; | ||
353 | break; | ||
354 | case 8: | ||
355 | insn->immediate1.value = get_next(int, insn); | ||
356 | insn->immediate1.nbytes = 4; | ||
357 | insn->immediate2.value = get_next(int, insn); | ||
358 | insn->immediate2.nbytes = 4; | ||
359 | break; | ||
360 | } | ||
361 | insn->immediate1.got = insn->immediate2.got = 1; | ||
362 | } | ||
363 | |||
364 | /* Decode ptr16:16/32(Ap) */ | ||
365 | static void __get_immptr(struct insn *insn) | ||
366 | { | ||
367 | switch (insn->opnd_bytes) { | ||
368 | case 2: | ||
369 | insn->immediate1.value = get_next(short, insn); | ||
370 | insn->immediate1.nbytes = 2; | ||
371 | break; | ||
372 | case 4: | ||
373 | insn->immediate1.value = get_next(int, insn); | ||
374 | insn->immediate1.nbytes = 4; | ||
375 | break; | ||
376 | case 8: | ||
377 | /* ptr16:64 is not exist (no segment) */ | ||
378 | return; | ||
379 | } | ||
380 | insn->immediate2.value = get_next(unsigned short, insn); | ||
381 | insn->immediate2.nbytes = 2; | ||
382 | insn->immediate1.got = insn->immediate2.got = 1; | ||
383 | } | ||
384 | |||
385 | /** | ||
386 | * insn_get_immediate() - Get the immediates of instruction | ||
387 | * @insn: &struct insn containing instruction | ||
388 | * | ||
389 | * If necessary, first collects the instruction up to and including the | ||
390 | * displacement bytes. | ||
391 | * Basically, most of immediates are sign-expanded. Unsigned-value can be | ||
392 | * get by bit masking with ((1 << (nbytes * 8)) - 1) | ||
393 | */ | ||
394 | void insn_get_immediate(struct insn *insn) | ||
395 | { | ||
396 | if (insn->immediate.got) | ||
397 | return; | ||
398 | if (!insn->displacement.got) | ||
399 | insn_get_displacement(insn); | ||
400 | |||
401 | if (inat_has_moffset(insn->attr)) { | ||
402 | __get_moffset(insn); | ||
403 | goto done; | ||
404 | } | ||
405 | |||
406 | if (!inat_has_immediate(insn->attr)) | ||
407 | /* no immediates */ | ||
408 | goto done; | ||
409 | |||
410 | switch (inat_immediate_size(insn->attr)) { | ||
411 | case INAT_IMM_BYTE: | ||
412 | insn->immediate.value = get_next(char, insn); | ||
413 | insn->immediate.nbytes = 1; | ||
414 | break; | ||
415 | case INAT_IMM_WORD: | ||
416 | insn->immediate.value = get_next(short, insn); | ||
417 | insn->immediate.nbytes = 2; | ||
418 | break; | ||
419 | case INAT_IMM_DWORD: | ||
420 | insn->immediate.value = get_next(int, insn); | ||
421 | insn->immediate.nbytes = 4; | ||
422 | break; | ||
423 | case INAT_IMM_QWORD: | ||
424 | insn->immediate1.value = get_next(int, insn); | ||
425 | insn->immediate1.nbytes = 4; | ||
426 | insn->immediate2.value = get_next(int, insn); | ||
427 | insn->immediate2.nbytes = 4; | ||
428 | break; | ||
429 | case INAT_IMM_PTR: | ||
430 | __get_immptr(insn); | ||
431 | break; | ||
432 | case INAT_IMM_VWORD32: | ||
433 | __get_immv32(insn); | ||
434 | break; | ||
435 | case INAT_IMM_VWORD: | ||
436 | __get_immv(insn); | ||
437 | break; | ||
438 | default: | ||
439 | break; | ||
440 | } | ||
441 | if (inat_has_second_immediate(insn->attr)) { | ||
442 | insn->immediate2.value = get_next(char, insn); | ||
443 | insn->immediate2.nbytes = 1; | ||
444 | } | ||
445 | done: | ||
446 | insn->immediate.got = 1; | ||
447 | } | ||
448 | |||
449 | /** | ||
450 | * insn_get_length() - Get the length of instruction | ||
451 | * @insn: &struct insn containing instruction | ||
452 | * | ||
453 | * If necessary, first collects the instruction up to and including the | ||
454 | * immediates bytes. | ||
455 | */ | ||
456 | void insn_get_length(struct insn *insn) | ||
457 | { | ||
458 | if (insn->length) | ||
459 | return; | ||
460 | if (!insn->immediate.got) | ||
461 | insn_get_immediate(insn); | ||
462 | insn->length = (unsigned char)((unsigned long)insn->next_byte | ||
463 | - (unsigned long)insn->kaddr); | ||
464 | } | ||