aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorMasami Hiramatsu <mhiramat@redhat.com>2009-08-13 16:34:13 -0400
committerFrederic Weisbecker <fweisbec@gmail.com>2009-08-26 18:35:56 -0400
commiteb13296cfaf6c699566473669a96a38a90562384 (patch)
tree466c44bf0a747effaf85ec13dbf75ae857449bfd /arch/x86
parent35dce1a99d010f3d738af4ce1b9b77302fdfe69c (diff)
x86: Instruction decoder API
Add x86 instruction decoder to arch-specific libraries. This decoder can decode x86 instructions used in kernel into prefix, opcode, modrm, sib, displacement and immediates. This can also show the length of instructions. This version introduces instruction attributes for decoding instructions. The instruction attribute tables are generated from the opcode map file (x86-opcode-map.txt) by the generator script(gen-insn-attr-x86.awk). Currently, the opcode maps are based on opcode maps in Intel(R) 64 and IA-32 Architectures Software Developers Manual Vol.2: Appendix.A, and consist of below two types of opcode tables. 1-byte/2-bytes/3-bytes opcodes, which has 256 elements, are written as below; Table: table-name Referrer: escaped-name opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] (or) opcode: escape # escaped-name EndTable Group opcodes, which has 8 elements, are written as below; GrpTable: GrpXXX reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] EndTable These opcode maps include a few SSE and FP opcodes (for setup), because those opcodes are used in the kernel. Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com> Signed-off-by: Jim Keniston <jkenisto@us.ibm.com> Acked-by: H. Peter Anvin <hpa@zytor.com> Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com> Cc: Avi Kivity <avi@redhat.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Christoph Hellwig <hch@infradead.org> Cc: Frank Ch. 
Eigler <fche@redhat.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Jason Baron <jbaron@redhat.com> Cc: K.Prasad <prasad@linux.vnet.ibm.com> Cc: Lai Jiangshan <laijs@cn.fujitsu.com> Cc: Li Zefan <lizf@cn.fujitsu.com> Cc: Przemysław Pawełczyk <przemyslaw@pawelczyk.it> Cc: Roland McGrath <roland@redhat.com> Cc: Sam Ravnborg <sam@ravnborg.org> Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Tom Zanussi <tzanussi@gmail.com> Cc: Vegard Nossum <vegard.nossum@gmail.com> LKML-Reference: <20090813203413.31965.49709.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/include/asm/inat.h188
-rw-r--r--arch/x86/include/asm/inat_types.h29
-rw-r--r--arch/x86/include/asm/insn.h143
-rw-r--r--arch/x86/lib/Makefile13
-rw-r--r--arch/x86/lib/inat.c78
-rw-r--r--arch/x86/lib/insn.c464
-rw-r--r--arch/x86/lib/x86-opcode-map.txt719
-rw-r--r--arch/x86/tools/gen-insn-attr-x86.awk314
8 files changed, 1948 insertions, 0 deletions
diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h
new file mode 100644
index 000000000000..2866fddd1848
--- /dev/null
+++ b/arch/x86/include/asm/inat.h
@@ -0,0 +1,188 @@
1#ifndef _ASM_X86_INAT_H
2#define _ASM_X86_INAT_H
3/*
4 * x86 instruction attributes
5 *
6 * Written by Masami Hiramatsu <mhiramat@redhat.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 *
22 */
23#include <asm/inat_types.h>
24
25/*
26 * Internal bits. Don't use bitmasks directly, because these bits are
27 * unstable. You should use checking functions.
28 */
29
#define INAT_OPCODE_TABLE_SIZE 256
#define INAT_GROUP_TABLE_SIZE 8

/*
 * Legacy instruction prefix IDs (values of the INAT_PFX_MASK field).
 * Only IDs up to INAT_LPREFIX_MAX are reported by inat_last_prefix_id().
 */
#define INAT_PFX_OPNDSZ	1	/* 0x66 */ /* LPFX1 */
#define INAT_PFX_REPNE	2	/* 0xF2 */ /* LPFX2 */
#define INAT_PFX_REPE	3	/* 0xF3 */ /* LPFX3 */
#define INAT_PFX_LOCK	4	/* 0xF0 */
#define INAT_PFX_CS	5	/* 0x2E */
#define INAT_PFX_DS	6	/* 0x3E */
#define INAT_PFX_ES	7	/* 0x26 */
#define INAT_PFX_FS	8	/* 0x64 */
#define INAT_PFX_GS	9	/* 0x65 */
#define INAT_PFX_SS	10	/* 0x36 */
#define INAT_PFX_ADDRSZ	11	/* 0x67 */

#define INAT_LPREFIX_MAX	3

/* Immediate size IDs (values of the INAT_IMM_MASK field) */
#define INAT_IMM_BYTE		1
#define INAT_IMM_WORD		2
#define INAT_IMM_DWORD		3
#define INAT_IMM_QWORD		4
#define INAT_IMM_PTR		5
#define INAT_IMM_VWORD32	6
#define INAT_IMM_VWORD		7

/*
 * Attribute word layout, from bit 0 upwards:
 * prefix ID | escape ID | group ID | immediate size ID | flags
 */

/* Legacy prefix */
#define INAT_PFX_OFFS	0
#define INAT_PFX_BITS	4
#define INAT_PFX_MAX	((1 << INAT_PFX_BITS) - 1)
#define INAT_PFX_MASK	(INAT_PFX_MAX << INAT_PFX_OFFS)
/* Escape opcodes */
#define INAT_ESC_OFFS	(INAT_PFX_OFFS + INAT_PFX_BITS)
#define INAT_ESC_BITS	2
#define INAT_ESC_MAX	((1 << INAT_ESC_BITS) - 1)
#define INAT_ESC_MASK	(INAT_ESC_MAX << INAT_ESC_OFFS)
/* Group opcodes: IDs 1..INAT_GRP_MAX, a zero field means "not a group" */
#define INAT_GRP_OFFS	(INAT_ESC_OFFS + INAT_ESC_BITS)
#define INAT_GRP_BITS	5
#define INAT_GRP_MAX	((1 << INAT_GRP_BITS) - 1)
#define INAT_GRP_MASK	(INAT_GRP_MAX << INAT_GRP_OFFS)
/* Immediates */
#define INAT_IMM_OFFS	(INAT_GRP_OFFS + INAT_GRP_BITS)
#define INAT_IMM_BITS	3
#define INAT_IMM_MASK	(((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS)
/* Flags */
#define INAT_FLAG_OFFS	(INAT_IMM_OFFS + INAT_IMM_BITS)
#define INAT_REXPFX	(1 << INAT_FLAG_OFFS)
#define INAT_MODRM	(1 << (INAT_FLAG_OFFS + 1))
#define INAT_FORCE64	(1 << (INAT_FLAG_OFFS + 2))
#define INAT_SCNDIMM	(1 << (INAT_FLAG_OFFS + 3))
#define INAT_MOFFSET	(1 << (INAT_FLAG_OFFS + 4))
#define INAT_VARIANT	(1 << (INAT_FLAG_OFFS + 5))
/*
 * Attribute making macros for attribute tables.
 * Arguments are parenthesized so that an expression argument such as
 * (a | b) is shifted as a whole instead of only its last operand.
 */
#define INAT_MAKE_PREFIX(pfx)	((pfx) << INAT_PFX_OFFS)
#define INAT_MAKE_ESCAPE(esc)	((esc) << INAT_ESC_OFFS)
#define INAT_MAKE_GROUP(grp)	(((grp) << INAT_GRP_OFFS) | INAT_MODRM)
#define INAT_MAKE_IMM(imm)	((imm) << INAT_IMM_OFFS)
89
90/* Attribute search APIs */
91extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode);
92extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode,
93 insn_byte_t last_pfx,
94 insn_attr_t esc_attr);
95extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm,
96 insn_byte_t last_pfx,
97 insn_attr_t esc_attr);
98
99/* Attribute checking functions */
100static inline int inat_is_prefix(insn_attr_t attr)
101{
102 return attr & INAT_PFX_MASK;
103}
104
105static inline int inat_is_address_size_prefix(insn_attr_t attr)
106{
107 return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ;
108}
109
110static inline int inat_is_operand_size_prefix(insn_attr_t attr)
111{
112 return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ;
113}
114
115static inline int inat_last_prefix_id(insn_attr_t attr)
116{
117 if ((attr & INAT_PFX_MASK) > INAT_LPREFIX_MAX)
118 return 0;
119 else
120 return attr & INAT_PFX_MASK;
121}
122
123static inline int inat_is_escape(insn_attr_t attr)
124{
125 return attr & INAT_ESC_MASK;
126}
127
128static inline int inat_escape_id(insn_attr_t attr)
129{
130 return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS;
131}
132
133static inline int inat_is_group(insn_attr_t attr)
134{
135 return attr & INAT_GRP_MASK;
136}
137
138static inline int inat_group_id(insn_attr_t attr)
139{
140 return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS;
141}
142
143static inline int inat_group_common_attribute(insn_attr_t attr)
144{
145 return attr & ~INAT_GRP_MASK;
146}
147
148static inline int inat_has_immediate(insn_attr_t attr)
149{
150 return attr & INAT_IMM_MASK;
151}
152
153static inline int inat_immediate_size(insn_attr_t attr)
154{
155 return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS;
156}
157
158static inline int inat_is_rex_prefix(insn_attr_t attr)
159{
160 return attr & INAT_REXPFX;
161}
162
163static inline int inat_has_modrm(insn_attr_t attr)
164{
165 return attr & INAT_MODRM;
166}
167
168static inline int inat_is_force64(insn_attr_t attr)
169{
170 return attr & INAT_FORCE64;
171}
172
173static inline int inat_has_second_immediate(insn_attr_t attr)
174{
175 return attr & INAT_SCNDIMM;
176}
177
178static inline int inat_has_moffset(insn_attr_t attr)
179{
180 return attr & INAT_MOFFSET;
181}
182
183static inline int inat_has_variant(insn_attr_t attr)
184{
185 return attr & INAT_VARIANT;
186}
187
188#endif
diff --git a/arch/x86/include/asm/inat_types.h b/arch/x86/include/asm/inat_types.h
new file mode 100644
index 000000000000..cb3c20ce39cf
--- /dev/null
+++ b/arch/x86/include/asm/inat_types.h
@@ -0,0 +1,29 @@
1#ifndef _ASM_X86_INAT_TYPES_H
2#define _ASM_X86_INAT_TYPES_H
3/*
4 * x86 instruction attributes
5 *
6 * Written by Masami Hiramatsu <mhiramat@redhat.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 *
22 */
23
/* Basic types shared by the instruction attribute tables and the decoder */
typedef unsigned insn_attr_t;		/* packed instruction attribute word */
typedef unsigned char insn_byte_t;	/* one raw instruction byte */
typedef int insn_value_t;		/* signed value of a decoded field */
28
29#endif
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
new file mode 100644
index 000000000000..12b4e3751d3f
--- /dev/null
+++ b/arch/x86/include/asm/insn.h
@@ -0,0 +1,143 @@
1#ifndef _ASM_X86_INSN_H
2#define _ASM_X86_INSN_H
3/*
4 * x86 instruction analysis
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 *
20 * Copyright (C) IBM Corporation, 2009
21 */
22
23/* insn_attr_t is defined in inat.h */
24#include <asm/inat.h>
25
26struct insn_field {
27 union {
28 insn_value_t value;
29 insn_byte_t bytes[4];
30 };
31 /* !0 if we've run insn_get_xxx() for this field */
32 unsigned char got;
33 unsigned char nbytes;
34};
35
36struct insn {
37 struct insn_field prefixes; /*
38 * Prefixes
39 * prefixes.bytes[3]: last prefix
40 */
41 struct insn_field rex_prefix; /* REX prefix */
42 struct insn_field opcode; /*
43 * opcode.bytes[0]: opcode1
44 * opcode.bytes[1]: opcode2
45 * opcode.bytes[2]: opcode3
46 */
47 struct insn_field modrm;
48 struct insn_field sib;
49 struct insn_field displacement;
50 union {
51 struct insn_field immediate;
52 struct insn_field moffset1; /* for 64bit MOV */
53 struct insn_field immediate1; /* for 64bit imm or off16/32 */
54 };
55 union {
56 struct insn_field moffset2; /* for 64bit MOV */
57 struct insn_field immediate2; /* for 64bit imm or seg16 */
58 };
59
60 insn_attr_t attr;
61 unsigned char opnd_bytes;
62 unsigned char addr_bytes;
63 unsigned char length;
64 unsigned char x86_64;
65
66 const insn_byte_t *kaddr; /* kernel address of insn to analyze */
67 const insn_byte_t *next_byte;
68};
69
/* ModRM byte: mod in bits 7:6, reg in bits 5:3, r/m in bits 2:0 */
#define X86_MODRM_MOD(modrm)	(((modrm) >> 6) & 0x03)
#define X86_MODRM_REG(modrm)	(((modrm) >> 3) & 0x07)
#define X86_MODRM_RM(modrm)	((modrm) & 0x07)

/* SIB byte: scale in bits 7:6, index in bits 5:3, base in bits 2:0 */
#define X86_SIB_SCALE(sib)	(((sib) >> 6) & 0x03)
#define X86_SIB_INDEX(sib)	(((sib) >> 3) & 0x07)
#define X86_SIB_BASE(sib)	((sib) & 0x07)

/* REX prefix flag bits; each macro yields the masked bit, not 0/1 */
#define X86_REX_W(rex)	((rex) & 0x08)
#define X86_REX_R(rex)	((rex) & 0x04)
#define X86_REX_X(rex)	((rex) & 0x02)
#define X86_REX_B(rex)	((rex) & 0x01)
82
83/* The last prefix is needed for two-byte and three-byte opcodes */
84static inline insn_byte_t insn_last_prefix(struct insn *insn)
85{
86 return insn->prefixes.bytes[3];
87}
88
/*
 * Decoder entry points (arch/x86/lib/insn.c).  Each insn_get_xxx()
 * first decodes the fields that precede its own, if not done yet.
 */
void insn_init(struct insn *insn, const void *kaddr, int x86_64);
void insn_get_prefixes(struct insn *insn);
void insn_get_opcode(struct insn *insn);
void insn_get_modrm(struct insn *insn);
void insn_get_sib(struct insn *insn);
void insn_get_displacement(struct insn *insn);
void insn_get_immediate(struct insn *insn);
void insn_get_length(struct insn *insn);
97
/*
 * Make insn->attr final.  For opcode groups the attribute depends on
 * the ModRM byte, so this simply decodes up to and including ModRM.
 */
static inline void insn_get_attribute(struct insn *insn)
{
	insn_get_modrm(insn);
}
103
/* Returns non-zero if the instruction uses RIP-relative addressing */
int insn_rip_relative(struct insn *insn);
106
/*
 * Init insn for kernel text: the decoding mode follows the kernel's own
 * configuration (64-bit when CONFIG_X86_64 is defined, 32-bit otherwise).
 */
static inline void kernel_insn_init(struct insn *insn, const void *kaddr)
{
#ifdef CONFIG_X86_64
	const int x86_64 = 1;
#else /* CONFIG_X86_32 */
	const int x86_64 = 0;
#endif

	insn_init(insn, kaddr, x86_64);
}
116
117/* Offset of each field from kaddr */
118static inline int insn_offset_rex_prefix(struct insn *insn)
119{
120 return insn->prefixes.nbytes;
121}
122static inline int insn_offset_opcode(struct insn *insn)
123{
124 return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes;
125}
126static inline int insn_offset_modrm(struct insn *insn)
127{
128 return insn_offset_opcode(insn) + insn->opcode.nbytes;
129}
130static inline int insn_offset_sib(struct insn *insn)
131{
132 return insn_offset_modrm(insn) + insn->modrm.nbytes;
133}
134static inline int insn_offset_displacement(struct insn *insn)
135{
136 return insn_offset_sib(insn) + insn->sib.nbytes;
137}
138static inline int insn_offset_immediate(struct insn *insn)
139{
140 return insn_offset_displacement(insn) + insn->displacement.nbytes;
141}
142
143#endif /* _ASM_X86_INSN_H */
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 07c31899c9c2..c77f8a7c531d 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -2,12 +2,25 @@
2# Makefile for x86 specific library files. 2# Makefile for x86 specific library files.
3# 3#
4 4
5inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk
6inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt
7quiet_cmd_inat_tables = GEN $@
8 cmd_inat_tables = $(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@
9
10$(obj)/inat-tables.c: $(inat_tables_script) $(inat_tables_maps)
11 $(call cmd,inat_tables)
12
13$(obj)/inat.o: $(obj)/inat-tables.c
14
15clean-files := inat-tables.c
16
5obj-$(CONFIG_SMP) := msr.o 17obj-$(CONFIG_SMP) := msr.o
6 18
7lib-y := delay.o 19lib-y := delay.o
8lib-y += thunk_$(BITS).o 20lib-y += thunk_$(BITS).o
9lib-y += usercopy_$(BITS).o getuser.o putuser.o 21lib-y += usercopy_$(BITS).o getuser.o putuser.o
10lib-y += memcpy_$(BITS).o 22lib-y += memcpy_$(BITS).o
23lib-y += insn.o inat.o
11 24
12ifeq ($(CONFIG_X86_32),y) 25ifeq ($(CONFIG_X86_32),y)
13 obj-y += atomic64_32.o 26 obj-y += atomic64_32.o
diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c
new file mode 100644
index 000000000000..054656a01dfd
--- /dev/null
+++ b/arch/x86/lib/inat.c
@@ -0,0 +1,78 @@
1/*
2 * x86 instruction attribute tables
3 *
4 * Written by Masami Hiramatsu <mhiramat@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 *
20 */
21#include <asm/insn.h>
22
23/* Attribute tables are generated from opcode map */
24#include "inat-tables.c"
25
26/* Attribute search APIs */
27insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode)
28{
29 return inat_primary_table[opcode];
30}
31
32insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, insn_byte_t last_pfx,
33 insn_attr_t esc_attr)
34{
35 const insn_attr_t *table;
36 insn_attr_t lpfx_attr;
37 int n, m = 0;
38
39 n = inat_escape_id(esc_attr);
40 if (last_pfx) {
41 lpfx_attr = inat_get_opcode_attribute(last_pfx);
42 m = inat_last_prefix_id(lpfx_attr);
43 }
44 table = inat_escape_tables[n][0];
45 if (!table)
46 return 0;
47 if (inat_has_variant(table[opcode]) && m) {
48 table = inat_escape_tables[n][m];
49 if (!table)
50 return 0;
51 }
52 return table[opcode];
53}
54
55insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx,
56 insn_attr_t grp_attr)
57{
58 const insn_attr_t *table;
59 insn_attr_t lpfx_attr;
60 int n, m = 0;
61
62 n = inat_group_id(grp_attr);
63 if (last_pfx) {
64 lpfx_attr = inat_get_opcode_attribute(last_pfx);
65 m = inat_last_prefix_id(lpfx_attr);
66 }
67 table = inat_group_tables[n][0];
68 if (!table)
69 return inat_group_common_attribute(grp_attr);
70 if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && m) {
71 table = inat_escape_tables[n][m];
72 if (!table)
73 return inat_group_common_attribute(grp_attr);
74 }
75 return table[X86_MODRM_REG(modrm)] |
76 inat_group_common_attribute(grp_attr);
77}
78
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
new file mode 100644
index 000000000000..dfd56a30053f
--- /dev/null
+++ b/arch/x86/lib/insn.c
@@ -0,0 +1,464 @@
1/*
2 * x86 instruction analysis
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) IBM Corporation, 2002, 2004, 2009
19 */
20
21#include <linux/string.h>
22#include <asm/inat.h>
23#include <asm/insn.h>
24
/*
 * Read a value of type @t at @insn->next_byte and advance next_byte
 * past it.  The bytes are copied with memcpy() so the read is valid at
 * any alignment and the const instruction buffer is never cast to a
 * non-const pointer.  @insn is evaluated more than once.
 */
#define get_next(t, insn)	\
	({ t r; memcpy(&r, (insn)->next_byte, sizeof(r)); \
	   (insn)->next_byte += sizeof(r); r; })

/* Like get_next(), but leave @insn->next_byte where it is. */
#define peek_next(t, insn)	\
	({ t r; memcpy(&r, (insn)->next_byte, sizeof(r)); r; })
30
31/**
32 * insn_init() - initialize struct insn
33 * @insn: &struct insn to be initialized
34 * @kaddr: address (in kernel memory) of instruction (or copy thereof)
35 * @x86_64: !0 for 64-bit kernel or 64-bit app
36 */
37void insn_init(struct insn *insn, const void *kaddr, int x86_64)
38{
39 memset(insn, 0, sizeof(*insn));
40 insn->kaddr = kaddr;
41 insn->next_byte = kaddr;
42 insn->x86_64 = x86_64 ? 1 : 0;
43 insn->opnd_bytes = 4;
44 if (x86_64)
45 insn->addr_bytes = 8;
46 else
47 insn->addr_bytes = 4;
48}
49
50/**
51 * insn_get_prefixes - scan x86 instruction prefix bytes
52 * @insn: &struct insn containing instruction
53 *
54 * Populates the @insn->prefixes bitmap, and updates @insn->next_byte
55 * to point to the (first) opcode. No effect if @insn->prefixes.got
56 * is already set.
57 */
58void insn_get_prefixes(struct insn *insn)
59{
60 struct insn_field *prefixes = &insn->prefixes;
61 insn_attr_t attr;
62 insn_byte_t b, lb;
63 int i, nb;
64
65 if (prefixes->got)
66 return;
67
68 nb = 0;
69 lb = 0;
70 b = peek_next(insn_byte_t, insn);
71 attr = inat_get_opcode_attribute(b);
72 while (inat_is_prefix(attr)) {
73 /* Skip if same prefix */
74 for (i = 0; i < nb; i++)
75 if (prefixes->bytes[i] == b)
76 goto found;
77 if (nb == 4)
78 /* Invalid instruction */
79 break;
80 prefixes->bytes[nb++] = b;
81 if (inat_is_address_size_prefix(attr)) {
82 /* address size switches 2/4 or 4/8 */
83 if (insn->x86_64)
84 insn->addr_bytes ^= 12;
85 else
86 insn->addr_bytes ^= 6;
87 } else if (inat_is_operand_size_prefix(attr)) {
88 /* oprand size switches 2/4 */
89 insn->opnd_bytes ^= 6;
90 }
91found:
92 prefixes->nbytes++;
93 insn->next_byte++;
94 lb = b;
95 b = peek_next(insn_byte_t, insn);
96 attr = inat_get_opcode_attribute(b);
97 }
98 /* Set the last prefix */
99 if (lb && lb != insn->prefixes.bytes[3]) {
100 if (unlikely(insn->prefixes.bytes[3])) {
101 /* Swap the last prefix */
102 b = insn->prefixes.bytes[3];
103 for (i = 0; i < nb; i++)
104 if (prefixes->bytes[i] == lb)
105 prefixes->bytes[i] = b;
106 }
107 insn->prefixes.bytes[3] = lb;
108 }
109
110 if (insn->x86_64) {
111 b = peek_next(insn_byte_t, insn);
112 attr = inat_get_opcode_attribute(b);
113 if (inat_is_rex_prefix(attr)) {
114 insn->rex_prefix.value = b;
115 insn->rex_prefix.nbytes = 1;
116 insn->next_byte++;
117 if (X86_REX_W(b))
118 /* REX.W overrides opnd_size */
119 insn->opnd_bytes = 8;
120 }
121 }
122 insn->rex_prefix.got = 1;
123 prefixes->got = 1;
124 return;
125}
126
127/**
128 * insn_get_opcode - collect opcode(s)
129 * @insn: &struct insn containing instruction
130 *
131 * Populates @insn->opcode, updates @insn->next_byte to point past the
132 * opcode byte(s), and set @insn->attr (except for groups).
133 * If necessary, first collects any preceding (prefix) bytes.
134 * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got
135 * is already 1.
136 */
137void insn_get_opcode(struct insn *insn)
138{
139 struct insn_field *opcode = &insn->opcode;
140 insn_byte_t op, pfx;
141 if (opcode->got)
142 return;
143 if (!insn->prefixes.got)
144 insn_get_prefixes(insn);
145
146 /* Get first opcode */
147 op = get_next(insn_byte_t, insn);
148 opcode->bytes[0] = op;
149 opcode->nbytes = 1;
150 insn->attr = inat_get_opcode_attribute(op);
151 while (inat_is_escape(insn->attr)) {
152 /* Get escaped opcode */
153 op = get_next(insn_byte_t, insn);
154 opcode->bytes[opcode->nbytes++] = op;
155 pfx = insn_last_prefix(insn);
156 insn->attr = inat_get_escape_attribute(op, pfx, insn->attr);
157 }
158 opcode->got = 1;
159}
160
161/**
162 * insn_get_modrm - collect ModRM byte, if any
163 * @insn: &struct insn containing instruction
164 *
165 * Populates @insn->modrm and updates @insn->next_byte to point past the
166 * ModRM byte, if any. If necessary, first collects the preceding bytes
167 * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1.
168 */
169void insn_get_modrm(struct insn *insn)
170{
171 struct insn_field *modrm = &insn->modrm;
172 insn_byte_t pfx, mod;
173 if (modrm->got)
174 return;
175 if (!insn->opcode.got)
176 insn_get_opcode(insn);
177
178 if (inat_has_modrm(insn->attr)) {
179 mod = get_next(insn_byte_t, insn);
180 modrm->value = mod;
181 modrm->nbytes = 1;
182 if (inat_is_group(insn->attr)) {
183 pfx = insn_last_prefix(insn);
184 insn->attr = inat_get_group_attribute(mod, pfx,
185 insn->attr);
186 }
187 }
188
189 if (insn->x86_64 && inat_is_force64(insn->attr))
190 insn->opnd_bytes = 8;
191 modrm->got = 1;
192}
193
194
195/**
196 * insn_rip_relative() - Does instruction use RIP-relative addressing mode?
197 * @insn: &struct insn containing instruction
198 *
199 * If necessary, first collects the instruction up to and including the
200 * ModRM byte. No effect if @insn->x86_64 is 0.
201 */
202int insn_rip_relative(struct insn *insn)
203{
204 struct insn_field *modrm = &insn->modrm;
205
206 if (!insn->x86_64)
207 return 0;
208 if (!modrm->got)
209 insn_get_modrm(insn);
210 /*
211 * For rip-relative instructions, the mod field (top 2 bits)
212 * is zero and the r/m field (bottom 3 bits) is 0x5.
213 */
214 return (modrm->nbytes && (modrm->value & 0xc7) == 0x5);
215}
216
217/**
218 * insn_get_sib() - Get the SIB byte of instruction
219 * @insn: &struct insn containing instruction
220 *
221 * If necessary, first collects the instruction up to and including the
222 * ModRM byte.
223 */
224void insn_get_sib(struct insn *insn)
225{
226 insn_byte_t modrm;
227
228 if (insn->sib.got)
229 return;
230 if (!insn->modrm.got)
231 insn_get_modrm(insn);
232 if (insn->modrm.nbytes) {
233 modrm = (insn_byte_t)insn->modrm.value;
234 if (insn->addr_bytes != 2 &&
235 X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) {
236 insn->sib.value = get_next(insn_byte_t, insn);
237 insn->sib.nbytes = 1;
238 }
239 }
240 insn->sib.got = 1;
241}
242
243
244/**
245 * insn_get_displacement() - Get the displacement of instruction
246 * @insn: &struct insn containing instruction
247 *
248 * If necessary, first collects the instruction up to and including the
249 * SIB byte.
250 * Displacement value is sign-expanded.
251 */
252void insn_get_displacement(struct insn *insn)
253{
254 insn_byte_t mod, rm, base;
255
256 if (insn->displacement.got)
257 return;
258 if (!insn->sib.got)
259 insn_get_sib(insn);
260 if (insn->modrm.nbytes) {
261 /*
262 * Interpreting the modrm byte:
263 * mod = 00 - no displacement fields (exceptions below)
264 * mod = 01 - 1-byte displacement field
265 * mod = 10 - displacement field is 4 bytes, or 2 bytes if
266 * address size = 2 (0x67 prefix in 32-bit mode)
267 * mod = 11 - no memory operand
268 *
269 * If address size = 2...
270 * mod = 00, r/m = 110 - displacement field is 2 bytes
271 *
272 * If address size != 2...
273 * mod != 11, r/m = 100 - SIB byte exists
274 * mod = 00, SIB base = 101 - displacement field is 4 bytes
275 * mod = 00, r/m = 101 - rip-relative addressing, displacement
276 * field is 4 bytes
277 */
278 mod = X86_MODRM_MOD(insn->modrm.value);
279 rm = X86_MODRM_RM(insn->modrm.value);
280 base = X86_SIB_BASE(insn->sib.value);
281 if (mod == 3)
282 goto out;
283 if (mod == 1) {
284 insn->displacement.value = get_next(char, insn);
285 insn->displacement.nbytes = 1;
286 } else if (insn->addr_bytes == 2) {
287 if ((mod == 0 && rm == 6) || mod == 2) {
288 insn->displacement.value =
289 get_next(short, insn);
290 insn->displacement.nbytes = 2;
291 }
292 } else {
293 if ((mod == 0 && rm == 5) || mod == 2 ||
294 (mod == 0 && base == 5)) {
295 insn->displacement.value = get_next(int, insn);
296 insn->displacement.nbytes = 4;
297 }
298 }
299 }
300out:
301 insn->displacement.got = 1;
302}
303
304/* Decode moffset16/32/64 */
305static void __get_moffset(struct insn *insn)
306{
307 switch (insn->addr_bytes) {
308 case 2:
309 insn->moffset1.value = get_next(short, insn);
310 insn->moffset1.nbytes = 2;
311 break;
312 case 4:
313 insn->moffset1.value = get_next(int, insn);
314 insn->moffset1.nbytes = 4;
315 break;
316 case 8:
317 insn->moffset1.value = get_next(int, insn);
318 insn->moffset1.nbytes = 4;
319 insn->moffset2.value = get_next(int, insn);
320 insn->moffset2.nbytes = 4;
321 break;
322 }
323 insn->moffset1.got = insn->moffset2.got = 1;
324}
325
326/* Decode imm v32(Iz) */
327static void __get_immv32(struct insn *insn)
328{
329 switch (insn->opnd_bytes) {
330 case 2:
331 insn->immediate.value = get_next(short, insn);
332 insn->immediate.nbytes = 2;
333 break;
334 case 4:
335 case 8:
336 insn->immediate.value = get_next(int, insn);
337 insn->immediate.nbytes = 4;
338 break;
339 }
340}
341
342/* Decode imm v64(Iv/Ov) */
343static void __get_immv(struct insn *insn)
344{
345 switch (insn->opnd_bytes) {
346 case 2:
347 insn->immediate1.value = get_next(short, insn);
348 insn->immediate1.nbytes = 2;
349 break;
350 case 4:
351 insn->immediate1.value = get_next(int, insn);
352 insn->immediate1.nbytes = 4;
353 break;
354 case 8:
355 insn->immediate1.value = get_next(int, insn);
356 insn->immediate1.nbytes = 4;
357 insn->immediate2.value = get_next(int, insn);
358 insn->immediate2.nbytes = 4;
359 break;
360 }
361 insn->immediate1.got = insn->immediate2.got = 1;
362}
363
364/* Decode ptr16:16/32(Ap) */
365static void __get_immptr(struct insn *insn)
366{
367 switch (insn->opnd_bytes) {
368 case 2:
369 insn->immediate1.value = get_next(short, insn);
370 insn->immediate1.nbytes = 2;
371 break;
372 case 4:
373 insn->immediate1.value = get_next(int, insn);
374 insn->immediate1.nbytes = 4;
375 break;
376 case 8:
377 /* ptr16:64 is not exist (no segment) */
378 return;
379 }
380 insn->immediate2.value = get_next(unsigned short, insn);
381 insn->immediate2.nbytes = 2;
382 insn->immediate1.got = insn->immediate2.got = 1;
383}
384
385/**
386 * insn_get_immediate() - Get the immediates of instruction
387 * @insn: &struct insn containing instruction
388 *
389 * If necessary, first collects the instruction up to and including the
390 * displacement bytes.
391 * Basically, most of immediates are sign-expanded. Unsigned-value can be
392 * get by bit masking with ((1 << (nbytes * 8)) - 1)
393 */
394void insn_get_immediate(struct insn *insn)
395{
396 if (insn->immediate.got)
397 return;
398 if (!insn->displacement.got)
399 insn_get_displacement(insn);
400
401 if (inat_has_moffset(insn->attr)) {
402 __get_moffset(insn);
403 goto done;
404 }
405
406 if (!inat_has_immediate(insn->attr))
407 /* no immediates */
408 goto done;
409
410 switch (inat_immediate_size(insn->attr)) {
411 case INAT_IMM_BYTE:
412 insn->immediate.value = get_next(char, insn);
413 insn->immediate.nbytes = 1;
414 break;
415 case INAT_IMM_WORD:
416 insn->immediate.value = get_next(short, insn);
417 insn->immediate.nbytes = 2;
418 break;
419 case INAT_IMM_DWORD:
420 insn->immediate.value = get_next(int, insn);
421 insn->immediate.nbytes = 4;
422 break;
423 case INAT_IMM_QWORD:
424 insn->immediate1.value = get_next(int, insn);
425 insn->immediate1.nbytes = 4;
426 insn->immediate2.value = get_next(int, insn);
427 insn->immediate2.nbytes = 4;
428 break;
429 case INAT_IMM_PTR:
430 __get_immptr(insn);
431 break;
432 case INAT_IMM_VWORD32:
433 __get_immv32(insn);
434 break;
435 case INAT_IMM_VWORD:
436 __get_immv(insn);
437 break;
438 default:
439 break;
440 }
441 if (inat_has_second_immediate(insn->attr)) {
442 insn->immediate2.value = get_next(char, insn);
443 insn->immediate2.nbytes = 1;
444 }
445done:
446 insn->immediate.got = 1;
447}
448
449/**
450 * insn_get_length() - Get the length of instruction
451 * @insn: &struct insn containing instruction
452 *
453 * If necessary, first collects the instruction up to and including the
454 * immediates bytes.
455 */
456void insn_get_length(struct insn *insn)
457{
458 if (insn->length)
459 return;
460 if (!insn->immediate.got)
461 insn_get_immediate(insn);
462 insn->length = (unsigned char)((unsigned long)insn->next_byte
463 - (unsigned long)insn->kaddr);
464}
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
new file mode 100644
index 000000000000..083dd59dd74b
--- /dev/null
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -0,0 +1,719 @@
# x86 Opcode Maps
#
#<Opcode maps>
# Table: table-name
# Referrer: escaped-name
# opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...]] [| 2nd-mnemonic ...]
# (or)
# opcode: escape # escaped-name
# EndTable
#
#<group maps>
# GrpTable: GrpXXX
# reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...]] [| 2nd-mnemonic ...]
# EndTable
#
16
17Table: one byte opcode
18Referrer:
19# 0x00 - 0x0f
2000: ADD Eb,Gb
2101: ADD Ev,Gv
2202: ADD Gb,Eb
2303: ADD Gv,Ev
2404: ADD AL,Ib
2505: ADD rAX,Iz
2606: PUSH ES (i64)
2707: POP ES (i64)
2808: OR Eb,Gb
2909: OR Ev,Gv
300a: OR Gb,Eb
310b: OR Gv,Ev
320c: OR AL,Ib
330d: OR rAX,Iz
340e: PUSH CS (i64)
350f: escape # 2-byte escape
36# 0x10 - 0x1f
3710: ADC Eb,Gb
3811: ADC Ev,Gv
3912: ADC Gb,Eb
4013: ADC Gv,Ev
4114: ADC AL,Ib
4215: ADC rAX,Iz
4316: PUSH SS (i64)
4417: POP SS (i64)
4518: SBB Eb,Gb
4619: SBB Ev,Gv
471a: SBB Gb,Eb
481b: SBB Gv,Ev
491c: SBB AL,Ib
501d: SBB rAX,Iz
511e: PUSH DS (i64)
521f: POP DS (i64)
53# 0x20 - 0x2f
5420: AND Eb,Gb
5521: AND Ev,Gv
5622: AND Gb,Eb
5723: AND Gv,Ev
5824: AND AL,Ib
25: AND rAX,Iz
6026: SEG=ES (Prefix)
6127: DAA (i64)
6228: SUB Eb,Gb
6329: SUB Ev,Gv
642a: SUB Gb,Eb
652b: SUB Gv,Ev
662c: SUB AL,Ib
672d: SUB rAX,Iz
682e: SEG=CS (Prefix)
692f: DAS (i64)
70# 0x30 - 0x3f
7130: XOR Eb,Gb
7231: XOR Ev,Gv
7332: XOR Gb,Eb
7433: XOR Gv,Ev
7534: XOR AL,Ib
7635: XOR rAX,Iz
7736: SEG=SS (Prefix)
7837: AAA (i64)
7938: CMP Eb,Gb
8039: CMP Ev,Gv
813a: CMP Gb,Eb
823b: CMP Gv,Ev
833c: CMP AL,Ib
843d: CMP rAX,Iz
853e: SEG=DS (Prefix)
863f: AAS (i64)
87# 0x40 - 0x4f
8840: INC eAX (i64) | REX (o64)
8941: INC eCX (i64) | REX.B (o64)
9042: INC eDX (i64) | REX.X (o64)
9143: INC eBX (i64) | REX.XB (o64)
9244: INC eSP (i64) | REX.R (o64)
9345: INC eBP (i64) | REX.RB (o64)
9446: INC eSI (i64) | REX.RX (o64)
9547: INC eDI (i64) | REX.RXB (o64)
9648: DEC eAX (i64) | REX.W (o64)
9749: DEC eCX (i64) | REX.WB (o64)
984a: DEC eDX (i64) | REX.WX (o64)
994b: DEC eBX (i64) | REX.WXB (o64)
1004c: DEC eSP (i64) | REX.WR (o64)
1014d: DEC eBP (i64) | REX.WRB (o64)
1024e: DEC eSI (i64) | REX.WRX (o64)
1034f: DEC eDI (i64) | REX.WRXB (o64)
104# 0x50 - 0x5f
10550: PUSH rAX/r8 (d64)
10651: PUSH rCX/r9 (d64)
10752: PUSH rDX/r10 (d64)
10853: PUSH rBX/r11 (d64)
10954: PUSH rSP/r12 (d64)
11055: PUSH rBP/r13 (d64)
11156: PUSH rSI/r14 (d64)
11257: PUSH rDI/r15 (d64)
11358: POP rAX/r8 (d64)
11459: POP rCX/r9 (d64)
1155a: POP rDX/r10 (d64)
1165b: POP rBX/r11 (d64)
1175c: POP rSP/r12 (d64)
1185d: POP rBP/r13 (d64)
1195e: POP rSI/r14 (d64)
1205f: POP rDI/r15 (d64)
121# 0x60 - 0x6f
12260: PUSHA/PUSHAD (i64)
12361: POPA/POPAD (i64)
12462: BOUND Gv,Ma (i64)
12563: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64)
12664: SEG=FS (Prefix)
12765: SEG=GS (Prefix)
12866: Operand-Size (Prefix)
12967: Address-Size (Prefix)
13068: PUSH Iz (d64)
13169: IMUL Gv,Ev,Iz
1326a: PUSH Ib (d64)
1336b: IMUL Gv,Ev,Ib
1346c: INS/INSB Yb,DX
1356d: INS/INSW/INSD Yz,DX
1366e: OUTS/OUTSB DX,Xb
1376f: OUTS/OUTSW/OUTSD DX,Xz
138# 0x70 - 0x7f
13970: JO Jb
14071: JNO Jb
14172: JB/JNAE/JC Jb
14273: JNB/JAE/JNC Jb
14374: JZ/JE Jb
14475: JNZ/JNE Jb
14576: JBE/JNA Jb
14677: JNBE/JA Jb
14778: JS Jb
14879: JNS Jb
1497a: JP/JPE Jb
1507b: JNP/JPO Jb
1517c: JL/JNGE Jb
1527d: JNL/JGE Jb
1537e: JLE/JNG Jb
1547f: JNLE/JG Jb
155# 0x80 - 0x8f
15680: Grp1 Eb,Ib (1A)
15781: Grp1 Ev,Iz (1A)
15882: Grp1 Eb,Ib (1A),(i64)
15983: Grp1 Ev,Ib (1A)
16084: TEST Eb,Gb
16185: TEST Ev,Gv
16286: XCHG Eb,Gb
16387: XCHG Ev,Gv
16488: MOV Eb,Gb
16589: MOV Ev,Gv
1668a: MOV Gb,Eb
1678b: MOV Gv,Ev
1688c: MOV Ev,Sw
1698d: LEA Gv,M
1708e: MOV Sw,Ew
1718f: Grp1A (1A) | POP Ev (d64)
172# 0x90 - 0x9f
17390: NOP | PAUSE (F3) | XCHG r8,rAX
17491: XCHG rCX/r9,rAX
17592: XCHG rDX/r10,rAX
17693: XCHG rBX/r11,rAX
17794: XCHG rSP/r12,rAX
17895: XCHG rBP/r13,rAX
17996: XCHG rSI/r14,rAX
18097: XCHG rDI/r15,rAX
18198: CBW/CWDE/CDQE
18299: CWD/CDQ/CQO
1839a: CALLF Ap (i64)
1849b: FWAIT/WAIT
1859c: PUSHF/D/Q Fv (d64)
1869d: POPF/D/Q Fv (d64)
1879e: SAHF
1889f: LAHF
189# 0xa0 - 0xaf
190a0: MOV AL,Ob
191a1: MOV rAX,Ov
192a2: MOV Ob,AL
193a3: MOV Ov,rAX
194a4: MOVS/B Xb,Yb
195a5: MOVS/W/D/Q Xv,Yv
196a6: CMPS/B Xb,Yb
197a7: CMPS/W/D Xv,Yv
198a8: TEST AL,Ib
199a9: TEST rAX,Iz
200aa: STOS/B Yb,AL
201ab: STOS/W/D/Q Yv,rAX
202ac: LODS/B AL,Xb
203ad: LODS/W/D/Q rAX,Xv
204ae: SCAS/B AL,Yb
af: SCAS/W/D/Q rAX,Yv
206# 0xb0 - 0xbf
207b0: MOV AL/R8L,Ib
208b1: MOV CL/R9L,Ib
209b2: MOV DL/R10L,Ib
210b3: MOV BL/R11L,Ib
211b4: MOV AH/R12L,Ib
212b5: MOV CH/R13L,Ib
213b6: MOV DH/R14L,Ib
214b7: MOV BH/R15L,Ib
215b8: MOV rAX/r8,Iv
216b9: MOV rCX/r9,Iv
217ba: MOV rDX/r10,Iv
218bb: MOV rBX/r11,Iv
219bc: MOV rSP/r12,Iv
220bd: MOV rBP/r13,Iv
221be: MOV rSI/r14,Iv
222bf: MOV rDI/r15,Iv
223# 0xc0 - 0xcf
224c0: Grp2 Eb,Ib (1A)
225c1: Grp2 Ev,Ib (1A)
226c2: RETN Iw (f64)
227c3: RETN
228c4: LES Gz,Mp (i64)
229c5: LDS Gz,Mp (i64)
230c6: Grp11 Eb,Ib (1A)
231c7: Grp11 Ev,Iz (1A)
232c8: ENTER Iw,Ib
233c9: LEAVE (d64)
234ca: RETF Iw
235cb: RETF
236cc: INT3
237cd: INT Ib
238ce: INTO (i64)
239cf: IRET/D/Q
240# 0xd0 - 0xdf
241d0: Grp2 Eb,1 (1A)
242d1: Grp2 Ev,1 (1A)
243d2: Grp2 Eb,CL (1A)
244d3: Grp2 Ev,CL (1A)
245d4: AAM Ib (i64)
246d5: AAD Ib (i64)
247d6:
248d7: XLAT/XLATB
249d8: ESC
250d9: ESC
251da: ESC
252db: ESC
253dc: ESC
254dd: ESC
255de: ESC
256df: ESC
257# 0xe0 - 0xef
258e0: LOOPNE/LOOPNZ Jb (f64)
259e1: LOOPE/LOOPZ Jb (f64)
260e2: LOOP Jb (f64)
261e3: JrCXZ Jb (f64)
262e4: IN AL,Ib
263e5: IN eAX,Ib
264e6: OUT Ib,AL
265e7: OUT Ib,eAX
266e8: CALL Jz (f64)
267e9: JMP-near Jz (f64)
268ea: JMP-far Ap (i64)
269eb: JMP-short Jb (f64)
270ec: IN AL,DX
271ed: IN eAX,DX
272ee: OUT DX,AL
273ef: OUT DX,eAX
274# 0xf0 - 0xff
275f0: LOCK (Prefix)
276f1:
277f2: REPNE (Prefix)
278f3: REP/REPE (Prefix)
279f4: HLT
280f5: CMC
281f6: Grp3_1 Eb (1A)
282f7: Grp3_2 Ev (1A)
283f8: CLC
284f9: STC
285fa: CLI
286fb: STI
287fc: CLD
288fd: STD
289fe: Grp4 (1A)
290ff: Grp5 (1A)
291EndTable
292
293Table: 2-byte opcode # First Byte is 0x0f
294Referrer: 2-byte escape
295# 0x0f 0x00-0x0f
29600: Grp6 (1A)
29701: Grp7 (1A)
29802: LAR Gv,Ew
29903: LSL Gv,Ew
30004:
30105: SYSCALL (o64)
30206: CLTS
30307: SYSRET (o64)
30408: INVD
30509: WBINVD
3060a:
3070b: UD2 (1B)
3080c:
3090d: NOP Ev
3100e:
3110f:
312# 0x0f 0x10-0x1f
31310:
31411:
31512:
31613:
31714:
31815:
31916:
32017:
32118: Grp16 (1A)
32219:
3231a:
3241b:
3251c:
3261d:
3271e:
3281f: NOP Ev
329# 0x0f 0x20-0x2f
33020: MOV Rd,Cd
33121: MOV Rd,Dd
33222: MOV Cd,Rd
33323: MOV Dd,Rd
33424:
33525:
33626:
33727:
33828: movaps Vps,Wps | movapd Vpd,Wpd (66)
33929: movaps Wps,Vps | movapd Wpd,Vpd (66)
3402a:
3412b:
3422c:
3432d:
3442e:
3452f:
346# 0x0f 0x30-0x3f
34730: WRMSR
34831: RDTSC
34932: RDMSR
35033: RDPMC
35134: SYSENTER
35235: SYSEXIT
35336:
35437: GETSEC
35538: escape # 3-byte escape 1
35639:
3573a: escape # 3-byte escape 2
3583b:
3593c:
3603d:
3613e:
3623f:
363# 0x0f 0x40-0x4f
36440: CMOVO Gv,Ev
36541: CMOVNO Gv,Ev
36642: CMOVB/C/NAE Gv,Ev
36743: CMOVAE/NB/NC Gv,Ev
36844: CMOVE/Z Gv,Ev
36945: CMOVNE/NZ Gv,Ev
37046: CMOVBE/NA Gv,Ev
37147: CMOVA/NBE Gv,Ev
37248: CMOVS Gv,Ev
37349: CMOVNS Gv,Ev
3744a: CMOVP/PE Gv,Ev
3754b: CMOVNP/PO Gv,Ev
3764c: CMOVL/NGE Gv,Ev
3774d: CMOVNL/GE Gv,Ev
3784e: CMOVLE/NG Gv,Ev
3794f: CMOVNLE/G Gv,Ev
380# 0x0f 0x50-0x5f
38150:
38251:
38352:
38453:
38554:
38655:
38756:
38857:
38958:
39059:
3915a:
3925b:
3935c:
3945d:
3955e:
3965f:
397# 0x0f 0x60-0x6f
39860:
39961:
40062:
40163:
40264:
40365:
40466:
40567:
40668:
40769:
4086a:
4096b:
4106c:
4116d:
4126e:
4136f:
414# 0x0f 0x70-0x7f
41570:
41671: Grp12 (1A)
41772: Grp13 (1A)
41873: Grp14 (1A)
41974:
42075:
42176:
42277:
42378: VMREAD Ed/q,Gd/q
42479: VMWRITE Gd/q,Ed/q
4257a:
4267b:
4277c:
4287d:
4297e:
4307f:
431# 0x0f 0x80-0x8f
43280: JO Jz (f64)
43381: JNO Jz (f64)
43482: JB/JNAE/JC Jz (f64)
43583: JNB/JAE/JNC Jz (f64)
43684: JZ/JE Jz (f64)
43785: JNZ/JNE Jz (f64)
43886: JBE/JNA Jz (f64)
43987: JNBE/JA Jz (f64)
44088: JS Jz (f64)
44189: JNS Jz (f64)
4428a: JP/JPE Jz (f64)
4438b: JNP/JPO Jz (f64)
4448c: JL/JNGE Jz (f64)
4458d: JNL/JGE Jz (f64)
4468e: JLE/JNG Jz (f64)
4478f: JNLE/JG Jz (f64)
448# 0x0f 0x90-0x9f
44990: SETO Eb
45091: SETNO Eb
45192: SETB/C/NAE Eb
45293: SETAE/NB/NC Eb
45394: SETE/Z Eb
45495: SETNE/NZ Eb
45596: SETBE/NA Eb
45697: SETA/NBE Eb
45798: SETS Eb
45899: SETNS Eb
4599a: SETP/PE Eb
4609b: SETNP/PO Eb
4619c: SETL/NGE Eb
4629d: SETNL/GE Eb
4639e: SETLE/NG Eb
4649f: SETNLE/G Eb
465# 0x0f 0xa0-0xaf
466a0: PUSH FS (d64)
467a1: POP FS (d64)
468a2: CPUID
469a3: BT Ev,Gv
470a4: SHLD Ev,Gv,Ib
471a5: SHLD Ev,Gv,CL
472a6:
473a7: GrpRNG
474a8: PUSH GS (d64)
475a9: POP GS (d64)
476aa: RSM
477ab: BTS Ev,Gv
478ac: SHRD Ev,Gv,Ib
479ad: SHRD Ev,Gv,CL
480ae: Grp15 (1A),(1C)
481af: IMUL Gv,Ev
482# 0x0f 0xb0-0xbf
483b0: CMPXCHG Eb,Gb
484b1: CMPXCHG Ev,Gv
485b2: LSS Gv,Mp
486b3: BTR Ev,Gv
487b4: LFS Gv,Mp
488b5: LGS Gv,Mp
489b6: MOVZX Gv,Eb
490b7: MOVZX Gv,Ew
491b8: JMPE | POPCNT Gv,Ev (F3)
492b9: Grp10 (1A)
493ba: Grp8 Ev,Ib (1A)
494bb: BTC Ev,Gv
495bc: BSF Gv,Ev
496bd: BSR Gv,Ev
497be: MOVSX Gv,Eb
498bf: MOVSX Gv,Ew
499# 0x0f 0xc0-0xcf
500c0: XADD Eb,Gb
501c1: XADD Ev,Gv
502c2:
503c3: movnti Md/q,Gd/q
504c4:
505c5:
506c6:
507c7: Grp9 (1A)
508c8: BSWAP RAX/EAX/R8/R8D
509c9: BSWAP RCX/ECX/R9/R9D
510ca: BSWAP RDX/EDX/R10/R10D
511cb: BSWAP RBX/EBX/R11/R11D
512cc: BSWAP RSP/ESP/R12/R12D
513cd: BSWAP RBP/EBP/R13/R13D
514ce: BSWAP RSI/ESI/R14/R14D
515cf: BSWAP RDI/EDI/R15/R15D
516# 0x0f 0xd0-0xdf
517d0:
518d1:
519d2:
520d3:
521d4:
522d5:
523d6:
524d7:
525d8:
526d9:
527da:
528db:
529dc:
530dd:
531de:
532df:
533# 0x0f 0xe0-0xef
534e0:
535e1:
536e2:
537e3:
538e4:
539e5:
540e6:
541e7:
542e8:
543e9:
544ea:
545eb:
546ec:
547ed:
548ee:
549ef:
550# 0x0f 0xf0-0xff
551f0:
552f1:
553f2:
554f3:
555f4:
556f5:
557f6:
558f7:
559f8:
560f9:
561fa:
562fb:
563fc:
564fd:
565fe:
566ff:
567EndTable
568
569Table: 3-byte opcode 1
570Referrer: 3-byte escape 1
57180: INVEPT Gd/q,Mdq (66)
57281: INVPID Gd/q,Mdq (66)
573f0: MOVBE Gv,Mv | CRC32 Gd,Eb (F2)
574f1: MOVBE Mv,Gv | CRC32 Gd,Ev (F2)
575EndTable
576
577Table: 3-byte opcode 2
578Referrer: 3-byte escape 2
# all opcodes are for SSE
580EndTable
581
582GrpTable: Grp1
5830: ADD
5841: OR
5852: ADC
5863: SBB
5874: AND
5885: SUB
5896: XOR
5907: CMP
591EndTable
592
593GrpTable: Grp1A
5940: POP
595EndTable
596
597GrpTable: Grp2
5980: ROL
5991: ROR
6002: RCL
6013: RCR
6024: SHL/SAL
6035: SHR
6046:
6057: SAR
606EndTable
607
608GrpTable: Grp3_1
6090: TEST Eb,Ib
6101:
6112: NOT Eb
6123: NEG Eb
6134: MUL AL,Eb
6145: IMUL AL,Eb
6156: DIV AL,Eb
6167: IDIV AL,Eb
617EndTable
618
619GrpTable: Grp3_2
6200: TEST Ev,Iz
6211:
6222: NOT Ev
6233: NEG Ev
6244: MUL rAX,Ev
6255: IMUL rAX,Ev
6266: DIV rAX,Ev
6277: IDIV rAX,Ev
628EndTable
629
630GrpTable: Grp4
6310: INC Eb
6321: DEC Eb
633EndTable
634
635GrpTable: Grp5
6360: INC Ev
6371: DEC Ev
6382: CALLN Ev (f64)
6393: CALLF Ep
6404: JMPN Ev (f64)
6415: JMPF Ep
6426: PUSH Ev (d64)
6437:
644EndTable
645
646GrpTable: Grp6
6470: SLDT Rv/Mw
6481: STR Rv/Mw
6492: LLDT Ew
6503: LTR Ew
6514: VERR Ew
6525: VERW Ew
653EndTable
654
655GrpTable: Grp7
6560: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B)
6571: SIDT Ms | MONITOR (000),(11B) | MWAIT (001)
6582: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B)
6593: LIDT Ms
6604: SMSW Mw/Rv
6615:
6626: LMSW Ew
6637: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B)
664EndTable
665
666GrpTable: Grp8
6674: BT
6685: BTS
6696: BTR
6707: BTC
671EndTable
672
673GrpTable: Grp9
6741: CMPXCHG8B/16B Mq/Mdq
6756: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3)
6767: VMPTRST Mq
677EndTable
678
679GrpTable: Grp10
680EndTable
681
682GrpTable: Grp11
6830: MOV
684EndTable
685
686GrpTable: Grp12
687EndTable
688
689GrpTable: Grp13
690EndTable
691
692GrpTable: Grp14
693EndTable
694
695GrpTable: Grp15
6960: fxsave
1: fxrstor
6982: ldmxcsr
6993: stmxcsr
7004: XSAVE
7015: XRSTOR | lfence (11B)
7026: mfence (11B)
7037: clflush | sfence (11B)
704EndTable
705
706GrpTable: Grp16
7070: prefetch NTA
7081: prefetch T0
7092: prefetch T1
7103: prefetch T2
711EndTable
712
713GrpTable: GrpRNG
7140: xstore-rng
7151: xcrypt-ecb
7162: xcrypt-cbc
7174: xcrypt-cfb
7185: xcrypt-ofb
719EndTable
diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk
new file mode 100644
index 000000000000..93b62c92d044
--- /dev/null
+++ b/arch/x86/tools/gen-insn-attr-x86.awk
@@ -0,0 +1,314 @@
1#!/bin/awk -f
2# gen-insn-attr-x86.awk: Instruction attribute table generator
3# Written by Masami Hiramatsu <mhiramat@redhat.com>
4#
5# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c
6
BEGIN {
	# Banner for the generated C file.
	print "/* x86 opcode map generated from x86-opcode-map.txt */"
	print "/* Do not change this code. */"

	# Next ids to hand out to newly seen escape names and group names.
	geid = 1
	ggid = 1

	# Ids of the table currently being parsed; -1 is the value the
	# EndTable rule tests for to tell non-group tables from group tables.
	eid = -1
	gid = -1

	# Per-table attribute arrays, emptied again at every EndTable.
	delete table
	delete lptable1
	delete lptable2
	delete lptable3
	# Names of the tables that were actually printed, by id and variant.
	delete etable
	delete gtable

	# Upper bound (exclusive) of the variant index used by the END rule.
	max_lprefix = 4

	# Token classifiers used while parsing one opcode line.
	opnd_expr = "^[[:alpha:]]"
	ext_expr = "^\\("
	sep_expr = "^\\|$"
	group_expr = "^Grp[[:alnum:]]+"
	rex_expr = "^REX(\\.[XRWB]+)*"
	fpu_expr = "^ESC" # TODO
	force64_expr = "\\([df]64\\)"
	prefix_expr = "\\(Prefix\\)"

	# (66)/(F2)/(F3) markers select lptable1/lptable2/lptable3.
	lprefix1_expr = "\\(66\\)"
	lprefix2_expr = "\\(F2\\)"
	lprefix3_expr = "\\(F3\\)"

	# Operands that imply a ModRM byte.
	modrm_expr = "^([CDEGMNPQRSUVW][[:lower:]]+|NTA|T[012])"

	# Operands that carry immediate / moffset bytes, and their flags.
	imm_expr = "^[IJAO][[:lower:]]"
	imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
	imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
	imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
	imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)"
	imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)"
	imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)"
	imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
	imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
	imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
	imm_flag["Ob"] = "INAT_MOFFSET"
	imm_flag["Ov"] = "INAT_MOFFSET"

	# Prefix mnemonics as written in the map, and their INAT_PFX_* names.
	prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
	prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
	prefix_num["LOCK"] = "INAT_PFX_LOCK"
	prefix_num["REPNE"] = "INAT_PFX_REPNE"
	prefix_num["REP/REPE"] = "INAT_PFX_REPE"
	prefix_num["SEG=CS"] = "INAT_PFX_CS"
	prefix_num["SEG=DS"] = "INAT_PFX_DS"
	prefix_num["SEG=ES"] = "INAT_PFX_ES"
	prefix_num["SEG=FS"] = "INAT_PFX_FS"
	prefix_num["SEG=GS"] = "INAT_PFX_GS"
	prefix_num["SEG=SS"] = "INAT_PFX_SS"
}
63
# Report a problem in the opcode map on stderr, tagged with the current
# input record number (NR), then exit with status 1.
function semantic_error(msg)
{
	print ("Semantic error at " NR ": " msg) > "/dev/stderr"
	exit 1
}
68
# Print msg on standard output with a "DEBUG: " tag (development aid).
function debug(msg)
{
	print ("DEBUG: " msg)
}
72
# Return the number of elements currently stored in arr.
# key and total are locals.
function array_size(arr,   key,total)
{
	total = 0
	for (key in arr)
		total += 1
	return total
}
79
# Copy every "Table:" header line into the output as a C comment.
/^Table:/ {
	printf("%s\n", "/* " $0 " */")
}
83
# "Referrer:" names the escape through which the following table is reached
# and thereby selects the C identifier (tname) the table is emitted under.
/^Referrer:/ {
	if (NF == 1) {
		# primary opcode table (no referrer name given)
		tname = "inat_primary_table"
		eid = -1
	} else {
		# escape opcode table: the name is every field after
		# "Referrer:", joined without blanks, exactly as the opcode-line
		# rule builds the key when it registers an "escape" entry.
		ref = ""
		for (i = 2; i <= NF; i++)
			ref = ref $i
		# An unregistered name would otherwise yield an empty id and a
		# table that the END rule never references; reject it the same
		# way the GrpTable rule rejects an unknown group.
		if (!(ref in escape))
			semantic_error("No escape: " ref)
		eid = escape[ref]
		tname = sprintf("inat_escape_table_%d", eid)
	}
}
98
# "GrpTable: GrpXXX" starts a group table: echo the header as a C comment
# and select the id and C identifier of a group that an earlier opcode line
# must already have registered.
/^GrpTable:/ {
	printf("%s\n", "/* " $0 " */")
	if ($2 in group) {
		gid = group[$2]
		tname = "inat_group_table_" gid
	} else
		semantic_error("No group: " $2 )
}
106
# Emit one C attribute table.
#   tbl  - array indexed by the formatted slot number (e.g. "0x0f")
#   name - declarator to print after the type, including its [size]
#   fmt  - sprintf format that turns a slot number into an index of tbl
#   n    - number of slots to scan (0 .. n-1)
# Only slots holding a non-empty value are printed, as designated
# initializers.  i and id are locals, so callers' variables of the same
# name are left untouched, and the membership test keeps the lookup from
# creating empty elements in tbl.
function print_table(tbl,name,fmt,n,    i,id)
{
	print "const insn_attr_t " name " = {"
	for (i = 0; i < n; i++) {
		id = sprintf(fmt, i)
		if ((id in tbl) && tbl[id])
			print "	[" id "] = " tbl[id] ","
	}
	print "};"
}
117
# Print tbl under the given C name if it has any entries.  Group tables are
# 8 slots indexed "0x%x"; opcode tables are 256 slots indexed "0x%02x".
# Returns 1 when a table was printed, 0 when tbl was empty.
function emit_attr_table(tbl, name, in_group)
{
	if (array_size(tbl) == 0)
		return 0
	if (in_group)
		print_table(tbl, name "[INAT_GROUP_TABLE_SIZE]", "0x%x", 8)
	else
		print_table(tbl, name "[INAT_OPCODE_TABLE_SIZE]", "0x%02x", 256)
	return 1
}

# "EndTable" closes the current table: emit the base table and its three
# last-prefix variants (suffixes _1, _2, _3), remember the names of those
# that were printed for the END rule, then reset all per-table state.
/^EndTable/ {
	if (gid != -1) {
		# group table
		if (emit_attr_table(table, tname, 1))
			gtable[gid,0] = tname
		if (emit_attr_table(lptable1, tname "_1", 1))
			gtable[gid,1] = tname "_1"
		if (emit_attr_table(lptable2, tname "_2", 1))
			gtable[gid,2] = tname "_2"
		if (emit_attr_table(lptable3, tname "_3", 1))
			gtable[gid,3] = tname "_3"
	} else {
		# primary or escaped opcode table
		if (emit_attr_table(table, tname, 0))
			etable[eid,0] = tname
		if (emit_attr_table(lptable1, tname "_1", 0))
			etable[eid,1] = tname "_1"
		if (emit_attr_table(lptable2, tname "_2", 0))
			etable[eid,2] = tname "_2"
		if (emit_attr_table(lptable3, tname "_3", 0))
			etable[eid,3] = tname "_3"
	}
	print ""

	delete table
	delete lptable1
	delete lptable2
	delete lptable3
	gid = -1
	eid = -1
}
172
# Combine two flag expressions into one C expression joined by " | ".
# If either side is empty (false), the other side is returned unchanged.
function add_flags(old,new)
{
	if (!old)
		return new
	if (!new)
		return old
	return old " | " new
}
181
# Convert an operand list to attribute flags.
#   opnd - array produced by split(), i.e. indexed 1, 2, 3, ...
# Returns the immediate flag (plus INAT_SCNDIMM when a second immediate
# follows) combined with INAT_MODRM when any operand implies a ModRM byte;
# the result is empty when neither applies.
#
# Operands are visited in list order on purpose: "for (x in opnd)" has an
# unspecified order, and the rule that only "Ib" may appear as a second
# immediate depends on seeing the first immediate first (e.g. "Iw,Ib").
# i, imm, mod and n are locals.
function convert_operands(opnd,       i,imm,mod,n)
{
	imm = null
	mod = null
	for (n = 1; n in opnd; n++) {
		i = opnd[n]
		if (match(i, imm_expr) == 1) {
			if (!imm_flag[i])
				semantic_error("Unknown imm opnd: " i)
			if (imm) {
				# a second immediate is only accepted as a byte
				if (i != "Ib")
					semantic_error("Second IMM error")
				imm = add_flags(imm, "INAT_SCNDIMM")
			} else
				imm = imm_flag[i]
		} else if (match(i, modrm_expr))
			mod = "INAT_MODRM"
	}
	return add_flags(imm, mod)
}
203
# One table entry: "<hex index>: <definition>[ | <alternative> ...]".
# Each alternative is "mnemonic [operands] [(ext)...]".  The flags derived
# from every alternative are accumulated into table[idx], or into
# lptable1/2/3[idx] when the alternative is marked (66)/(F2)/(F3).
# The ':' is matched literally; it needs no backslash, and "\:" is not a
# defined regexp escape.
/^[0-9a-f]+:/ {
	# NOTE(review): a match on the very first input line is skipped; the
	# reason is not evident from this script.
	if (NR == 1)
		next
	# get index
	idx = "0x" substr($1, 1, index($1,":") - 1)
	if (idx in table)
		semantic_error("Redefine " idx " in " tname)

	# check if escaped opcode
	if ("escape" == $2) {
		if ($3 != "#")
			semantic_error("No escaped name")
		# the escape name is every field after '#', joined without blanks
		ref = ""
		for (i = 4; i <= NF; i++)
			ref = ref $i
		if (ref in escape)
			semantic_error("Redefine escape (" ref ")")
		escape[ref] = geid
		geid++
		table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")"
		next
	}

	variant = null
	# converts
	i = 2
	while (i <= NF) {
		opcode = $(i++)
		delete opnds
		ext = null
		flags = null
		opnd = null
		# parse one opcode
		if (match($i, opnd_expr)) {
			opnd = $i
			split($(i++), opnds, ",")
			flags = convert_operands(opnds)
		}
		if (match($i, ext_expr))
			ext = $(i++)
		if (match($i, sep_expr))
			i++
		else if (i < NF)
			semantic_error($i " is not a separator")

		# check if group opcode
		if (match(opcode, group_expr)) {
			# first use of a group name allocates its id
			if (!(opcode in group)) {
				group[opcode] = ggid
				ggid++
			}
			flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")")
		}
		# check force(or default) 64bit
		if (match(ext, force64_expr))
			flags = add_flags(flags, "INAT_FORCE64")

		# check REX prefix
		if (match(opcode, rex_expr))
			flags = add_flags(flags, "INAT_REXPFX")

		# check coprocessor escape : TODO
		if (match(opcode, fpu_expr))
			flags = add_flags(flags, "INAT_MODRM")

		# check prefixes
		if (match(ext, prefix_expr)) {
			if (!prefix_num[opcode])
				semantic_error("Unknown prefix: " opcode)
			flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")")
		}
		# an alternative that produced no flags contributes nothing
		if (length(flags) == 0)
			continue
		# check if last prefix
		if (match(ext, lprefix1_expr)) {
			lptable1[idx] = add_flags(lptable1[idx],flags)
			variant = "INAT_VARIANT"
		} else if (match(ext, lprefix2_expr)) {
			lptable2[idx] = add_flags(lptable2[idx],flags)
			variant = "INAT_VARIANT"
		} else if (match(ext, lprefix3_expr)) {
			lptable3[idx] = add_flags(lptable3[idx],flags)
			variant = "INAT_VARIANT"
		} else {
			table[idx] = add_flags(table[idx],flags)
		}
	}
	# mark the base entry when any alternative went to a variant table
	if (variant)
		table[idx] = add_flags(table[idx],variant)
}
294
# After all input: emit the two lookup arrays that map an escape id or a
# group id, plus a variant index, to the tables printed earlier.  Only the
# slots for tables that were actually printed get an initializer.
# The element type is written with a single "const": the previous
# "const insn_attr_t const *" repeated the qualifier, so the declared type
# is unchanged.
END {
	# print escape opcode map's array
	print "/* Escape opcode map array */"
	print "const insn_attr_t *inat_escape_tables[INAT_ESC_MAX + 1]" \
	      "[INAT_LPREFIX_MAX + 1] = {"
	for (i = 0; i < geid; i++)
		for (j = 0; j < max_lprefix; j++)
			if (etable[i,j])
				print "	["i"]["j"] = "etable[i,j]","
	print "};\n"
	# print group opcode map's array
	print "/* Group opcode map array */"
	print "const insn_attr_t *inat_group_tables[INAT_GRP_MAX + 1]"\
	      "[INAT_LPREFIX_MAX + 1] = {"
	for (i = 0; i < ggid; i++)
		for (j = 0; j < max_lprefix; j++)
			if (gtable[i,j])
				print "	["i"]["j"] = "gtable[i,j]","
	print "};"
}