x86: Instruction decoder API

Add an x86 instruction decoder to the arch-specific libraries. This decoder can decode x86 instructions used in the kernel into prefix, opcode, modrm, sib, displacement and immediates. It can also report the length of an instruction. This version introduces instruction attributes for decoding instructions. The instruction attribute tables are generated from the opcode map file (x86-opcode-map.txt) by the generator script (gen-insn-attr-x86.awk). Currently, the opcode maps are based on the opcode maps in Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2: Appendix A, and consist of the two types of opcode tables below. 1-byte/2-byte/3-byte opcode tables, which have 256 elements each, are written as below: Table: table-name Referrer: escaped-name opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...]] [| 2nd-mnemonic ...] (or) opcode: escape # escaped-name EndTable Group opcode tables, which have 8 elements each, are written as below: GrpTable: GrpXXX reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...]] [| 2nd-mnemonic ...] EndTable These opcode maps include a few SSE and FP opcodes (for setup), because those opcodes are used in the kernel. Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com> Signed-off-by: Jim Keniston <jkenisto@us.ibm.com> Acked-by: H. Peter Anvin <hpa@zytor.com> Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com> Cc: Avi Kivity <avi@redhat.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Christoph Hellwig <hch@infradead.org> Cc: Frank Ch. 
Eigler <fche@redhat.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Jason Baron <jbaron@redhat.com> Cc: K.Prasad <prasad@linux.vnet.ibm.com> Cc: Lai Jiangshan <laijs@cn.fujitsu.com> Cc: Li Zefan <lizf@cn.fujitsu.com> Cc: Przemysław Pawełczyk <przemyslaw@pawelczyk.it> Cc: Roland McGrath <roland@redhat.com> Cc: Sam Ravnborg <sam@ravnborg.org> Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Tom Zanussi <tzanussi@gmail.com> Cc: Vegard Nossum <vegard.nossum@gmail.com> LKML-Reference: <20090813203413.31965.49709.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
author: Masami Hiramatsu <mhiramat@redhat.com> 2009-08-13 16:34:13 -0400
committer: Frederic Weisbecker <fweisbec@gmail.com> 2009-08-26 18:35:56 -0400
commit: eb13296cfaf6c699566473669a96a38a90562384 (patch)
tree: 466c44bf0a747effaf85ec13dbf75ae857449bfd /arch/x86/lib/insn.c
parent: 35dce1a99d010f3d738af4ce1b9b77302fdfe69c (diff)
1 files changed, 464 insertions, 0 deletions
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
new file mode 100644
index 000000000000..dfd56a30053f
--- /dev/null
+++ b/arch/x86/lib/insn.c
@@ -0,0 +1,464 @@
+/*
+ * x86 instruction analysis
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004, 2009
+ */
+#include <linux/string.h>
+#include <asm/inat.h>
+#include <asm/insn.h>
+/*
+ * get_next(t, insn)  - read a value of type @t at insn->next_byte and
+ *                      advance insn->next_byte by sizeof(@t).
+ * peek_next(t, insn) - read a value of type @t at insn->next_byte without
+ *                      advancing.
+ *
+ * Both are GCC statement expressions that yield the value read.  The @insn
+ * argument is parenthesized so that any pointer expression (e.g. &local)
+ * can be passed.  get_next() expands @insn twice, so do not pass an
+ * expression with side effects.  No bounds checking is done here.
+ */
+#define get_next(t, insn)       \
+        ({ t r = *(t *)((insn)->next_byte); (insn)->next_byte += sizeof(t); r; })
+#define peek_next(t, insn)      \
+        ({ t r = *(t *)((insn)->next_byte); r; })
+/**
+ * insn_init() - prepare a struct insn for decoding
+ * @insn:       &struct insn to be initialized
+ * @kaddr:      address (in kernel memory) of instruction (or copy thereof)
+ * @x86_64:     !0 for 64-bit kernel or 64-bit app
+ *
+ * Clears @insn, points both the recorded start address and the decode
+ * cursor at @kaddr, and selects the default operand size (4 bytes) and
+ * default address size (8 bytes if @x86_64 is non-zero, otherwise 4).
+ */
+void insn_init(struct insn *insn, const void *kaddr, int x86_64)
+{
+        memset(insn, 0, sizeof *insn);
+        insn->next_byte = kaddr;
+        insn->kaddr = kaddr;
+        insn->opnd_bytes = 4;
+        insn->addr_bytes = x86_64 ? 8 : 4;
+        /* Normalize the flag to exactly 0 or 1 */
+        insn->x86_64 = !!x86_64;
+}
+/**
+ * insn_get_prefixes - scan x86 instruction prefix bytes
+ * @insn:       &struct insn containing instruction
+ *
+ * Populates @insn->prefixes and updates @insn->next_byte to point to the
+ * (first) opcode.  Up to four distinct prefix bytes are recorded in
+ * @insn->prefixes.bytes[]; every prefix byte consumed, including repeats
+ * of an already recorded one, is counted in @insn->prefixes.nbytes.
+ * Address-size and operand-size prefixes toggle @insn->addr_bytes and
+ * @insn->opnd_bytes.  After the scan, the last prefix byte consumed is
+ * placed in @insn->prefixes.bytes[3].
+ *
+ * For 64-bit instructions, a REX prefix found right after the other
+ * prefixes is stored in @insn->rex_prefix and consumed as well.
+ * @insn->rex_prefix.got is set in every case.
+ *
+ * No effect if @insn->prefixes.got is already set.
+ */
+void insn_get_prefixes(struct insn *insn)
+{
+        struct insn_field *prefixes = &insn->prefixes;
+        insn_attr_t attr;
+        insn_byte_t b, lb;
+        int i, nb;
+        if (prefixes->got)
+                return;
+        nb = 0; /* number of distinct prefix bytes stored in bytes[] */
+        lb = 0; /* last prefix byte consumed; stays 0 if there is none */
+        b = peek_next(insn_byte_t, insn);
+        attr = inat_get_opcode_attribute(b);
+        while (inat_is_prefix(attr)) {
+                /*
+                 * Skip if same prefix: a byte that is already recorded is
+                 * not stored again and does not toggle the sizes a second
+                 * time, but it is still consumed and counted at "found".
+                 */
+                for (i = 0; i < nb; i++)
+                        if (prefixes->bytes[i] == b)
+                                goto found;
+                if (nb == 4)
+                        /*
+                         * Invalid instruction: a fifth distinct prefix.
+                         * Stop scanning without consuming this byte.
+                         */
+                        break;
+                prefixes->bytes[nb++] = b;
+                if (inat_is_address_size_prefix(attr)) {
+                        /*
+                         * address size switches 2/4 or 4/8:
+                         * XOR with 12 flips between 8 and 4 (64-bit),
+                         * XOR with 6 flips between 4 and 2 (32-bit).
+                         */
+                        if (insn->x86_64)
+                                insn->addr_bytes ^= 12;
+                        else
+                                insn->addr_bytes ^= 6;
+                } else if (inat_is_operand_size_prefix(attr)) {
+                        /* operand size switches 2/4 (XOR with 6) */
+                        insn->opnd_bytes ^= 6;
+                }
+found:
+                prefixes->nbytes++;
+                insn->next_byte++;
+                lb = b;
+                b = peek_next(insn_byte_t, insn);
+                attr = inat_get_opcode_attribute(b);
+        }
+        /*
+         * Set the last prefix: make sure bytes[3] holds the last prefix
+         * byte that was consumed.
+         */
+        if (lb && lb != insn->prefixes.bytes[3]) {
+                if (unlikely(insn->prefixes.bytes[3])) {
+                        /*
+                         * Swap the last prefix: slot 3 is already occupied
+                         * (four distinct prefixes were stored), so move its
+                         * current value into the slot that held lb before
+                         * overwriting slot 3 below.
+                         */
+                        b = insn->prefixes.bytes[3];
+                        for (i = 0; i < nb; i++)
+                                if (prefixes->bytes[i] == lb)
+                                        prefixes->bytes[i] = b;
+                }
+                insn->prefixes.bytes[3] = lb;
+        }
+        if (insn->x86_64) { /* a REX prefix is only looked for in 64-bit mode */
+                b = peek_next(insn_byte_t, insn);
+                attr = inat_get_opcode_attribute(b);
+                if (inat_is_rex_prefix(attr)) {
+                        insn->rex_prefix.value = b;
+                        insn->rex_prefix.nbytes = 1;
+                        insn->next_byte++;
+                        if (X86_REX_W(b))
+                                /* REX.W overrides opnd_size */
+                                insn->opnd_bytes = 8;
+                }
+        }
+        insn->rex_prefix.got = 1;
+        prefixes->got = 1;
+        return;
+}
+/**
+ * insn_get_opcode - collect opcode(s)
+ * @insn:       &struct insn containing instruction
+ *
+ * Decodes the prefix bytes first if that has not been done yet.  Then reads
+ * the first opcode byte into @insn->opcode.bytes[0] and, as long as the
+ * attribute of the bytes read so far marks an escape, appends one more
+ * opcode byte.  On return @insn->next_byte points past the opcode byte(s)
+ * and @insn->attr holds the attribute of the complete opcode (group
+ * attributes are not resolved here).
+ * Does nothing if @insn->opcode.got is already set.
+ */
+void insn_get_opcode(struct insn *insn)
+{
+        struct insn_field *opc = &insn->opcode;
+        insn_byte_t byte, last_pfx;
+        if (opc->got)
+                return;
+        if (!insn->prefixes.got)
+                insn_get_prefixes(insn);
+        /* First opcode byte */
+        byte = get_next(insn_byte_t, insn);
+        opc->nbytes = 1;
+        opc->bytes[0] = byte;
+        insn->attr = inat_get_opcode_attribute(byte);
+        for (;;) {
+                if (!inat_is_escape(insn->attr))
+                        break;
+                /* Escaped opcode: fetch the next byte and look it up */
+                byte = get_next(insn_byte_t, insn);
+                opc->bytes[opc->nbytes++] = byte;
+                last_pfx = insn_last_prefix(insn);
+                insn->attr = inat_get_escape_attribute(byte, last_pfx,
+                                                       insn->attr);
+        }
+        opc->got = 1;
+}
+/**
+ * insn_get_modrm - fetch the ModRM byte when the opcode calls for one
+ * @insn:       &struct insn containing instruction
+ *
+ * Decodes the prefixes and opcode(s) first if that has not been done yet.
+ * When the opcode attribute says a ModRM byte follows, stores it in
+ * @insn->modrm, advances @insn->next_byte past it and, for group opcodes,
+ * replaces @insn->attr with the group attribute selected by that byte.
+ * In 64-bit mode, an attribute flagged as force64 sets the operand size
+ * to 8 bytes.  Does nothing if @insn->modrm.got is already set.
+ */
+void insn_get_modrm(struct insn *insn)
+{
+        struct insn_field *field = &insn->modrm;
+        insn_byte_t last_pfx, byte;
+        if (field->got)
+                return;
+        if (!insn->opcode.got)
+                insn_get_opcode(insn);
+        if (inat_has_modrm(insn->attr)) {
+                byte = get_next(insn_byte_t, insn);
+                field->nbytes = 1;
+                field->value = byte;
+                if (inat_is_group(insn->attr)) {
+                        /* Resolve the group member selected by ModRM */
+                        last_pfx = insn_last_prefix(insn);
+                        insn->attr = inat_get_group_attribute(byte, last_pfx,
+                                                              insn->attr);
+                }
+        }
+        if (insn->x86_64) {
+                if (inat_is_force64(insn->attr))
+                        insn->opnd_bytes = 8;
+        }
+        field->got = 1;
+}
+/**
+ * insn_rip_relative() - Does instruction use RIP-relative addressing mode?
+ * @insn:       &struct insn containing instruction
+ *
+ * Decodes the instruction up to and including the ModRM byte if that has
+ * not been done yet.  Always returns 0 when @insn->x86_64 is 0, without
+ * decoding anything.
+ *
+ * Return: 1 if a ModRM byte is present with mod == 00 and r/m == 101,
+ * otherwise 0.
+ */
+int insn_rip_relative(struct insn *insn)
+{
+        if (!insn->x86_64)
+                return 0;
+        if (!insn->modrm.got)
+                insn_get_modrm(insn);
+        if (!insn->modrm.nbytes)
+                return 0;
+        /*
+         * 0xc7 keeps the mod field (top 2 bits) and the r/m field
+         * (bottom 3 bits); RIP-relative means mod == 0 and r/m == 0x5.
+         */
+        return (insn->modrm.value & 0xc7) == 0x5;
+}
+/**
+ * insn_get_sib() - Get the SIB byte of instruction
+ * @insn:       &struct insn containing instruction
+ *
+ * Decodes the instruction up to and including the ModRM byte if that has
+ * not been done yet.  A SIB byte is read only when a ModRM byte exists,
+ * the address size is not 2 bytes, mod != 3 and r/m == 4.
+ * Does nothing if @insn->sib.got is already set.
+ */
+void insn_get_sib(struct insn *insn)
+{
+        struct insn_field *sib = &insn->sib;
+        insn_byte_t modrm_byte;
+        if (sib->got)
+                return;
+        if (!insn->modrm.got)
+                insn_get_modrm(insn);
+        modrm_byte = (insn_byte_t)insn->modrm.value;
+        if (insn->modrm.nbytes && insn->addr_bytes != 2 &&
+            X86_MODRM_RM(modrm_byte) == 4 && X86_MODRM_MOD(modrm_byte) != 3) {
+                sib->value = get_next(insn_byte_t, insn);
+                sib->nbytes = 1;
+        }
+        sib->got = 1;
+}
+/**
+ * insn_get_displacement() - Get the displacement of instruction
+ * @insn:       &struct insn containing instruction
+ *
+ * Decodes the instruction up to and including the SIB byte if that has not
+ * been done yet.  The displacement is read through a signed type of its
+ * own width (char, short or int), so the stored value is sign-extended.
+ * Does nothing if @insn->displacement.got is already set.
+ */
+void insn_get_displacement(struct insn *insn)
+{
+        struct insn_field *disp = &insn->displacement;
+        insn_byte_t mod, rm, sib_base;
+        if (disp->got)
+                return;
+        if (!insn->sib.got)
+                insn_get_sib(insn);
+        if (!insn->modrm.nbytes)
+                goto out;
+        /*
+         * How the ModRM byte selects the displacement:
+         *
+         *   mod = 00 - none, except:
+         *              address size = 2:  r/m = 110      -> 2 bytes
+         *              address size != 2: r/m = 101      -> 4 bytes
+         *                                 (rip-relative addressing)
+         *                                 SIB base = 101 -> 4 bytes
+         *   mod = 01 - 1 byte
+         *   mod = 10 - 4 bytes, or 2 bytes if address size = 2
+         *              (0x67 prefix in 32-bit mode)
+         *   mod = 11 - no memory operand, hence no displacement
+         *
+         * (With address size != 2, a SIB byte exists when mod != 11 and
+         * r/m = 100.)
+         */
+        mod = X86_MODRM_MOD(insn->modrm.value);
+        rm = X86_MODRM_RM(insn->modrm.value);
+        sib_base = X86_SIB_BASE(insn->sib.value);
+        switch (mod) {
+        case 1:
+                disp->value = get_next(char, insn);
+                disp->nbytes = 1;
+                break;
+        case 2:
+                if (insn->addr_bytes == 2) {
+                        disp->value = get_next(short, insn);
+                        disp->nbytes = 2;
+                } else {
+                        disp->value = get_next(int, insn);
+                        disp->nbytes = 4;
+                }
+                break;
+        case 0:
+                if (insn->addr_bytes == 2) {
+                        if (rm == 6) {
+                                disp->value = get_next(short, insn);
+                                disp->nbytes = 2;
+                        }
+                } else if (rm == 5 || sib_base == 5) {
+                        disp->value = get_next(int, insn);
+                        disp->nbytes = 4;
+                }
+                break;
+        default:
+                /* mod == 3: register operand, nothing to read */
+                break;
+        }
+out:
+        disp->got = 1;
+}
+/*
+ * Decode moffset16/32/64.
+ *
+ * The offset width follows insn->addr_bytes.  A 2- or 4-byte offset goes
+ * into moffset1; an 8-byte offset is read as two consecutive 4-byte values,
+ * the first into moffset1 and the second into moffset2.  Both fields are
+ * marked as got in every case.
+ */
+static void __get_moffset(struct insn *insn)
+{
+        switch (insn->addr_bytes) {
+        case 2:
+                insn->moffset1.value = get_next(short, insn);
+                insn->moffset1.nbytes = 2;
+                break;
+        case 4:
+        case 8:
+                insn->moffset1.value = get_next(int, insn);
+                insn->moffset1.nbytes = 4;
+                if (insn->addr_bytes == 8) {
+                        insn->moffset2.value = get_next(int, insn);
+                        insn->moffset2.nbytes = 4;
+                }
+                break;
+        }
+        insn->moffset2.got = 1;
+        insn->moffset1.got = 1;
+}
+/*
+ * Decode imm v32(Iz).
+ *
+ * Reads a 2-byte immediate when the operand size is 2, and a 4-byte
+ * immediate when it is 4 or 8.  Any other operand size reads nothing.
+ * The got flag is left for the caller to set.
+ */
+static void __get_immv32(struct insn *insn)
+{
+        if (insn->opnd_bytes == 2) {
+                insn->immediate.value = get_next(short, insn);
+                insn->immediate.nbytes = 2;
+        } else if (insn->opnd_bytes == 4 || insn->opnd_bytes == 8) {
+                insn->immediate.value = get_next(int, insn);
+                insn->immediate.nbytes = 4;
+        }
+}
+/*
+ * Decode imm v64(Iv/Ov).
+ *
+ * The immediate width follows insn->opnd_bytes.  A 2- or 4-byte immediate
+ * goes into immediate1; an 8-byte immediate is read as two consecutive
+ * 4-byte values, the first into immediate1 and the second into immediate2.
+ * Both fields are marked as got in every case.
+ */
+static void __get_immv(struct insn *insn)
+{
+        switch (insn->opnd_bytes) {
+        case 2:
+                insn->immediate1.value = get_next(short, insn);
+                insn->immediate1.nbytes = 2;
+                break;
+        case 4:
+        case 8:
+                insn->immediate1.value = get_next(int, insn);
+                insn->immediate1.nbytes = 4;
+                if (insn->opnd_bytes == 8) {
+                        insn->immediate2.value = get_next(int, insn);
+                        insn->immediate2.nbytes = 4;
+                }
+                break;
+        }
+        insn->immediate2.got = 1;
+        insn->immediate1.got = 1;
+}
+/*
+ * Decode ptr16:16/32(Ap).
+ *
+ * Reads a 2- or 4-byte value (chosen by insn->opnd_bytes) into immediate1,
+ * followed by an unsigned 2-byte value into immediate2, and marks both
+ * fields as got.  With an 8-byte operand size nothing is read and the got
+ * flags are left untouched.
+ */
+static void __get_immptr(struct insn *insn)
+{
+        if (insn->opnd_bytes == 8)
+                /* ptr16:64 does not exist (no segment) */
+                return;
+        if (insn->opnd_bytes == 2) {
+                insn->immediate1.value = get_next(short, insn);
+                insn->immediate1.nbytes = 2;
+        } else if (insn->opnd_bytes == 4) {
+                insn->immediate1.value = get_next(int, insn);
+                insn->immediate1.nbytes = 4;
+        }
+        insn->immediate2.value = get_next(unsigned short, insn);
+        insn->immediate2.nbytes = 2;
+        insn->immediate2.got = 1;
+        insn->immediate1.got = 1;
+}
+/**
+ * insn_get_immediate() - Get the immediates of instruction
+ * @insn:       &struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * displacement bytes.  If the opcode attribute indicates a memory offset
+ * (moffset), only that is decoded and no immediate is read.  Otherwise the
+ * immediate size class from the attribute selects how many bytes are read,
+ * optionally followed by a one-byte second immediate.
+ *
+ * Most immediates are read through signed types and are therefore stored
+ * sign-extended.  The unsigned value can be obtained by masking with
+ * ((1 << (nbytes * 8)) - 1); compute that mask in a type wider than
+ * nbytes * 8 bits.
+ *
+ * No effect if @insn->immediate.got is already set.
+ */
+void insn_get_immediate(struct insn *insn)
+{
+        if (insn->immediate.got)
+                return;
+        if (!insn->displacement.got)
+                insn_get_displacement(insn);
+        if (inat_has_moffset(insn->attr)) {
+                __get_moffset(insn);
+                goto done;
+        }
+        if (!inat_has_immediate(insn->attr))
+                /* no immediates */
+                goto done;
+        switch (inat_immediate_size(insn->attr)) {
+        case INAT_IMM_BYTE:     /* 1 byte, read as signed char */
+                insn->immediate.value = get_next(char, insn);
+                insn->immediate.nbytes = 1;
+                break;
+        case INAT_IMM_WORD:     /* 2 bytes, read as signed short */
+                insn->immediate.value = get_next(short, insn);
+                insn->immediate.nbytes = 2;
+                break;
+        case INAT_IMM_DWORD:    /* 4 bytes, read as signed int */
+                insn->immediate.value = get_next(int, insn);
+                insn->immediate.nbytes = 4;
+                break;
+        case INAT_IMM_QWORD:    /* 8 bytes, kept as two 4-byte halves */
+                insn->immediate1.value = get_next(int, insn);
+                insn->immediate1.nbytes = 4;
+                insn->immediate2.value = get_next(int, insn);
+                insn->immediate2.nbytes = 4;
+                break;
+        case INAT_IMM_PTR:      /* size depends on the operand size */
+                __get_immptr(insn);
+                break;
+        case INAT_IMM_VWORD32:  /* size depends on the operand size */
+                __get_immv32(insn);
+                break;
+        case INAT_IMM_VWORD:    /* size depends on the operand size */
+                __get_immv(insn);
+                break;
+        default:                /* unknown size class: read nothing */
+                break;
+        }
+        if (inat_has_second_immediate(insn->attr)) { /* extra 1-byte immediate */
+                insn->immediate2.value = get_next(char, insn);
+                insn->immediate2.nbytes = 1;
+        }
+done:
+        insn->immediate.got = 1;
+}
+/**
+ * insn_get_length() - Get the length of instruction
+ * @insn:       &struct insn containing instruction
+ *
+ * Decodes the instruction up to and including the immediate bytes if that
+ * has not been done yet, then stores in @insn->length the distance from
+ * @insn->kaddr to @insn->next_byte, truncated to an unsigned char.
+ * Does nothing if @insn->length is already non-zero.
+ */
+void insn_get_length(struct insn *insn)
+{
+        unsigned long start, end;
+        if (insn->length)
+                return;
+        if (!insn->immediate.got)
+                insn_get_immediate(insn);
+        /* Sample the cursor only after decoding has finished */
+        start = (unsigned long)insn->kaddr;
+        end = (unsigned long)insn->next_byte;
+        insn->length = (unsigned char)(end - start);
+}
author	Masami Hiramatsu <mhiramat@redhat.com>	2009-08-13 16:34:13 -0400
committer	Frederic Weisbecker <fweisbec@gmail.com>	2009-08-26 18:35:56 -0400
commit	eb13296cfaf6c699566473669a96a38a90562384 (patch)
tree	466c44bf0a747effaf85ec13dbf75ae857449bfd /arch/x86/lib/insn.c
parent	35dce1a99d010f3d738af4ce1b9b77302fdfe69c (diff)

diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c new file mode 100644 index 000000000000..dfd56a30053f --- /dev/null +++ b/arch/x86/lib/insn.c
@@ -0,0 +1,464 @@
	1	/*
	2	* x86 instruction analysis
	3	*
	4	* This program is free software; you can redistribute it and/or modify
	5	* it under the terms of the GNU General Public License as published by
	6	* the Free Software Foundation; either version 2 of the License, or
	7	* (at your option) any later version.
	8	*
	9	* This program is distributed in the hope that it will be useful,
	10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	* GNU General Public License for more details.
	13	*
	14	* You should have received a copy of the GNU General Public License
	15	* along with this program; if not, write to the Free Software
	16	* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
	17	*
	18	* Copyright (C) IBM Corporation, 2002, 2004, 2009
	19	*/
	20
	21	#include <linux/string.h>
	22	#include <asm/inat.h>
	23	#include <asm/insn.h>
	24
	25	#define get_next(t, insn) \
	26	({t r; r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; })
	27
	28	#define peek_next(t, insn) \
	29	({t r; r = *(t*)insn->next_byte; r; })
	30
	31	/**
	32	* insn_init() - initialize struct insn
	33	* @insn: &struct insn to be initialized
	34	* @kaddr: address (in kernel memory) of instruction (or copy thereof)
	35	* @x86_64: !0 for 64-bit kernel or 64-bit app
	36	*/
	37	void insn_init(struct insn *insn, const void *kaddr, int x86_64)
	38	{
	39	memset(insn, 0, sizeof(*insn));
	40	insn->kaddr = kaddr;
	41	insn->next_byte = kaddr;
	42	insn->x86_64 = x86_64 ? 1 : 0;
	43	insn->opnd_bytes = 4;
	44	if (x86_64)
	45	insn->addr_bytes = 8;
	46	else
	47	insn->addr_bytes = 4;
	48	}
	49
	50	/**
	51	* insn_get_prefixes - scan x86 instruction prefix bytes
	52	* @insn: &struct insn containing instruction
	53	*
	54	* Populates the @insn->prefixes bitmap, and updates @insn->next_byte
	55	* to point to the (first) opcode. No effect if @insn->prefixes.got
	56	* is already set.
	57	*/
	58	void insn_get_prefixes(struct insn *insn)
	59	{
	60	struct insn_field *prefixes = &insn->prefixes;
	61	insn_attr_t attr;
	62	insn_byte_t b, lb;
	63	int i, nb;
	64
	65	if (prefixes->got)
	66	return;
	67
	68	nb = 0;
	69	lb = 0;
	70	b = peek_next(insn_byte_t, insn);
	71	attr = inat_get_opcode_attribute(b);
	72	while (inat_is_prefix(attr)) {
	73	/* Skip if same prefix */
	74	for (i = 0; i < nb; i++)
	75	if (prefixes->bytes[i] == b)
	76	goto found;
	77	if (nb == 4)
	78	/* Invalid instruction */
	79	break;
	80	prefixes->bytes[nb++] = b;
	81	if (inat_is_address_size_prefix(attr)) {
	82	/* address size switches 2/4 or 4/8 */
	83	if (insn->x86_64)
	84	insn->addr_bytes ^= 12;
	85	else
	86	insn->addr_bytes ^= 6;
	87	} else if (inat_is_operand_size_prefix(attr)) {
	88	/* oprand size switches 2/4 */
	89	insn->opnd_bytes ^= 6;
	90	}
	91	found:
	92	prefixes->nbytes++;
	93	insn->next_byte++;
	94	lb = b;
	95	b = peek_next(insn_byte_t, insn);
	96	attr = inat_get_opcode_attribute(b);
	97	}
	98	/* Set the last prefix */
	99	if (lb && lb != insn->prefixes.bytes[3]) {
	100	if (unlikely(insn->prefixes.bytes[3])) {
	101	/* Swap the last prefix */
	102	b = insn->prefixes.bytes[3];
	103	for (i = 0; i < nb; i++)
	104	if (prefixes->bytes[i] == lb)
	105	prefixes->bytes[i] = b;
	106	}
	107	insn->prefixes.bytes[3] = lb;
	108	}
	109
	110	if (insn->x86_64) {
	111	b = peek_next(insn_byte_t, insn);
	112	attr = inat_get_opcode_attribute(b);
	113	if (inat_is_rex_prefix(attr)) {
	114	insn->rex_prefix.value = b;
	115	insn->rex_prefix.nbytes = 1;
	116	insn->next_byte++;
	117	if (X86_REX_W(b))
	118	/* REX.W overrides opnd_size */
	119	insn->opnd_bytes = 8;
	120	}
	121	}
	122	insn->rex_prefix.got = 1;
	123	prefixes->got = 1;
	124	return;
	125	}
	126
	127	/**
	128	* insn_get_opcode - collect opcode(s)
	129	* @insn: &struct insn containing instruction
	130	*
	131	* Populates @insn->opcode, updates @insn->next_byte to point past the
	132	* opcode byte(s), and set @insn->attr (except for groups).
	133	* If necessary, first collects any preceding (prefix) bytes.
	134	* Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got
	135	* is already 1.
	136	*/
	137	void insn_get_opcode(struct insn *insn)
	138	{
	139	struct insn_field *opcode = &insn->opcode;
	140	insn_byte_t op, pfx;
	141	if (opcode->got)
	142	return;
	143	if (!insn->prefixes.got)
	144	insn_get_prefixes(insn);
	145
	146	/* Get first opcode */
	147	op = get_next(insn_byte_t, insn);
	148	opcode->bytes[0] = op;
	149	opcode->nbytes = 1;
	150	insn->attr = inat_get_opcode_attribute(op);
	151	while (inat_is_escape(insn->attr)) {
	152	/* Get escaped opcode */
	153	op = get_next(insn_byte_t, insn);
	154	opcode->bytes[opcode->nbytes++] = op;
	155	pfx = insn_last_prefix(insn);
	156	insn->attr = inat_get_escape_attribute(op, pfx, insn->attr);
	157	}
	158	opcode->got = 1;
	159	}
	160
	161	/**
	162	* insn_get_modrm - collect ModRM byte, if any
	163	* @insn: &struct insn containing instruction
	164	*
	165	* Populates @insn->modrm and updates @insn->next_byte to point past the
	166	* ModRM byte, if any. If necessary, first collects the preceding bytes
	167	* (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1.
	168	*/
	169	void insn_get_modrm(struct insn *insn)
	170	{
	171	struct insn_field *modrm = &insn->modrm;
	172	insn_byte_t pfx, mod;
	173	if (modrm->got)
	174	return;
	175	if (!insn->opcode.got)
	176	insn_get_opcode(insn);
	177
	178	if (inat_has_modrm(insn->attr)) {
	179	mod = get_next(insn_byte_t, insn);
	180	modrm->value = mod;
	181	modrm->nbytes = 1;
	182	if (inat_is_group(insn->attr)) {
	183	pfx = insn_last_prefix(insn);
	184	insn->attr = inat_get_group_attribute(mod, pfx,
	185	insn->attr);
	186	}
	187	}
	188
	189	if (insn->x86_64 && inat_is_force64(insn->attr))
	190	insn->opnd_bytes = 8;
	191	modrm->got = 1;
	192	}
	193
	194
	195	/**
	196	* insn_rip_relative() - Does instruction use RIP-relative addressing mode?
	197	* @insn: &struct insn containing instruction
	198	*
	199	* If necessary, first collects the instruction up to and including the
	200	* ModRM byte. No effect if @insn->x86_64 is 0.
	201	*/
	202	int insn_rip_relative(struct insn *insn)
	203	{
	204	struct insn_field *modrm = &insn->modrm;
	205
	206	if (!insn->x86_64)
	207	return 0;
	208	if (!modrm->got)
	209	insn_get_modrm(insn);
	210	/*
	211	* For rip-relative instructions, the mod field (top 2 bits)
	212	* is zero and the r/m field (bottom 3 bits) is 0x5.
	213	*/
	214	return (modrm->nbytes && (modrm->value & 0xc7) == 0x5);
	215	}
	216
	217	/**
	218	* insn_get_sib() - Get the SIB byte of instruction
	219	* @insn: &struct insn containing instruction
	220	*
	221	* If necessary, first collects the instruction up to and including the
	222	* ModRM byte.
	223	*/
	224	void insn_get_sib(struct insn *insn)
	225	{
	226	insn_byte_t modrm;
	227
	228	if (insn->sib.got)
	229	return;
	230	if (!insn->modrm.got)
	231	insn_get_modrm(insn);
	232	if (insn->modrm.nbytes) {
	233	modrm = (insn_byte_t)insn->modrm.value;
	234	if (insn->addr_bytes != 2 &&
	235	X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) {
	236	insn->sib.value = get_next(insn_byte_t, insn);
	237	insn->sib.nbytes = 1;
	238	}
	239	}
	240	insn->sib.got = 1;
	241	}
	242
	243
	244	/**
	245	* insn_get_displacement() - Get the displacement of instruction
	246	* @insn: &struct insn containing instruction
	247	*
	248	* If necessary, first collects the instruction up to and including the
	249	* SIB byte.
	250	* Displacement value is sign-expanded.
	251	*/
	252	void insn_get_displacement(struct insn *insn)
	253	{
	254	insn_byte_t mod, rm, base;
	255
	256	if (insn->displacement.got)
	257	return;
	258	if (!insn->sib.got)
	259	insn_get_sib(insn);
	260	if (insn->modrm.nbytes) {
	261	/*
	262	* Interpreting the modrm byte:
	263	* mod = 00 - no displacement fields (exceptions below)
	264	* mod = 01 - 1-byte displacement field
	265	* mod = 10 - displacement field is 4 bytes, or 2 bytes if
	266	* address size = 2 (0x67 prefix in 32-bit mode)
	267	* mod = 11 - no memory operand
	268	*
	269	* If address size = 2...
	270	* mod = 00, r/m = 110 - displacement field is 2 bytes
	271	*
	272	* If address size != 2...
	273	* mod != 11, r/m = 100 - SIB byte exists
	274	* mod = 00, SIB base = 101 - displacement field is 4 bytes
	275	* mod = 00, r/m = 101 - rip-relative addressing, displacement
	276	* field is 4 bytes
	277	*/
	278	mod = X86_MODRM_MOD(insn->modrm.value);
	279	rm = X86_MODRM_RM(insn->modrm.value);
	280	base = X86_SIB_BASE(insn->sib.value);
	281	if (mod == 3)
	282	goto out;
	283	if (mod == 1) {
	284	insn->displacement.value = get_next(char, insn);
	285	insn->displacement.nbytes = 1;
	286	} else if (insn->addr_bytes == 2) {
	287	if ((mod == 0 && rm == 6) || mod == 2) {
	288	insn->displacement.value =
	289	get_next(short, insn);
	290	insn->displacement.nbytes = 2;
	291	}
	292	} else {
	293	if ((mod == 0 && rm == 5) || mod == 2 ||
	294	(mod == 0 && base == 5)) {
	295	insn->displacement.value = get_next(int, insn);
	296	insn->displacement.nbytes = 4;
	297	}
	298	}
	299	}
	300	out:
	301	insn->displacement.got = 1;
	302	}
	303
	304	/* Decode moffset16/32/64 */
	305	static void __get_moffset(struct insn *insn)
	306	{
	307	switch (insn->addr_bytes) {
	308	case 2:
	309	insn->moffset1.value = get_next(short, insn);
	310	insn->moffset1.nbytes = 2;
	311	break;
	312	case 4:
	313	insn->moffset1.value = get_next(int, insn);
	314	insn->moffset1.nbytes = 4;
	315	break;
	316	case 8:
	317	insn->moffset1.value = get_next(int, insn);
	318	insn->moffset1.nbytes = 4;
	319	insn->moffset2.value = get_next(int, insn);
	320	insn->moffset2.nbytes = 4;
	321	break;
	322	}
	323	insn->moffset1.got = insn->moffset2.got = 1;
	324	}
	325
	326	/* Decode imm v32(Iz) */
	327	static void __get_immv32(struct insn *insn)
	328	{
	329	switch (insn->opnd_bytes) {
	330	case 2:
	331	insn->immediate.value = get_next(short, insn);
	332	insn->immediate.nbytes = 2;
	333	break;
	334	case 4:
	335	case 8:
	336	insn->immediate.value = get_next(int, insn);
	337	insn->immediate.nbytes = 4;
	338	break;
	339	}
	340	}
	341
	342	/* Decode imm v64(Iv/Ov) */
	343	static void __get_immv(struct insn *insn)
	344	{
	345	switch (insn->opnd_bytes) {
	346	case 2:
	347	insn->immediate1.value = get_next(short, insn);
	348	insn->immediate1.nbytes = 2;
	349	break;
	350	case 4:
	351	insn->immediate1.value = get_next(int, insn);
	352	insn->immediate1.nbytes = 4;
	353	break;
	354	case 8:
	355	insn->immediate1.value = get_next(int, insn);
	356	insn->immediate1.nbytes = 4;
	357	insn->immediate2.value = get_next(int, insn);
	358	insn->immediate2.nbytes = 4;
	359	break;
	360	}
	361	insn->immediate1.got = insn->immediate2.got = 1;
	362	}
	363
	364	/* Decode ptr16:16/32(Ap) */
	365	static void __get_immptr(struct insn *insn)
	366	{
	367	switch (insn->opnd_bytes) {
	368	case 2:
	369	insn->immediate1.value = get_next(short, insn);
	370	insn->immediate1.nbytes = 2;
	371	break;
	372	case 4:
	373	insn->immediate1.value = get_next(int, insn);
	374	insn->immediate1.nbytes = 4;
	375	break;
	376	case 8:
	377	/* ptr16:64 is not exist (no segment) */
	378	return;
	379	}
	380	insn->immediate2.value = get_next(unsigned short, insn);
	381	insn->immediate2.nbytes = 2;
	382	insn->immediate1.got = insn->immediate2.got = 1;
	383	}
	384
	385	/**
	386	* insn_get_immediate() - Get the immediates of instruction
	387	* @insn: &struct insn containing instruction
	388	*
	389	* If necessary, first collects the instruction up to and including the
	390	* displacement bytes.
	391	* Basically, most of immediates are sign-expanded. Unsigned-value can be
	392	* get by bit masking with ((1 << (nbytes * 8)) - 1)
	393	*/
	394	void insn_get_immediate(struct insn *insn)
	395	{
	396	if (insn->immediate.got)
	397	return;
	398	if (!insn->displacement.got)
	399	insn_get_displacement(insn);
	400
	401	if (inat_has_moffset(insn->attr)) {
	402	__get_moffset(insn);
	403	goto done;
	404	}
	405
	406	if (!inat_has_immediate(insn->attr))
	407	/* no immediates */
	408	goto done;
	409
	410	switch (inat_immediate_size(insn->attr)) {
	411	case INAT_IMM_BYTE:
	412	insn->immediate.value = get_next(char, insn);
	413	insn->immediate.nbytes = 1;
	414	break;
	415	case INAT_IMM_WORD:
	416	insn->immediate.value = get_next(short, insn);
	417	insn->immediate.nbytes = 2;
	418	break;
	419	case INAT_IMM_DWORD:
	420	insn->immediate.value = get_next(int, insn);
	421	insn->immediate.nbytes = 4;
	422	break;
	423	case INAT_IMM_QWORD:
	424	insn->immediate1.value = get_next(int, insn);
	425	insn->immediate1.nbytes = 4;
	426	insn->immediate2.value = get_next(int, insn);
	427	insn->immediate2.nbytes = 4;
	428	break;
	429	case INAT_IMM_PTR:
	430	__get_immptr(insn);
	431	break;
	432	case INAT_IMM_VWORD32:
	433	__get_immv32(insn);
	434	break;
	435	case INAT_IMM_VWORD:
	436	__get_immv(insn);
	437	break;
	438	default:
	439	break;
	440	}
	441	if (inat_has_second_immediate(insn->attr)) {
	442	insn->immediate2.value = get_next(char, insn);
	443	insn->immediate2.nbytes = 1;
	444	}
	445	done:
	446	insn->immediate.got = 1;
	447	}
	448
	449	/**
	450	* insn_get_length() - Get the length of instruction
	451	* @insn: &struct insn containing instruction
	452	*
	453	* If necessary, first collects the instruction up to and including the
	454	* immediates bytes.
	455	*/
	456	void insn_get_length(struct insn *insn)
	457	{
	458	if (insn->length)
	459	return;
	460	if (!insn->immediate.got)
	461	insn_get_immediate(insn);
	462	insn->length = (unsigned char)((unsigned long)insn->next_byte
	463	- (unsigned long)insn->kaddr);
	464	}