author     Adrian Hunter <adrian.hunter@intel.com>        2016-07-20 04:30:36 -0400
committer  Arnaldo Carvalho de Melo <acme@redhat.com>     2016-07-21 08:37:18 -0400
commit     c61f4d5ebaf05fbd90bf43aa2096690b85e34761 (patch)
tree       5edd82fde5b9daa6752e7258a64f23cfde4b50ff
parent     25af37f4e1e0a747824e3713b80d6b97dad28b7c (diff)
perf tools: Add AVX-512 support to the instruction decoder used by Intel PT
Add support for Intel's AVX-512 instructions to the perf tools instruction
decoder used by Intel PT. The kernel's instruction decoder was updated in
a previous patch.
AVX-512 instructions are documented in Intel Architecture Instruction Set
Extensions Programming Reference (February 2016).
AVX-512 instructions are identified by an EVEX prefix which, for the purpose
of instruction decoding, can be treated as though it were a 4-byte VEX
prefix.
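As a rough illustration of what that means for decoding (a sketch, not code
from this patch; the helper name and use of stdint types are made up), a 0x62
byte opens a 4-byte EVEX prefix, except that in 32-bit code the same byte can
still be BOUND, which is only ruled out when the mod bits of the following
byte are 11b:

	#include <stdint.h>

	/* Illustrative only: EVEX is 0x62 followed by payload bytes P0, P1, P2. */
	static int is_evex_prefix(const uint8_t *p, int x86_64)
	{
		if (p[0] != 0x62)
			return 0;
		if (!x86_64 && ((p[1] >> 6) & 3) != 3)
			return 0;	/* 32-bit mode: this is BOUND Gv,Ma */
		return 1;
	}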
Existing instructions which can now accept an EVEX prefix need not be
further annotated in the op code map (x86-opcode-map.txt). In the case of
new instructions, the op code map is updated accordingly.
Also add the associated mask instructions, which manipulate the mask
registers used by AVX-512 instructions.
A representative set of instructions is added to the perf tools new
instructions test in a subsequent patch.
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: X86 ML <x86@kernel.org>
Link: http://lkml.kernel.org/r/1469003437-32706-4-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r--   tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk |  11
-rw-r--r--   tools/perf/util/intel-pt-decoder/inat.h                |  17
-rw-r--r--   tools/perf/util/intel-pt-decoder/insn.c                |  18
-rw-r--r--   tools/perf/util/intel-pt-decoder/insn.h                |  12
-rw-r--r--   tools/perf/util/intel-pt-decoder/x86-opcode-map.txt    | 263
5 files changed, 220 insertions, 101 deletions
diff --git a/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk b/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk
index 517567347aac..54e961659514 100644
--- a/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk
+++ b/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk
@@ -72,12 +72,14 @@ BEGIN {
 	lprefix_expr = "\\((66|F2|F3)\\)"
 	max_lprefix = 4
 
-	# All opcodes starting with lower-case 'v' or with (v1) superscript
+	# All opcodes starting with lower-case 'v', 'k' or with (v1) superscript
 	# accepts VEX prefix
-	vexok_opcode_expr = "^v.*"
+	vexok_opcode_expr = "^[vk].*"
 	vexok_expr = "\\(v1\\)"
 	# All opcodes with (v) superscript supports *only* VEX prefix
 	vexonly_expr = "\\(v\\)"
+	# All opcodes with (ev) superscript supports *only* EVEX prefix
+	evexonly_expr = "\\(ev\\)"
 
 	prefix_expr = "\\(Prefix\\)"
 	prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
@@ -95,6 +97,7 @@ BEGIN {
 	prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
 	prefix_num["VEX+1byte"] = "INAT_PFX_VEX2"
 	prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"
+	prefix_num["EVEX"] = "INAT_PFX_EVEX"
 
 	clear_vars()
 }
@@ -319,7 +322,9 @@ function convert_operands(count,opnd, i,j,imm,mod)
 			flags = add_flags(flags, "INAT_MODRM")
 
 		# check VEX codes
-		if (match(ext, vexonly_expr))
+		if (match(ext, evexonly_expr))
+			flags = add_flags(flags, "INAT_VEXOK | INAT_EVEXONLY")
+		else if (match(ext, vexonly_expr))
 			flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
 		else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr))
 			flags = add_flags(flags, "INAT_VEXOK")
diff --git a/tools/perf/util/intel-pt-decoder/inat.h b/tools/perf/util/intel-pt-decoder/inat.h
index 611645e903a8..125ecd2a300d 100644
--- a/tools/perf/util/intel-pt-decoder/inat.h
+++ b/tools/perf/util/intel-pt-decoder/inat.h
@@ -48,6 +48,7 @@
 /* AVX VEX prefixes */
 #define INAT_PFX_VEX2	13	/* 2-bytes VEX prefix */
 #define INAT_PFX_VEX3	14	/* 3-bytes VEX prefix */
+#define INAT_PFX_EVEX	15	/* EVEX prefix */
 
 #define INAT_LSTPFX_MAX	3
 #define INAT_LGCPFX_MAX	11
@@ -89,6 +90,7 @@
 #define INAT_VARIANT	(1 << (INAT_FLAG_OFFS + 4))
 #define INAT_VEXOK	(1 << (INAT_FLAG_OFFS + 5))
 #define INAT_VEXONLY	(1 << (INAT_FLAG_OFFS + 6))
+#define INAT_EVEXONLY	(1 << (INAT_FLAG_OFFS + 7))
 /* Attribute making macros for attribute tables */
 #define INAT_MAKE_PREFIX(pfx)	(pfx << INAT_PFX_OFFS)
 #define INAT_MAKE_ESCAPE(esc)	(esc << INAT_ESC_OFFS)
@@ -141,7 +143,13 @@ static inline int inat_last_prefix_id(insn_attr_t attr)
 static inline int inat_is_vex_prefix(insn_attr_t attr)
 {
 	attr &= INAT_PFX_MASK;
-	return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3;
+	return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3 ||
+	       attr == INAT_PFX_EVEX;
+}
+
+static inline int inat_is_evex_prefix(insn_attr_t attr)
+{
+	return (attr & INAT_PFX_MASK) == INAT_PFX_EVEX;
 }
 
 static inline int inat_is_vex3_prefix(insn_attr_t attr)
@@ -216,6 +224,11 @@ static inline int inat_accept_vex(insn_attr_t attr)
 
 static inline int inat_must_vex(insn_attr_t attr)
 {
-	return attr & INAT_VEXONLY;
+	return attr & (INAT_VEXONLY | INAT_EVEXONLY);
+}
+
+static inline int inat_must_evex(insn_attr_t attr)
+{
+	return attr & INAT_EVEXONLY;
 }
 #endif
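A small sketch of the flag semantics above (not part of the patch; it only
assumes this directory's inat.h and inat_types.h are on the include path):
INAT_EVEXONLY is deliberately part of the inat_must_vex() mask, so an
EVEX-only opcode still counts as "requires a VEX-class prefix", while
inat_must_evex() singles it out.

	#include <stdio.h>
	#include "inat.h"

	int main(void)
	{
		insn_attr_t attr = INAT_VEXOK | INAT_EVEXONLY;

		/* Both tests hold for an EVEX-only attribute. */
		printf("must_vex=%d must_evex=%d\n",
		       !!inat_must_vex(attr), !!inat_must_evex(attr));
		return 0;
	}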
diff --git a/tools/perf/util/intel-pt-decoder/insn.c b/tools/perf/util/intel-pt-decoder/insn.c
index 9f26eae6c9f0..ca983e2bea8b 100644
--- a/tools/perf/util/intel-pt-decoder/insn.c
+++ b/tools/perf/util/intel-pt-decoder/insn.c
@@ -155,14 +155,24 @@ found:
 			/*
 			 * In 32-bits mode, if the [7:6] bits (mod bits of
 			 * ModRM) on the second byte are not 11b, it is
-			 * LDS or LES.
+			 * LDS or LES or BOUND.
 			 */
 			if (X86_MODRM_MOD(b2) != 3)
 				goto vex_end;
 		}
 		insn->vex_prefix.bytes[0] = b;
 		insn->vex_prefix.bytes[1] = b2;
-		if (inat_is_vex3_prefix(attr)) {
+		if (inat_is_evex_prefix(attr)) {
+			b2 = peek_nbyte_next(insn_byte_t, insn, 2);
+			insn->vex_prefix.bytes[2] = b2;
+			b2 = peek_nbyte_next(insn_byte_t, insn, 3);
+			insn->vex_prefix.bytes[3] = b2;
+			insn->vex_prefix.nbytes = 4;
+			insn->next_byte += 4;
+			if (insn->x86_64 && X86_VEX_W(b2))
+				/* VEX.W overrides opnd_size */
+				insn->opnd_bytes = 8;
+		} else if (inat_is_vex3_prefix(attr)) {
 			b2 = peek_nbyte_next(insn_byte_t, insn, 2);
 			insn->vex_prefix.bytes[2] = b2;
 			insn->vex_prefix.nbytes = 3;
@@ -221,7 +231,9 @@ void insn_get_opcode(struct insn *insn)
 		m = insn_vex_m_bits(insn);
 		p = insn_vex_p_bits(insn);
 		insn->attr = inat_get_avx_attribute(op, m, p);
-		if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr))
+		if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) ||
+		    (!inat_accept_vex(insn->attr) &&
+		     !inat_is_group(insn->attr)))
 			insn->attr = 0;	/* This instruction is bad */
 		goto end;	/* VEX has only 1 byte for opcode */
 	}
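A minimal usage sketch of the updated decoder (assumptions: it is built
against this directory's insn.c/insn.h; the byte sequence below is the EVEX
encoding of "vpxord %zmm0,%zmm1,%zmm2"):

	#include <stdio.h>
	#include "insn.h"

	int main(void)
	{
		/* 62 f1 75 48 ef d0 = vpxord %zmm0,%zmm1,%zmm2 */
		unsigned char buf[] = { 0x62, 0xf1, 0x75, 0x48, 0xef, 0xd0 };
		struct insn insn;

		insn_init(&insn, buf, sizeof(buf), /* x86_64 */ 1);
		insn_get_length(&insn);

		/* Expect length=6, evex=1, vex_bytes=4 */
		printf("length=%d evex=%d vex_bytes=%d\n",
		       insn.length, insn_is_evex(&insn), insn.vex_prefix.nbytes);
		return 0;
	}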
diff --git a/tools/perf/util/intel-pt-decoder/insn.h b/tools/perf/util/intel-pt-decoder/insn.h
index dd12da0f4593..e23578c7b1be 100644
--- a/tools/perf/util/intel-pt-decoder/insn.h
+++ b/tools/perf/util/intel-pt-decoder/insn.h
@@ -91,6 +91,7 @@ struct insn {
 #define X86_VEX_B(vex)	((vex) & 0x20)	/* VEX3 Byte1 */
 #define X86_VEX_L(vex)	((vex) & 0x04)	/* VEX3 Byte2, VEX2 Byte1 */
 /* VEX bit fields */
+#define X86_EVEX_M(vex)	((vex) & 0x03)	/* EVEX Byte1 */
 #define X86_VEX3_M(vex)	((vex) & 0x1f)	/* VEX3 Byte1 */
 #define X86_VEX2_M	1			/* VEX2.M always 1 */
 #define X86_VEX_V(vex)	(((vex) & 0x78) >> 3)	/* VEX3 Byte2, VEX2 Byte1 */
@@ -133,6 +134,13 @@ static inline int insn_is_avx(struct insn *insn)
 	return (insn->vex_prefix.value != 0);
 }
 
+static inline int insn_is_evex(struct insn *insn)
+{
+	if (!insn->prefixes.got)
+		insn_get_prefixes(insn);
+	return (insn->vex_prefix.nbytes == 4);
+}
+
 /* Ensure this instruction is decoded completely */
 static inline int insn_complete(struct insn *insn)
 {
@@ -144,8 +152,10 @@ static inline insn_byte_t insn_vex_m_bits(struct insn *insn)
 {
 	if (insn->vex_prefix.nbytes == 2)	/* 2 bytes VEX */
 		return X86_VEX2_M;
-	else
-		return X86_VEX3_M(insn->vex_prefix.bytes[1]);
+	else if (insn->vex_prefix.nbytes == 3)	/* 3 bytes VEX */
+		return X86_VEX3_M(insn->vex_prefix.bytes[1]);
+	else					/* EVEX */
+		return X86_EVEX_M(insn->vex_prefix.bytes[1]);
 }
 
 static inline insn_byte_t insn_vex_p_bits(struct insn *insn)
diff --git a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
index 28082de46f0d..ec378cd7b71e 100644
--- a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
+++ b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
@@ -13,12 +13,17 @@
 # opcode: escape # escaped-name
 # EndTable
 #
+# mnemonics that begin with lowercase 'v' accept a VEX or EVEX prefix
+# mnemonics that begin with lowercase 'k' accept a VEX prefix
+#
 #<group maps>
 # GrpTable: GrpXXX
 # reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...]
 # EndTable
 #
 # AVX Superscripts
+# (ev): this opcode requires EVEX prefix.
+# (evo): this opcode is changed by EVEX prefix (EVEX opcode)
 # (v): this opcode requires VEX prefix.
 # (v1): this opcode only supports 128bit VEX.
 #
@@ -137,7 +142,7 @@ AVXcode:
 # 0x60 - 0x6f
 60: PUSHA/PUSHAD (i64)
 61: POPA/POPAD (i64)
-62: BOUND Gv,Ma (i64)
+62: BOUND Gv,Ma (i64) | EVEX (Prefix)
 63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64)
 64: SEG=FS (Prefix)
 65: SEG=GS (Prefix)
@@ -399,17 +404,17 @@ AVXcode: 1
 3f:
 # 0x0f 0x40-0x4f
 40: CMOVO Gv,Ev
-41: CMOVNO Gv,Ev
-42: CMOVB/C/NAE Gv,Ev
+41: CMOVNO Gv,Ev | kandw/q Vk,Hk,Uk | kandb/d Vk,Hk,Uk (66)
+42: CMOVB/C/NAE Gv,Ev | kandnw/q Vk,Hk,Uk | kandnb/d Vk,Hk,Uk (66)
 43: CMOVAE/NB/NC Gv,Ev
-44: CMOVE/Z Gv,Ev
-45: CMOVNE/NZ Gv,Ev
-46: CMOVBE/NA Gv,Ev
-47: CMOVA/NBE Gv,Ev
+44: CMOVE/Z Gv,Ev | knotw/q Vk,Uk | knotb/d Vk,Uk (66)
+45: CMOVNE/NZ Gv,Ev | korw/q Vk,Hk,Uk | korb/d Vk,Hk,Uk (66)
+46: CMOVBE/NA Gv,Ev | kxnorw/q Vk,Hk,Uk | kxnorb/d Vk,Hk,Uk (66)
+47: CMOVA/NBE Gv,Ev | kxorw/q Vk,Hk,Uk | kxorb/d Vk,Hk,Uk (66)
 48: CMOVS Gv,Ev
 49: CMOVNS Gv,Ev
-4a: CMOVP/PE Gv,Ev
-4b: CMOVNP/PO Gv,Ev
+4a: CMOVP/PE Gv,Ev | kaddw/q Vk,Hk,Uk | kaddb/d Vk,Hk,Uk (66)
+4b: CMOVNP/PO Gv,Ev | kunpckbw Vk,Hk,Uk (66) | kunpckwd/dq Vk,Hk,Uk
 4c: CMOVL/NGE Gv,Ev
 4d: CMOVNL/GE Gv,Ev
 4e: CMOVLE/NG Gv,Ev
@@ -426,7 +431,7 @@ AVXcode: 1
 58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1)
 59: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1)
 5a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1)
-5b: vcvtdq2ps Vps,Wdq | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3)
+5b: vcvtdq2ps Vps,Wdq | vcvtqq2ps Vps,Wqq (evo) | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3)
 5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1)
 5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1)
 5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1)
@@ -447,7 +452,7 @@ AVXcode: 1
 6c: vpunpcklqdq Vx,Hx,Wx (66),(v1)
 6d: vpunpckhqdq Vx,Hx,Wx (66),(v1)
 6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1)
-6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqu Vx,Wx (F3)
+6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqa32/64 Vx,Wx (66),(evo) | vmovdqu Vx,Wx (F3) | vmovdqu32/64 Vx,Wx (F3),(evo) | vmovdqu8/16 Vx,Wx (F2),(ev)
 # 0x0f 0x70-0x7f
 70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1)
 71: Grp12 (1A)
@@ -458,14 +463,14 @@ AVXcode: 1
 76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1)
 # Note: Remove (v), because vzeroall and vzeroupper becomes emms without VEX.
 77: emms | vzeroupper | vzeroall
-78: VMREAD Ey,Gy
-79: VMWRITE Gy,Ey
-7a:
-7b:
+78: VMREAD Ey,Gy | vcvttps2udq/pd2udq Vx,Wpd (evo) | vcvttsd2usi Gv,Wx (F2),(ev) | vcvttss2usi Gv,Wx (F3),(ev) | vcvttps2uqq/pd2uqq Vx,Wx (66),(ev)
+79: VMWRITE Gy,Ey | vcvtps2udq/pd2udq Vx,Wpd (evo) | vcvtsd2usi Gv,Wx (F2),(ev) | vcvtss2usi Gv,Wx (F3),(ev) | vcvtps2uqq/pd2uqq Vx,Wx (66),(ev)
+7a: vcvtudq2pd/uqq2pd Vpd,Wx (F3),(ev) | vcvtudq2ps/uqq2ps Vpd,Wx (F2),(ev) | vcvttps2qq/pd2qq Vx,Wx (66),(ev)
+7b: vcvtusi2sd Vpd,Hpd,Ev (F2),(ev) | vcvtusi2ss Vps,Hps,Ev (F3),(ev) | vcvtps2qq/pd2qq Vx,Wx (66),(ev)
 7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2)
 7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2)
 7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1)
-7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3)
+7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqa32/64 Wx,Vx (66),(evo) | vmovdqu Wx,Vx (F3) | vmovdqu32/64 Wx,Vx (F3),(evo) | vmovdqu8/16 Wx,Vx (F2),(ev)
 # 0x0f 0x80-0x8f
 # Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
 80: JO Jz (f64)
@@ -485,16 +490,16 @@ AVXcode: 1
 8e: JLE/JNG Jz (f64)
 8f: JNLE/JG Jz (f64)
 # 0x0f 0x90-0x9f
-90: SETO Eb
-91: SETNO Eb
-92: SETB/C/NAE Eb
-93: SETAE/NB/NC Eb
+90: SETO Eb | kmovw/q Vk,Wk | kmovb/d Vk,Wk (66)
+91: SETNO Eb | kmovw/q Mv,Vk | kmovb/d Mv,Vk (66)
+92: SETB/C/NAE Eb | kmovw Vk,Rv | kmovb Vk,Rv (66) | kmovq/d Vk,Rv (F2)
+93: SETAE/NB/NC Eb | kmovw Gv,Uk | kmovb Gv,Uk (66) | kmovq/d Gv,Uk (F2)
 94: SETE/Z Eb
 95: SETNE/NZ Eb
 96: SETBE/NA Eb
 97: SETA/NBE Eb
-98: SETS Eb
-99: SETNS Eb
+98: SETS Eb | kortestw/q Vk,Uk | kortestb/d Vk,Uk (66)
+99: SETNS Eb | ktestw/q Vk,Uk | ktestb/d Vk,Uk (66)
 9a: SETP/PE Eb
 9b: SETNP/PO Eb
 9c: SETL/NGE Eb
@@ -564,11 +569,11 @@ d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1)
 d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1)
 d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1)
 da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1)
-db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1)
+db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) | vpandd/q Vx,Hx,Wx (66),(evo)
 dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1)
 dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1)
 de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1)
-df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1)
+df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) | vpandnd/q Vx,Hx,Wx (66),(evo)
 # 0x0f 0xe0-0xef
 e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1)
 e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1)
@@ -576,16 +581,16 @@ e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1)
 e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1)
 e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1)
 e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1)
-e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtpd2dq Vx,Wpd (F2)
+e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtdq2pd/qq2pd Vx,Wdq (F3),(evo) | vcvtpd2dq Vx,Wpd (F2)
 e7: movntq Mq,Pq | vmovntdq Mx,Vx (66)
 e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1)
 e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1)
 ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1)
-eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1)
+eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) | vpord/q Vx,Hx,Wx (66),(evo)
 ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1)
 ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1)
 ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1)
-ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1)
+ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) | vpxord/q Vx,Hx,Wx (66),(evo)
 # 0x0f 0xf0-0xff
 f0: vlddqu Vx,Mx (F2)
 f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1)
@@ -626,81 +631,105 @@ AVXcode: 2
 0e: vtestps Vx,Wx (66),(v)
 0f: vtestpd Vx,Wx (66),(v)
 # 0x0f 0x38 0x10-0x1f
-10: pblendvb Vdq,Wdq (66)
-11:
-12:
-13: vcvtph2ps Vx,Wx (66),(v)
-14: blendvps Vdq,Wdq (66)
-15: blendvpd Vdq,Wdq (66)
-16: vpermps Vqq,Hqq,Wqq (66),(v)
+10: pblendvb Vdq,Wdq (66) | vpsrlvw Vx,Hx,Wx (66),(evo) | vpmovuswb Wx,Vx (F3),(ev)
+11: vpmovusdb Wx,Vd (F3),(ev) | vpsravw Vx,Hx,Wx (66),(ev)
+12: vpmovusqb Wx,Vq (F3),(ev) | vpsllvw Vx,Hx,Wx (66),(ev)
+13: vcvtph2ps Vx,Wx (66),(v) | vpmovusdw Wx,Vd (F3),(ev)
+14: blendvps Vdq,Wdq (66) | vpmovusqw Wx,Vq (F3),(ev) | vprorvd/q Vx,Hx,Wx (66),(evo)
+15: blendvpd Vdq,Wdq (66) | vpmovusqd Wx,Vq (F3),(ev) | vprolvd/q Vx,Hx,Wx (66),(evo)
+16: vpermps Vqq,Hqq,Wqq (66),(v) | vpermps/d Vqq,Hqq,Wqq (66),(evo)
 17: vptest Vx,Wx (66)
 18: vbroadcastss Vx,Wd (66),(v)
-19: vbroadcastsd Vqq,Wq (66),(v)
-1a: vbroadcastf128 Vqq,Mdq (66),(v)
-1b:
+19: vbroadcastsd Vqq,Wq (66),(v) | vbroadcastf32x2 Vqq,Wq (66),(evo)
+1a: vbroadcastf128 Vqq,Mdq (66),(v) | vbroadcastf32x4/64x2 Vqq,Wq (66),(evo)
+1b: vbroadcastf32x8/64x4 Vqq,Mdq (66),(ev)
 1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1)
 1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1)
 1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1)
-1f:
+1f: vpabsq Vx,Wx (66),(ev)
 # 0x0f 0x38 0x20-0x2f
-20: vpmovsxbw Vx,Ux/Mq (66),(v1)
-21: vpmovsxbd Vx,Ux/Md (66),(v1)
-22: vpmovsxbq Vx,Ux/Mw (66),(v1)
-23: vpmovsxwd Vx,Ux/Mq (66),(v1)
-24: vpmovsxwq Vx,Ux/Md (66),(v1)
-25: vpmovsxdq Vx,Ux/Mq (66),(v1)
-26:
-27:
-28: vpmuldq Vx,Hx,Wx (66),(v1)
-29: vpcmpeqq Vx,Hx,Wx (66),(v1)
-2a: vmovntdqa Vx,Mx (66),(v1)
+20: vpmovsxbw Vx,Ux/Mq (66),(v1) | vpmovswb Wx,Vx (F3),(ev)
+21: vpmovsxbd Vx,Ux/Md (66),(v1) | vpmovsdb Wx,Vd (F3),(ev)
+22: vpmovsxbq Vx,Ux/Mw (66),(v1) | vpmovsqb Wx,Vq (F3),(ev)
+23: vpmovsxwd Vx,Ux/Mq (66),(v1) | vpmovsdw Wx,Vd (F3),(ev)
+24: vpmovsxwq Vx,Ux/Md (66),(v1) | vpmovsqw Wx,Vq (F3),(ev)
+25: vpmovsxdq Vx,Ux/Mq (66),(v1) | vpmovsqd Wx,Vq (F3),(ev)
+26: vptestmb/w Vk,Hx,Wx (66),(ev) | vptestnmb/w Vk,Hx,Wx (F3),(ev)
+27: vptestmd/q Vk,Hx,Wx (66),(ev) | vptestnmd/q Vk,Hx,Wx (F3),(ev)
+28: vpmuldq Vx,Hx,Wx (66),(v1) | vpmovm2b/w Vx,Uk (F3),(ev)
+29: vpcmpeqq Vx,Hx,Wx (66),(v1) | vpmovb2m/w2m Vk,Ux (F3),(ev)
+2a: vmovntdqa Vx,Mx (66),(v1) | vpbroadcastmb2q Vx,Uk (F3),(ev)
 2b: vpackusdw Vx,Hx,Wx (66),(v1)
-2c: vmaskmovps Vx,Hx,Mx (66),(v)
-2d: vmaskmovpd Vx,Hx,Mx (66),(v)
+2c: vmaskmovps Vx,Hx,Mx (66),(v) | vscalefps/d Vx,Hx,Wx (66),(evo)
+2d: vmaskmovpd Vx,Hx,Mx (66),(v) | vscalefss/d Vx,Hx,Wx (66),(evo)
 2e: vmaskmovps Mx,Hx,Vx (66),(v)
 2f: vmaskmovpd Mx,Hx,Vx (66),(v)
 # 0x0f 0x38 0x30-0x3f
-30: vpmovzxbw Vx,Ux/Mq (66),(v1)
-31: vpmovzxbd Vx,Ux/Md (66),(v1)
-32: vpmovzxbq Vx,Ux/Mw (66),(v1)
-33: vpmovzxwd Vx,Ux/Mq (66),(v1)
-34: vpmovzxwq Vx,Ux/Md (66),(v1)
-35: vpmovzxdq Vx,Ux/Mq (66),(v1)
-36: vpermd Vqq,Hqq,Wqq (66),(v)
+30: vpmovzxbw Vx,Ux/Mq (66),(v1) | vpmovwb Wx,Vx (F3),(ev)
+31: vpmovzxbd Vx,Ux/Md (66),(v1) | vpmovdb Wx,Vd (F3),(ev)
+32: vpmovzxbq Vx,Ux/Mw (66),(v1) | vpmovqb Wx,Vq (F3),(ev)
+33: vpmovzxwd Vx,Ux/Mq (66),(v1) | vpmovdw Wx,Vd (F3),(ev)
+34: vpmovzxwq Vx,Ux/Md (66),(v1) | vpmovqw Wx,Vq (F3),(ev)
+35: vpmovzxdq Vx,Ux/Mq (66),(v1) | vpmovqd Wx,Vq (F3),(ev)
+36: vpermd Vqq,Hqq,Wqq (66),(v) | vpermd/q Vqq,Hqq,Wqq (66),(evo)
 37: vpcmpgtq Vx,Hx,Wx (66),(v1)
-38: vpminsb Vx,Hx,Wx (66),(v1)
-39: vpminsd Vx,Hx,Wx (66),(v1)
-3a: vpminuw Vx,Hx,Wx (66),(v1)
-3b: vpminud Vx,Hx,Wx (66),(v1)
+38: vpminsb Vx,Hx,Wx (66),(v1) | vpmovm2d/q Vx,Uk (F3),(ev)
+39: vpminsd Vx,Hx,Wx (66),(v1) | vpminsd/q Vx,Hx,Wx (66),(evo) | vpmovd2m/q2m Vk,Ux (F3),(ev)
+3a: vpminuw Vx,Hx,Wx (66),(v1) | vpbroadcastmw2d Vx,Uk (F3),(ev)
+3b: vpminud Vx,Hx,Wx (66),(v1) | vpminud/q Vx,Hx,Wx (66),(evo)
 3c: vpmaxsb Vx,Hx,Wx (66),(v1)
-3d: vpmaxsd Vx,Hx,Wx (66),(v1)
+3d: vpmaxsd Vx,Hx,Wx (66),(v1) | vpmaxsd/q Vx,Hx,Wx (66),(evo)
 3e: vpmaxuw Vx,Hx,Wx (66),(v1)
-3f: vpmaxud Vx,Hx,Wx (66),(v1)
+3f: vpmaxud Vx,Hx,Wx (66),(v1) | vpmaxud/q Vx,Hx,Wx (66),(evo)
 # 0x0f 0x38 0x40-0x8f
-40: vpmulld Vx,Hx,Wx (66),(v1)
+40: vpmulld Vx,Hx,Wx (66),(v1) | vpmulld/q Vx,Hx,Wx (66),(evo)
 41: vphminposuw Vdq,Wdq (66),(v1)
-42:
-43:
-44:
+42: vgetexpps/d Vx,Wx (66),(ev)
+43: vgetexpss/d Vx,Hx,Wx (66),(ev)
+44: vplzcntd/q Vx,Wx (66),(ev)
 45: vpsrlvd/q Vx,Hx,Wx (66),(v)
-46: vpsravd Vx,Hx,Wx (66),(v)
+46: vpsravd Vx,Hx,Wx (66),(v) | vpsravd/q Vx,Hx,Wx (66),(evo)
 47: vpsllvd/q Vx,Hx,Wx (66),(v)
-# Skip 0x48-0x57
+# Skip 0x48-0x4b
+4c: vrcp14ps/d Vpd,Wpd (66),(ev)
+4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev)
+4e: vrsqrt14ps/d Vpd,Wpd (66),(ev)
+4f: vrsqrt14ss/d Vsd,Hsd,Wsd (66),(ev)
+# Skip 0x50-0x57
 58: vpbroadcastd Vx,Wx (66),(v)
-59: vpbroadcastq Vx,Wx (66),(v)
-5a: vbroadcasti128 Vqq,Mdq (66),(v)
-# Skip 0x5b-0x77
+59: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx (66),(evo)
+5a: vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo)
+5b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev)
+# Skip 0x5c-0x63
+64: vpblendmd/q Vx,Hx,Wx (66),(ev)
+65: vblendmps/d Vx,Hx,Wx (66),(ev)
+66: vpblendmb/w Vx,Hx,Wx (66),(ev)
+# Skip 0x67-0x74
+75: vpermi2b/w Vx,Hx,Wx (66),(ev)
+76: vpermi2d/q Vx,Hx,Wx (66),(ev)
+77: vpermi2ps/d Vx,Hx,Wx (66),(ev)
 78: vpbroadcastb Vx,Wx (66),(v)
 79: vpbroadcastw Vx,Wx (66),(v)
-# Skip 0x7a-0x7f
+7a: vpbroadcastb Vx,Rv (66),(ev)
+7b: vpbroadcastw Vx,Rv (66),(ev)
+7c: vpbroadcastd/q Vx,Rv (66),(ev)
+7d: vpermt2b/w Vx,Hx,Wx (66),(ev)
+7e: vpermt2d/q Vx,Hx,Wx (66),(ev)
+7f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
 80: INVEPT Gy,Mdq (66)
 81: INVPID Gy,Mdq (66)
 82: INVPCID Gy,Mdq (66)
+83: vpmultishiftqb Vx,Hx,Wx (66),(ev)
+88: vexpandps/d Vpd,Wpd (66),(ev)
+89: vpexpandd/q Vx,Wx (66),(ev)
+8a: vcompressps/d Wx,Vx (66),(ev)
+8b: vpcompressd/q Wx,Vx (66),(ev)
 8c: vpmaskmovd/q Vx,Hx,Mx (66),(v)
+8d: vpermb/w Vx,Hx,Wx (66),(ev)
 8e: vpmaskmovd/q Mx,Vx,Hx (66),(v)
 # 0x0f 0x38 0x90-0xbf (FMA)
-90: vgatherdd/q Vx,Hx,Wx (66),(v)
-91: vgatherqd/q Vx,Hx,Wx (66),(v)
+90: vgatherdd/q Vx,Hx,Wx (66),(v) | vpgatherdd/q Vx,Wx (66),(evo)
+91: vgatherqd/q Vx,Hx,Wx (66),(v) | vpgatherqd/q Vx,Wx (66),(evo)
 92: vgatherdps/d Vx,Hx,Wx (66),(v)
 93: vgatherqps/d Vx,Hx,Wx (66),(v)
 94:
@@ -715,6 +744,10 @@ AVXcode: 2
 9d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
 9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v)
 9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
+a0: vpscatterdd/q Wx,Vx (66),(ev)
+a1: vpscatterqd/q Wx,Vx (66),(ev)
+a2: vscatterdps/d Wx,Vx (66),(ev)
+a3: vscatterqps/d Wx,Vx (66),(ev)
 a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v)
 a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v)
 a8: vfmadd213ps/d Vx,Hx,Wx (66),(v)
@@ -725,6 +758,8 @@ ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v)
 ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
 ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v)
 af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
+b4: vpmadd52luq Vx,Hx,Wx (66),(ev)
+b5: vpmadd52huq Vx,Hx,Wx (66),(ev)
 b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v)
 b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v)
 b8: vfmadd231ps/d Vx,Hx,Wx (66),(v)
@@ -736,12 +771,15 @@ bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
 be: vfnmsub231ps/d Vx,Hx,Wx (66),(v)
 bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
 # 0x0f 0x38 0xc0-0xff
-c8: sha1nexte Vdq,Wdq
+c4: vpconflictd/q Vx,Wx (66),(ev)
+c6: Grp18 (1A)
+c7: Grp19 (1A)
+c8: sha1nexte Vdq,Wdq | vexp2ps/d Vx,Wx (66),(ev)
 c9: sha1msg1 Vdq,Wdq
-ca: sha1msg2 Vdq,Wdq
-cb: sha256rnds2 Vdq,Wdq
-cc: sha256msg1 Vdq,Wdq
-cd: sha256msg2 Vdq,Wdq
+ca: sha1msg2 Vdq,Wdq | vrcp28ps/d Vx,Wx (66),(ev)
+cb: sha256rnds2 Vdq,Wdq | vrcp28ss/d Vx,Hx,Wx (66),(ev)
+cc: sha256msg1 Vdq,Wdq | vrsqrt28ps/d Vx,Wx (66),(ev)
+cd: sha256msg2 Vdq,Wdq | vrsqrt28ss/d Vx,Hx,Wx (66),(ev)
 db: VAESIMC Vdq,Wdq (66),(v1)
 dc: VAESENC Vdq,Hdq,Wdq (66),(v1)
 dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1)
@@ -763,15 +801,15 @@ AVXcode: 3
 00: vpermq Vqq,Wqq,Ib (66),(v)
 01: vpermpd Vqq,Wqq,Ib (66),(v)
 02: vpblendd Vx,Hx,Wx,Ib (66),(v)
-03:
+03: valignd/q Vx,Hx,Wx,Ib (66),(ev)
 04: vpermilps Vx,Wx,Ib (66),(v)
 05: vpermilpd Vx,Wx,Ib (66),(v)
 06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v)
 07:
-08: vroundps Vx,Wx,Ib (66)
-09: vroundpd Vx,Wx,Ib (66)
-0a: vroundss Vss,Wss,Ib (66),(v1)
-0b: vroundsd Vsd,Wsd,Ib (66),(v1)
+08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo)
+09: vroundpd Vx,Wx,Ib (66) | vrndscalepd Vx,Wx,Ib (66),(evo)
+0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo)
+0b: vroundsd Vsd,Wsd,Ib (66),(v1) | vrndscalesd Vx,Hx,Wx,Ib (66),(evo)
 0c: vblendps Vx,Hx,Wx,Ib (66)
 0d: vblendpd Vx,Hx,Wx,Ib (66)
 0e: vpblendw Vx,Hx,Wx,Ib (66),(v1)
@@ -780,26 +818,51 @@ AVXcode: 3
 15: vpextrw Rd/Mw,Vdq,Ib (66),(v1)
 16: vpextrd/q Ey,Vdq,Ib (66),(v1)
 17: vextractps Ed,Vdq,Ib (66),(v1)
-18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v)
-19: vextractf128 Wdq,Vqq,Ib (66),(v)
+18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) | vinsertf32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo)
+19: vextractf128 Wdq,Vqq,Ib (66),(v) | vextractf32x4/64x2 Wdq,Vqq,Ib (66),(evo)
+1a: vinsertf32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev)
+1b: vextractf32x8/64x4 Wdq,Vqq,Ib (66),(ev)
 1d: vcvtps2ph Wx,Vx,Ib (66),(v)
+1e: vpcmpud/q Vk,Hd,Wd,Ib (66),(ev)
+1f: vpcmpd/q Vk,Hd,Wd,Ib (66),(ev)
 20: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1)
 21: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1)
 22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1)
-38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v)
-39: vextracti128 Wdq,Vqq,Ib (66),(v)
+23: vshuff32x4/64x2 Vx,Hx,Wx,Ib (66),(ev)
+25: vpternlogd/q Vx,Hx,Wx,Ib (66),(ev)
+26: vgetmantps/d Vx,Wx,Ib (66),(ev)
+27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev)
+30: kshiftrb/w Vk,Uk,Ib (66),(v)
+31: kshiftrd/q Vk,Uk,Ib (66),(v)
+32: kshiftlb/w Vk,Uk,Ib (66),(v)
+33: kshiftld/q Vk,Uk,Ib (66),(v)
+38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) | vinserti32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo)
+39: vextracti128 Wdq,Vqq,Ib (66),(v) | vextracti32x4/64x2 Wdq,Vqq,Ib (66),(evo)
+3a: vinserti32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev)
+3b: vextracti32x8/64x4 Wdq,Vqq,Ib (66),(ev)
+3e: vpcmpub/w Vk,Hk,Wx,Ib (66),(ev)
+3f: vpcmpb/w Vk,Hk,Wx,Ib (66),(ev)
 40: vdpps Vx,Hx,Wx,Ib (66)
 41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1)
-42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1)
+42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) | vdbpsadbw Vx,Hx,Wx,Ib (66),(evo)
+43: vshufi32x4/64x2 Vx,Hx,Wx,Ib (66),(ev)
 44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1)
 46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v)
 4a: vblendvps Vx,Hx,Wx,Lx (66),(v)
 4b: vblendvpd Vx,Hx,Wx,Lx (66),(v)
 4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1)
+50: vrangeps/d Vx,Hx,Wx,Ib (66),(ev)
+51: vrangess/d Vx,Hx,Wx,Ib (66),(ev)
+54: vfixupimmps/d Vx,Hx,Wx,Ib (66),(ev)
+55: vfixupimmss/d Vx,Hx,Wx,Ib (66),(ev)
+56: vreduceps/d Vx,Wx,Ib (66),(ev)
+57: vreducess/d Vx,Hx,Wx,Ib (66),(ev)
 60: vpcmpestrm Vdq,Wdq,Ib (66),(v1)
 61: vpcmpestri Vdq,Wdq,Ib (66),(v1)
 62: vpcmpistrm Vdq,Wdq,Ib (66),(v1)
 63: vpcmpistri Vdq,Wdq,Ib (66),(v1)
+66: vfpclassps/d Vk,Wx,Ib (66),(ev)
+67: vfpclassss/d Vk,Wx,Ib (66),(ev)
 cc: sha1rnds4 Vdq,Wdq,Ib
 df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1)
 f0: RORX Gy,Ey,Ib (F2),(v)
@@ -927,8 +990,10 @@ GrpTable: Grp12
 EndTable
 
 GrpTable: Grp13
+0: vprord/q Hx,Wx,Ib (66),(ev)
+1: vprold/q Hx,Wx,Ib (66),(ev)
 2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1)
-4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1)
+4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) | vpsrad/q Hx,Ux,Ib (66),(evo)
 6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1)
 EndTable
 
@@ -963,6 +1028,20 @@ GrpTable: Grp17
 3: BLSI By,Ey (v)
 EndTable
 
+GrpTable: Grp18
+1: vgatherpf0dps/d Wx (66),(ev)
+2: vgatherpf1dps/d Wx (66),(ev)
+5: vscatterpf0dps/d Wx (66),(ev)
+6: vscatterpf1dps/d Wx (66),(ev)
+EndTable
+
+GrpTable: Grp19
+1: vgatherpf0qps/d Wx (66),(ev)
+2: vgatherpf1qps/d Wx (66),(ev)
+5: vscatterpf0qps/d Wx (66),(ev)
+6: vscatterpf1qps/d Wx (66),(ev)
+EndTable
+
 # AMD's Prefetch Group
 GrpTable: GrpP
 0: PREFETCH