aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAdrian Hunter <adrian.hunter@intel.com>2016-07-20 04:30:36 -0400
committerArnaldo Carvalho de Melo <acme@redhat.com>2016-07-21 08:37:18 -0400
commitc61f4d5ebaf05fbd90bf43aa2096690b85e34761 (patch)
tree5edd82fde5b9daa6752e7258a64f23cfde4b50ff
parent25af37f4e1e0a747824e3713b80d6b97dad28b7c (diff)
perf tools: Add AVX-512 support to the instruction decoder used by Intel PT
Add support for Intel's AVX-512 instructions to perf tools instruction decoder used by Intel PT. The kernel's instruction decoder was updated in a previous patch. AVX-512 instructions are documented in Intel Architecture Instruction Set Extensions Programming Reference (February 2016). AVX-512 instructions are identified by a EVEX prefix which, for the purpose of instruction decoding, can be treated as though it were a 4-byte VEX prefix. Existing instructions which can now accept an EVEX prefix need not be further annotated in the op code map (x86-opcode-map.txt). In the case of new instructions, the op code map is updated accordingly. Also add associated Mask Instructions that are used to manipulate mask registers used in AVX-512 instructions. A representative set of instructions is added to the perf tools new instructions test in a subsequent patch. Signed-off-by: Adrian Hunter <adrian.hunter@intel.com> Acked-by: Ingo Molnar <mingo@kernel.org> Acked-by: Masami Hiramatsu <mhiramat@kernel.org> Cc: Andy Lutomirski <luto@amacapital.net> Cc: Dan Williams <dan.j.williams@intel.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: X86 ML <x86@kernel.org> Link: http://lkml.kernel.org/r/1469003437-32706-4-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r--tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk11
-rw-r--r--tools/perf/util/intel-pt-decoder/inat.h17
-rw-r--r--tools/perf/util/intel-pt-decoder/insn.c18
-rw-r--r--tools/perf/util/intel-pt-decoder/insn.h12
-rw-r--r--tools/perf/util/intel-pt-decoder/x86-opcode-map.txt263
5 files changed, 220 insertions, 101 deletions
diff --git a/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk b/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk
index 517567347aac..54e961659514 100644
--- a/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk
+++ b/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk
@@ -72,12 +72,14 @@ BEGIN {
72 lprefix_expr = "\\((66|F2|F3)\\)" 72 lprefix_expr = "\\((66|F2|F3)\\)"
73 max_lprefix = 4 73 max_lprefix = 4
74 74
75 # All opcodes starting with lower-case 'v' or with (v1) superscript 75 # All opcodes starting with lower-case 'v', 'k' or with (v1) superscript
76 # accepts VEX prefix 76 # accepts VEX prefix
77 vexok_opcode_expr = "^v.*" 77 vexok_opcode_expr = "^[vk].*"
78 vexok_expr = "\\(v1\\)" 78 vexok_expr = "\\(v1\\)"
79 # All opcodes with (v) superscript supports *only* VEX prefix 79 # All opcodes with (v) superscript supports *only* VEX prefix
80 vexonly_expr = "\\(v\\)" 80 vexonly_expr = "\\(v\\)"
81 # All opcodes with (ev) superscript supports *only* EVEX prefix
82 evexonly_expr = "\\(ev\\)"
81 83
82 prefix_expr = "\\(Prefix\\)" 84 prefix_expr = "\\(Prefix\\)"
83 prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" 85 prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
@@ -95,6 +97,7 @@ BEGIN {
95 prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" 97 prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
96 prefix_num["VEX+1byte"] = "INAT_PFX_VEX2" 98 prefix_num["VEX+1byte"] = "INAT_PFX_VEX2"
97 prefix_num["VEX+2byte"] = "INAT_PFX_VEX3" 99 prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"
100 prefix_num["EVEX"] = "INAT_PFX_EVEX"
98 101
99 clear_vars() 102 clear_vars()
100} 103}
@@ -319,7 +322,9 @@ function convert_operands(count,opnd, i,j,imm,mod)
319 flags = add_flags(flags, "INAT_MODRM") 322 flags = add_flags(flags, "INAT_MODRM")
320 323
321 # check VEX codes 324 # check VEX codes
322 if (match(ext, vexonly_expr)) 325 if (match(ext, evexonly_expr))
326 flags = add_flags(flags, "INAT_VEXOK | INAT_EVEXONLY")
327 else if (match(ext, vexonly_expr))
323 flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY") 328 flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
324 else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr)) 329 else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr))
325 flags = add_flags(flags, "INAT_VEXOK") 330 flags = add_flags(flags, "INAT_VEXOK")
diff --git a/tools/perf/util/intel-pt-decoder/inat.h b/tools/perf/util/intel-pt-decoder/inat.h
index 611645e903a8..125ecd2a300d 100644
--- a/tools/perf/util/intel-pt-decoder/inat.h
+++ b/tools/perf/util/intel-pt-decoder/inat.h
@@ -48,6 +48,7 @@
48/* AVX VEX prefixes */ 48/* AVX VEX prefixes */
49#define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */ 49#define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */
50#define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */ 50#define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */
51#define INAT_PFX_EVEX 15 /* EVEX prefix */
51 52
52#define INAT_LSTPFX_MAX 3 53#define INAT_LSTPFX_MAX 3
53#define INAT_LGCPFX_MAX 11 54#define INAT_LGCPFX_MAX 11
@@ -89,6 +90,7 @@
89#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4)) 90#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4))
90#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5)) 91#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5))
91#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6)) 92#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6))
93#define INAT_EVEXONLY (1 << (INAT_FLAG_OFFS + 7))
92/* Attribute making macros for attribute tables */ 94/* Attribute making macros for attribute tables */
93#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) 95#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS)
94#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) 96#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS)
@@ -141,7 +143,13 @@ static inline int inat_last_prefix_id(insn_attr_t attr)
141static inline int inat_is_vex_prefix(insn_attr_t attr) 143static inline int inat_is_vex_prefix(insn_attr_t attr)
142{ 144{
143 attr &= INAT_PFX_MASK; 145 attr &= INAT_PFX_MASK;
144 return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3; 146 return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3 ||
147 attr == INAT_PFX_EVEX;
148}
149
150static inline int inat_is_evex_prefix(insn_attr_t attr)
151{
152 return (attr & INAT_PFX_MASK) == INAT_PFX_EVEX;
145} 153}
146 154
147static inline int inat_is_vex3_prefix(insn_attr_t attr) 155static inline int inat_is_vex3_prefix(insn_attr_t attr)
@@ -216,6 +224,11 @@ static inline int inat_accept_vex(insn_attr_t attr)
216 224
217static inline int inat_must_vex(insn_attr_t attr) 225static inline int inat_must_vex(insn_attr_t attr)
218{ 226{
219 return attr & INAT_VEXONLY; 227 return attr & (INAT_VEXONLY | INAT_EVEXONLY);
228}
229
230static inline int inat_must_evex(insn_attr_t attr)
231{
232 return attr & INAT_EVEXONLY;
220} 233}
221#endif 234#endif
diff --git a/tools/perf/util/intel-pt-decoder/insn.c b/tools/perf/util/intel-pt-decoder/insn.c
index 9f26eae6c9f0..ca983e2bea8b 100644
--- a/tools/perf/util/intel-pt-decoder/insn.c
+++ b/tools/perf/util/intel-pt-decoder/insn.c
@@ -155,14 +155,24 @@ found:
155 /* 155 /*
156 * In 32-bits mode, if the [7:6] bits (mod bits of 156 * In 32-bits mode, if the [7:6] bits (mod bits of
157 * ModRM) on the second byte are not 11b, it is 157 * ModRM) on the second byte are not 11b, it is
158 * LDS or LES. 158 * LDS or LES or BOUND.
159 */ 159 */
160 if (X86_MODRM_MOD(b2) != 3) 160 if (X86_MODRM_MOD(b2) != 3)
161 goto vex_end; 161 goto vex_end;
162 } 162 }
163 insn->vex_prefix.bytes[0] = b; 163 insn->vex_prefix.bytes[0] = b;
164 insn->vex_prefix.bytes[1] = b2; 164 insn->vex_prefix.bytes[1] = b2;
165 if (inat_is_vex3_prefix(attr)) { 165 if (inat_is_evex_prefix(attr)) {
166 b2 = peek_nbyte_next(insn_byte_t, insn, 2);
167 insn->vex_prefix.bytes[2] = b2;
168 b2 = peek_nbyte_next(insn_byte_t, insn, 3);
169 insn->vex_prefix.bytes[3] = b2;
170 insn->vex_prefix.nbytes = 4;
171 insn->next_byte += 4;
172 if (insn->x86_64 && X86_VEX_W(b2))
173 /* VEX.W overrides opnd_size */
174 insn->opnd_bytes = 8;
175 } else if (inat_is_vex3_prefix(attr)) {
166 b2 = peek_nbyte_next(insn_byte_t, insn, 2); 176 b2 = peek_nbyte_next(insn_byte_t, insn, 2);
167 insn->vex_prefix.bytes[2] = b2; 177 insn->vex_prefix.bytes[2] = b2;
168 insn->vex_prefix.nbytes = 3; 178 insn->vex_prefix.nbytes = 3;
@@ -221,7 +231,9 @@ void insn_get_opcode(struct insn *insn)
221 m = insn_vex_m_bits(insn); 231 m = insn_vex_m_bits(insn);
222 p = insn_vex_p_bits(insn); 232 p = insn_vex_p_bits(insn);
223 insn->attr = inat_get_avx_attribute(op, m, p); 233 insn->attr = inat_get_avx_attribute(op, m, p);
224 if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr)) 234 if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) ||
235 (!inat_accept_vex(insn->attr) &&
236 !inat_is_group(insn->attr)))
225 insn->attr = 0; /* This instruction is bad */ 237 insn->attr = 0; /* This instruction is bad */
226 goto end; /* VEX has only 1 byte for opcode */ 238 goto end; /* VEX has only 1 byte for opcode */
227 } 239 }
diff --git a/tools/perf/util/intel-pt-decoder/insn.h b/tools/perf/util/intel-pt-decoder/insn.h
index dd12da0f4593..e23578c7b1be 100644
--- a/tools/perf/util/intel-pt-decoder/insn.h
+++ b/tools/perf/util/intel-pt-decoder/insn.h
@@ -91,6 +91,7 @@ struct insn {
91#define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */ 91#define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */
92#define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ 92#define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */
93/* VEX bit fields */ 93/* VEX bit fields */
94#define X86_EVEX_M(vex) ((vex) & 0x03) /* EVEX Byte1 */
94#define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ 95#define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */
95#define X86_VEX2_M 1 /* VEX2.M always 1 */ 96#define X86_VEX2_M 1 /* VEX2.M always 1 */
96#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ 97#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */
@@ -133,6 +134,13 @@ static inline int insn_is_avx(struct insn *insn)
133 return (insn->vex_prefix.value != 0); 134 return (insn->vex_prefix.value != 0);
134} 135}
135 136
137static inline int insn_is_evex(struct insn *insn)
138{
139 if (!insn->prefixes.got)
140 insn_get_prefixes(insn);
141 return (insn->vex_prefix.nbytes == 4);
142}
143
136/* Ensure this instruction is decoded completely */ 144/* Ensure this instruction is decoded completely */
137static inline int insn_complete(struct insn *insn) 145static inline int insn_complete(struct insn *insn)
138{ 146{
@@ -144,8 +152,10 @@ static inline insn_byte_t insn_vex_m_bits(struct insn *insn)
144{ 152{
145 if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ 153 if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */
146 return X86_VEX2_M; 154 return X86_VEX2_M;
147 else 155 else if (insn->vex_prefix.nbytes == 3) /* 3 bytes VEX */
148 return X86_VEX3_M(insn->vex_prefix.bytes[1]); 156 return X86_VEX3_M(insn->vex_prefix.bytes[1]);
157 else /* EVEX */
158 return X86_EVEX_M(insn->vex_prefix.bytes[1]);
149} 159}
150 160
151static inline insn_byte_t insn_vex_p_bits(struct insn *insn) 161static inline insn_byte_t insn_vex_p_bits(struct insn *insn)
diff --git a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
index 28082de46f0d..ec378cd7b71e 100644
--- a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
+++ b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
@@ -13,12 +13,17 @@
13# opcode: escape # escaped-name 13# opcode: escape # escaped-name
14# EndTable 14# EndTable
15# 15#
16# mnemonics that begin with lowercase 'v' accept a VEX or EVEX prefix
17# mnemonics that begin with lowercase 'k' accept a VEX prefix
18#
16#<group maps> 19#<group maps>
17# GrpTable: GrpXXX 20# GrpTable: GrpXXX
18# reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] 21# reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...]
19# EndTable 22# EndTable
20# 23#
21# AVX Superscripts 24# AVX Superscripts
25# (ev): this opcode requires EVEX prefix.
26# (evo): this opcode is changed by EVEX prefix (EVEX opcode)
22# (v): this opcode requires VEX prefix. 27# (v): this opcode requires VEX prefix.
23# (v1): this opcode only supports 128bit VEX. 28# (v1): this opcode only supports 128bit VEX.
24# 29#
@@ -137,7 +142,7 @@ AVXcode:
137# 0x60 - 0x6f 142# 0x60 - 0x6f
13860: PUSHA/PUSHAD (i64) 14360: PUSHA/PUSHAD (i64)
13961: POPA/POPAD (i64) 14461: POPA/POPAD (i64)
14062: BOUND Gv,Ma (i64) 14562: BOUND Gv,Ma (i64) | EVEX (Prefix)
14163: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64) 14663: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64)
14264: SEG=FS (Prefix) 14764: SEG=FS (Prefix)
14365: SEG=GS (Prefix) 14865: SEG=GS (Prefix)
@@ -399,17 +404,17 @@ AVXcode: 1
3993f: 4043f:
400# 0x0f 0x40-0x4f 405# 0x0f 0x40-0x4f
40140: CMOVO Gv,Ev 40640: CMOVO Gv,Ev
40241: CMOVNO Gv,Ev 40741: CMOVNO Gv,Ev | kandw/q Vk,Hk,Uk | kandb/d Vk,Hk,Uk (66)
40342: CMOVB/C/NAE Gv,Ev 40842: CMOVB/C/NAE Gv,Ev | kandnw/q Vk,Hk,Uk | kandnb/d Vk,Hk,Uk (66)
40443: CMOVAE/NB/NC Gv,Ev 40943: CMOVAE/NB/NC Gv,Ev
40544: CMOVE/Z Gv,Ev 41044: CMOVE/Z Gv,Ev | knotw/q Vk,Uk | knotb/d Vk,Uk (66)
40645: CMOVNE/NZ Gv,Ev 41145: CMOVNE/NZ Gv,Ev | korw/q Vk,Hk,Uk | korb/d Vk,Hk,Uk (66)
40746: CMOVBE/NA Gv,Ev 41246: CMOVBE/NA Gv,Ev | kxnorw/q Vk,Hk,Uk | kxnorb/d Vk,Hk,Uk (66)
40847: CMOVA/NBE Gv,Ev 41347: CMOVA/NBE Gv,Ev | kxorw/q Vk,Hk,Uk | kxorb/d Vk,Hk,Uk (66)
40948: CMOVS Gv,Ev 41448: CMOVS Gv,Ev
41049: CMOVNS Gv,Ev 41549: CMOVNS Gv,Ev
4114a: CMOVP/PE Gv,Ev 4164a: CMOVP/PE Gv,Ev | kaddw/q Vk,Hk,Uk | kaddb/d Vk,Hk,Uk (66)
4124b: CMOVNP/PO Gv,Ev 4174b: CMOVNP/PO Gv,Ev | kunpckbw Vk,Hk,Uk (66) | kunpckwd/dq Vk,Hk,Uk
4134c: CMOVL/NGE Gv,Ev 4184c: CMOVL/NGE Gv,Ev
4144d: CMOVNL/GE Gv,Ev 4194d: CMOVNL/GE Gv,Ev
4154e: CMOVLE/NG Gv,Ev 4204e: CMOVLE/NG Gv,Ev
@@ -426,7 +431,7 @@ AVXcode: 1
42658: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1) 43158: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1)
42759: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1) 43259: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1)
4285a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1) 4335a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1)
4295b: vcvtdq2ps Vps,Wdq | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3) 4345b: vcvtdq2ps Vps,Wdq | vcvtqq2ps Vps,Wqq (evo) | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3)
4305c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1) 4355c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1)
4315d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1) 4365d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1)
4325e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1) 4375e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1)
@@ -447,7 +452,7 @@ AVXcode: 1
4476c: vpunpcklqdq Vx,Hx,Wx (66),(v1) 4526c: vpunpcklqdq Vx,Hx,Wx (66),(v1)
4486d: vpunpckhqdq Vx,Hx,Wx (66),(v1) 4536d: vpunpckhqdq Vx,Hx,Wx (66),(v1)
4496e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1) 4546e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1)
4506f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqu Vx,Wx (F3) 4556f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqa32/64 Vx,Wx (66),(evo) | vmovdqu Vx,Wx (F3) | vmovdqu32/64 Vx,Wx (F3),(evo) | vmovdqu8/16 Vx,Wx (F2),(ev)
451# 0x0f 0x70-0x7f 456# 0x0f 0x70-0x7f
45270: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1) 45770: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1)
45371: Grp12 (1A) 45871: Grp12 (1A)
@@ -458,14 +463,14 @@ AVXcode: 1
45876: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1) 46376: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1)
459# Note: Remove (v), because vzeroall and vzeroupper becomes emms without VEX. 464# Note: Remove (v), because vzeroall and vzeroupper becomes emms without VEX.
46077: emms | vzeroupper | vzeroall 46577: emms | vzeroupper | vzeroall
46178: VMREAD Ey,Gy 46678: VMREAD Ey,Gy | vcvttps2udq/pd2udq Vx,Wpd (evo) | vcvttsd2usi Gv,Wx (F2),(ev) | vcvttss2usi Gv,Wx (F3),(ev) | vcvttps2uqq/pd2uqq Vx,Wx (66),(ev)
46279: VMWRITE Gy,Ey 46779: VMWRITE Gy,Ey | vcvtps2udq/pd2udq Vx,Wpd (evo) | vcvtsd2usi Gv,Wx (F2),(ev) | vcvtss2usi Gv,Wx (F3),(ev) | vcvtps2uqq/pd2uqq Vx,Wx (66),(ev)
4637a: 4687a: vcvtudq2pd/uqq2pd Vpd,Wx (F3),(ev) | vcvtudq2ps/uqq2ps Vpd,Wx (F2),(ev) | vcvttps2qq/pd2qq Vx,Wx (66),(ev)
4647b: 4697b: vcvtusi2sd Vpd,Hpd,Ev (F2),(ev) | vcvtusi2ss Vps,Hps,Ev (F3),(ev) | vcvtps2qq/pd2qq Vx,Wx (66),(ev)
4657c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2) 4707c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2)
4667d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2) 4717d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2)
4677e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1) 4727e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1)
4687f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3) 4737f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqa32/64 Wx,Vx (66),(evo) | vmovdqu Wx,Vx (F3) | vmovdqu32/64 Wx,Vx (F3),(evo) | vmovdqu8/16 Wx,Vx (F2),(ev)
469# 0x0f 0x80-0x8f 474# 0x0f 0x80-0x8f
470# Note: "forced64" is Intel CPU behavior (see comment about CALL insn). 475# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
47180: JO Jz (f64) 47680: JO Jz (f64)
@@ -485,16 +490,16 @@ AVXcode: 1
4858e: JLE/JNG Jz (f64) 4908e: JLE/JNG Jz (f64)
4868f: JNLE/JG Jz (f64) 4918f: JNLE/JG Jz (f64)
487# 0x0f 0x90-0x9f 492# 0x0f 0x90-0x9f
48890: SETO Eb 49390: SETO Eb | kmovw/q Vk,Wk | kmovb/d Vk,Wk (66)
48991: SETNO Eb 49491: SETNO Eb | kmovw/q Mv,Vk | kmovb/d Mv,Vk (66)
49092: SETB/C/NAE Eb 49592: SETB/C/NAE Eb | kmovw Vk,Rv | kmovb Vk,Rv (66) | kmovq/d Vk,Rv (F2)
49193: SETAE/NB/NC Eb 49693: SETAE/NB/NC Eb | kmovw Gv,Uk | kmovb Gv,Uk (66) | kmovq/d Gv,Uk (F2)
49294: SETE/Z Eb 49794: SETE/Z Eb
49395: SETNE/NZ Eb 49895: SETNE/NZ Eb
49496: SETBE/NA Eb 49996: SETBE/NA Eb
49597: SETA/NBE Eb 50097: SETA/NBE Eb
49698: SETS Eb 50198: SETS Eb | kortestw/q Vk,Uk | kortestb/d Vk,Uk (66)
49799: SETNS Eb 50299: SETNS Eb | ktestw/q Vk,Uk | ktestb/d Vk,Uk (66)
4989a: SETP/PE Eb 5039a: SETP/PE Eb
4999b: SETNP/PO Eb 5049b: SETNP/PO Eb
5009c: SETL/NGE Eb 5059c: SETL/NGE Eb
@@ -564,11 +569,11 @@ d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1)
564d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1) 569d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1)
565d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1) 570d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1)
566da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1) 571da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1)
567db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) 572db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) | vpandd/q Vx,Hx,Wx (66),(evo)
568dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1) 573dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1)
569dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1) 574dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1)
570de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1) 575de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1)
571df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) 576df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) | vpandnd/q Vx,Hx,Wx (66),(evo)
572# 0x0f 0xe0-0xef 577# 0x0f 0xe0-0xef
573e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1) 578e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1)
574e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1) 579e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1)
@@ -576,16 +581,16 @@ e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1)
576e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1) 581e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1)
577e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1) 582e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1)
578e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1) 583e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1)
579e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtpd2dq Vx,Wpd (F2) 584e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtdq2pd/qq2pd Vx,Wdq (F3),(evo) | vcvtpd2dq Vx,Wpd (F2)
580e7: movntq Mq,Pq | vmovntdq Mx,Vx (66) 585e7: movntq Mq,Pq | vmovntdq Mx,Vx (66)
581e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1) 586e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1)
582e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1) 587e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1)
583ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1) 588ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1)
584eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) 589eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) | vpord/q Vx,Hx,Wx (66),(evo)
585ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1) 590ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1)
586ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1) 591ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1)
587ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1) 592ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1)
588ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) 593ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) | vpxord/q Vx,Hx,Wx (66),(evo)
589# 0x0f 0xf0-0xff 594# 0x0f 0xf0-0xff
590f0: vlddqu Vx,Mx (F2) 595f0: vlddqu Vx,Mx (F2)
591f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1) 596f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1)
@@ -626,81 +631,105 @@ AVXcode: 2
6260e: vtestps Vx,Wx (66),(v) 6310e: vtestps Vx,Wx (66),(v)
6270f: vtestpd Vx,Wx (66),(v) 6320f: vtestpd Vx,Wx (66),(v)
628# 0x0f 0x38 0x10-0x1f 633# 0x0f 0x38 0x10-0x1f
62910: pblendvb Vdq,Wdq (66) 63410: pblendvb Vdq,Wdq (66) | vpsrlvw Vx,Hx,Wx (66),(evo) | vpmovuswb Wx,Vx (F3),(ev)
63011: 63511: vpmovusdb Wx,Vd (F3),(ev) | vpsravw Vx,Hx,Wx (66),(ev)
63112: 63612: vpmovusqb Wx,Vq (F3),(ev) | vpsllvw Vx,Hx,Wx (66),(ev)
63213: vcvtph2ps Vx,Wx (66),(v) 63713: vcvtph2ps Vx,Wx (66),(v) | vpmovusdw Wx,Vd (F3),(ev)
63314: blendvps Vdq,Wdq (66) 63814: blendvps Vdq,Wdq (66) | vpmovusqw Wx,Vq (F3),(ev) | vprorvd/q Vx,Hx,Wx (66),(evo)
63415: blendvpd Vdq,Wdq (66) 63915: blendvpd Vdq,Wdq (66) | vpmovusqd Wx,Vq (F3),(ev) | vprolvd/q Vx,Hx,Wx (66),(evo)
63516: vpermps Vqq,Hqq,Wqq (66),(v) 64016: vpermps Vqq,Hqq,Wqq (66),(v) | vpermps/d Vqq,Hqq,Wqq (66),(evo)
63617: vptest Vx,Wx (66) 64117: vptest Vx,Wx (66)
63718: vbroadcastss Vx,Wd (66),(v) 64218: vbroadcastss Vx,Wd (66),(v)
63819: vbroadcastsd Vqq,Wq (66),(v) 64319: vbroadcastsd Vqq,Wq (66),(v) | vbroadcastf32x2 Vqq,Wq (66),(evo)
6391a: vbroadcastf128 Vqq,Mdq (66),(v) 6441a: vbroadcastf128 Vqq,Mdq (66),(v) | vbroadcastf32x4/64x2 Vqq,Wq (66),(evo)
6401b: 6451b: vbroadcastf32x8/64x4 Vqq,Mdq (66),(ev)
6411c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1) 6461c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1)
6421d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1) 6471d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1)
6431e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1) 6481e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1)
6441f: 6491f: vpabsq Vx,Wx (66),(ev)
645# 0x0f 0x38 0x20-0x2f 650# 0x0f 0x38 0x20-0x2f
64620: vpmovsxbw Vx,Ux/Mq (66),(v1) 65120: vpmovsxbw Vx,Ux/Mq (66),(v1) | vpmovswb Wx,Vx (F3),(ev)
64721: vpmovsxbd Vx,Ux/Md (66),(v1) 65221: vpmovsxbd Vx,Ux/Md (66),(v1) | vpmovsdb Wx,Vd (F3),(ev)
64822: vpmovsxbq Vx,Ux/Mw (66),(v1) 65322: vpmovsxbq Vx,Ux/Mw (66),(v1) | vpmovsqb Wx,Vq (F3),(ev)
64923: vpmovsxwd Vx,Ux/Mq (66),(v1) 65423: vpmovsxwd Vx,Ux/Mq (66),(v1) | vpmovsdw Wx,Vd (F3),(ev)
65024: vpmovsxwq Vx,Ux/Md (66),(v1) 65524: vpmovsxwq Vx,Ux/Md (66),(v1) | vpmovsqw Wx,Vq (F3),(ev)
65125: vpmovsxdq Vx,Ux/Mq (66),(v1) 65625: vpmovsxdq Vx,Ux/Mq (66),(v1) | vpmovsqd Wx,Vq (F3),(ev)
65226: 65726: vptestmb/w Vk,Hx,Wx (66),(ev) | vptestnmb/w Vk,Hx,Wx (F3),(ev)
65327: 65827: vptestmd/q Vk,Hx,Wx (66),(ev) | vptestnmd/q Vk,Hx,Wx (F3),(ev)
65428: vpmuldq Vx,Hx,Wx (66),(v1) 65928: vpmuldq Vx,Hx,Wx (66),(v1) | vpmovm2b/w Vx,Uk (F3),(ev)
65529: vpcmpeqq Vx,Hx,Wx (66),(v1) 66029: vpcmpeqq Vx,Hx,Wx (66),(v1) | vpmovb2m/w2m Vk,Ux (F3),(ev)
6562a: vmovntdqa Vx,Mx (66),(v1) 6612a: vmovntdqa Vx,Mx (66),(v1) | vpbroadcastmb2q Vx,Uk (F3),(ev)
6572b: vpackusdw Vx,Hx,Wx (66),(v1) 6622b: vpackusdw Vx,Hx,Wx (66),(v1)
6582c: vmaskmovps Vx,Hx,Mx (66),(v) 6632c: vmaskmovps Vx,Hx,Mx (66),(v) | vscalefps/d Vx,Hx,Wx (66),(evo)
6592d: vmaskmovpd Vx,Hx,Mx (66),(v) 6642d: vmaskmovpd Vx,Hx,Mx (66),(v) | vscalefss/d Vx,Hx,Wx (66),(evo)
6602e: vmaskmovps Mx,Hx,Vx (66),(v) 6652e: vmaskmovps Mx,Hx,Vx (66),(v)
6612f: vmaskmovpd Mx,Hx,Vx (66),(v) 6662f: vmaskmovpd Mx,Hx,Vx (66),(v)
662# 0x0f 0x38 0x30-0x3f 667# 0x0f 0x38 0x30-0x3f
66330: vpmovzxbw Vx,Ux/Mq (66),(v1) 66830: vpmovzxbw Vx,Ux/Mq (66),(v1) | vpmovwb Wx,Vx (F3),(ev)
66431: vpmovzxbd Vx,Ux/Md (66),(v1) 66931: vpmovzxbd Vx,Ux/Md (66),(v1) | vpmovdb Wx,Vd (F3),(ev)
66532: vpmovzxbq Vx,Ux/Mw (66),(v1) 67032: vpmovzxbq Vx,Ux/Mw (66),(v1) | vpmovqb Wx,Vq (F3),(ev)
66633: vpmovzxwd Vx,Ux/Mq (66),(v1) 67133: vpmovzxwd Vx,Ux/Mq (66),(v1) | vpmovdw Wx,Vd (F3),(ev)
66734: vpmovzxwq Vx,Ux/Md (66),(v1) 67234: vpmovzxwq Vx,Ux/Md (66),(v1) | vpmovqw Wx,Vq (F3),(ev)
66835: vpmovzxdq Vx,Ux/Mq (66),(v1) 67335: vpmovzxdq Vx,Ux/Mq (66),(v1) | vpmovqd Wx,Vq (F3),(ev)
66936: vpermd Vqq,Hqq,Wqq (66),(v) 67436: vpermd Vqq,Hqq,Wqq (66),(v) | vpermd/q Vqq,Hqq,Wqq (66),(evo)
67037: vpcmpgtq Vx,Hx,Wx (66),(v1) 67537: vpcmpgtq Vx,Hx,Wx (66),(v1)
67138: vpminsb Vx,Hx,Wx (66),(v1) 67638: vpminsb Vx,Hx,Wx (66),(v1) | vpmovm2d/q Vx,Uk (F3),(ev)
67239: vpminsd Vx,Hx,Wx (66),(v1) 67739: vpminsd Vx,Hx,Wx (66),(v1) | vpminsd/q Vx,Hx,Wx (66),(evo) | vpmovd2m/q2m Vk,Ux (F3),(ev)
6733a: vpminuw Vx,Hx,Wx (66),(v1) 6783a: vpminuw Vx,Hx,Wx (66),(v1) | vpbroadcastmw2d Vx,Uk (F3),(ev)
6743b: vpminud Vx,Hx,Wx (66),(v1) 6793b: vpminud Vx,Hx,Wx (66),(v1) | vpminud/q Vx,Hx,Wx (66),(evo)
6753c: vpmaxsb Vx,Hx,Wx (66),(v1) 6803c: vpmaxsb Vx,Hx,Wx (66),(v1)
6763d: vpmaxsd Vx,Hx,Wx (66),(v1) 6813d: vpmaxsd Vx,Hx,Wx (66),(v1) | vpmaxsd/q Vx,Hx,Wx (66),(evo)
6773e: vpmaxuw Vx,Hx,Wx (66),(v1) 6823e: vpmaxuw Vx,Hx,Wx (66),(v1)
6783f: vpmaxud Vx,Hx,Wx (66),(v1) 6833f: vpmaxud Vx,Hx,Wx (66),(v1) | vpmaxud/q Vx,Hx,Wx (66),(evo)
679# 0x0f 0x38 0x40-0x8f 684# 0x0f 0x38 0x40-0x8f
68040: vpmulld Vx,Hx,Wx (66),(v1) 68540: vpmulld Vx,Hx,Wx (66),(v1) | vpmulld/q Vx,Hx,Wx (66),(evo)
68141: vphminposuw Vdq,Wdq (66),(v1) 68641: vphminposuw Vdq,Wdq (66),(v1)
68242: 68742: vgetexpps/d Vx,Wx (66),(ev)
68343: 68843: vgetexpss/d Vx,Hx,Wx (66),(ev)
68444: 68944: vplzcntd/q Vx,Wx (66),(ev)
68545: vpsrlvd/q Vx,Hx,Wx (66),(v) 69045: vpsrlvd/q Vx,Hx,Wx (66),(v)
68646: vpsravd Vx,Hx,Wx (66),(v) 69146: vpsravd Vx,Hx,Wx (66),(v) | vpsravd/q Vx,Hx,Wx (66),(evo)
68747: vpsllvd/q Vx,Hx,Wx (66),(v) 69247: vpsllvd/q Vx,Hx,Wx (66),(v)
688# Skip 0x48-0x57 693# Skip 0x48-0x4b
6944c: vrcp14ps/d Vpd,Wpd (66),(ev)
6954d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev)
6964e: vrsqrt14ps/d Vpd,Wpd (66),(ev)
6974f: vrsqrt14ss/d Vsd,Hsd,Wsd (66),(ev)
698# Skip 0x50-0x57
68958: vpbroadcastd Vx,Wx (66),(v) 69958: vpbroadcastd Vx,Wx (66),(v)
69059: vpbroadcastq Vx,Wx (66),(v) 70059: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx (66),(evo)
6915a: vbroadcasti128 Vqq,Mdq (66),(v) 7015a: vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo)
692# Skip 0x5b-0x77 7025b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev)
703# Skip 0x5c-0x63
70464: vpblendmd/q Vx,Hx,Wx (66),(ev)
70565: vblendmps/d Vx,Hx,Wx (66),(ev)
70666: vpblendmb/w Vx,Hx,Wx (66),(ev)
707# Skip 0x67-0x74
70875: vpermi2b/w Vx,Hx,Wx (66),(ev)
70976: vpermi2d/q Vx,Hx,Wx (66),(ev)
71077: vpermi2ps/d Vx,Hx,Wx (66),(ev)
69378: vpbroadcastb Vx,Wx (66),(v) 71178: vpbroadcastb Vx,Wx (66),(v)
69479: vpbroadcastw Vx,Wx (66),(v) 71279: vpbroadcastw Vx,Wx (66),(v)
695# Skip 0x7a-0x7f 7137a: vpbroadcastb Vx,Rv (66),(ev)
7147b: vpbroadcastw Vx,Rv (66),(ev)
7157c: vpbroadcastd/q Vx,Rv (66),(ev)
7167d: vpermt2b/w Vx,Hx,Wx (66),(ev)
7177e: vpermt2d/q Vx,Hx,Wx (66),(ev)
7187f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
69680: INVEPT Gy,Mdq (66) 71980: INVEPT Gy,Mdq (66)
69781: INVPID Gy,Mdq (66) 72081: INVPID Gy,Mdq (66)
69882: INVPCID Gy,Mdq (66) 72182: INVPCID Gy,Mdq (66)
72283: vpmultishiftqb Vx,Hx,Wx (66),(ev)
72388: vexpandps/d Vpd,Wpd (66),(ev)
72489: vpexpandd/q Vx,Wx (66),(ev)
7258a: vcompressps/d Wx,Vx (66),(ev)
7268b: vpcompressd/q Wx,Vx (66),(ev)
6998c: vpmaskmovd/q Vx,Hx,Mx (66),(v) 7278c: vpmaskmovd/q Vx,Hx,Mx (66),(v)
7288d: vpermb/w Vx,Hx,Wx (66),(ev)
7008e: vpmaskmovd/q Mx,Vx,Hx (66),(v) 7298e: vpmaskmovd/q Mx,Vx,Hx (66),(v)
701# 0x0f 0x38 0x90-0xbf (FMA) 730# 0x0f 0x38 0x90-0xbf (FMA)
70290: vgatherdd/q Vx,Hx,Wx (66),(v) 73190: vgatherdd/q Vx,Hx,Wx (66),(v) | vpgatherdd/q Vx,Wx (66),(evo)
70391: vgatherqd/q Vx,Hx,Wx (66),(v) 73291: vgatherqd/q Vx,Hx,Wx (66),(v) | vpgatherqd/q Vx,Wx (66),(evo)
70492: vgatherdps/d Vx,Hx,Wx (66),(v) 73392: vgatherdps/d Vx,Hx,Wx (66),(v)
70593: vgatherqps/d Vx,Hx,Wx (66),(v) 73493: vgatherqps/d Vx,Hx,Wx (66),(v)
70694: 73594:
@@ -715,6 +744,10 @@ AVXcode: 2
7159d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1) 7449d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
7169e: vfnmsub132ps/d Vx,Hx,Wx (66),(v) 7459e: vfnmsub132ps/d Vx,Hx,Wx (66),(v)
7179f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1) 7469f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
747a0: vpscatterdd/q Wx,Vx (66),(ev)
748a1: vpscatterqd/q Wx,Vx (66),(ev)
749a2: vscatterdps/d Wx,Vx (66),(ev)
750a3: vscatterqps/d Wx,Vx (66),(ev)
718a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v) 751a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v)
719a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v) 752a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v)
720a8: vfmadd213ps/d Vx,Hx,Wx (66),(v) 753a8: vfmadd213ps/d Vx,Hx,Wx (66),(v)
@@ -725,6 +758,8 @@ ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v)
725ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1) 758ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
726ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v) 759ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v)
727af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1) 760af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
761b4: vpmadd52luq Vx,Hx,Wx (66),(ev)
762b5: vpmadd52huq Vx,Hx,Wx (66),(ev)
728b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v) 763b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v)
729b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v) 764b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v)
730b8: vfmadd231ps/d Vx,Hx,Wx (66),(v) 765b8: vfmadd231ps/d Vx,Hx,Wx (66),(v)
@@ -736,12 +771,15 @@ bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
736be: vfnmsub231ps/d Vx,Hx,Wx (66),(v) 771be: vfnmsub231ps/d Vx,Hx,Wx (66),(v)
737bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1) 772bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
738# 0x0f 0x38 0xc0-0xff 773# 0x0f 0x38 0xc0-0xff
739c8: sha1nexte Vdq,Wdq 774c4: vpconflictd/q Vx,Wx (66),(ev)
775c6: Grp18 (1A)
776c7: Grp19 (1A)
777c8: sha1nexte Vdq,Wdq | vexp2ps/d Vx,Wx (66),(ev)
740c9: sha1msg1 Vdq,Wdq 778c9: sha1msg1 Vdq,Wdq
741ca: sha1msg2 Vdq,Wdq 779ca: sha1msg2 Vdq,Wdq | vrcp28ps/d Vx,Wx (66),(ev)
742cb: sha256rnds2 Vdq,Wdq 780cb: sha256rnds2 Vdq,Wdq | vrcp28ss/d Vx,Hx,Wx (66),(ev)
743cc: sha256msg1 Vdq,Wdq 781cc: sha256msg1 Vdq,Wdq | vrsqrt28ps/d Vx,Wx (66),(ev)
744cd: sha256msg2 Vdq,Wdq 782cd: sha256msg2 Vdq,Wdq | vrsqrt28ss/d Vx,Hx,Wx (66),(ev)
745db: VAESIMC Vdq,Wdq (66),(v1) 783db: VAESIMC Vdq,Wdq (66),(v1)
746dc: VAESENC Vdq,Hdq,Wdq (66),(v1) 784dc: VAESENC Vdq,Hdq,Wdq (66),(v1)
747dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1) 785dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1)
@@ -763,15 +801,15 @@ AVXcode: 3
76300: vpermq Vqq,Wqq,Ib (66),(v) 80100: vpermq Vqq,Wqq,Ib (66),(v)
76401: vpermpd Vqq,Wqq,Ib (66),(v) 80201: vpermpd Vqq,Wqq,Ib (66),(v)
76502: vpblendd Vx,Hx,Wx,Ib (66),(v) 80302: vpblendd Vx,Hx,Wx,Ib (66),(v)
76603: 80403: valignd/q Vx,Hx,Wx,Ib (66),(ev)
76704: vpermilps Vx,Wx,Ib (66),(v) 80504: vpermilps Vx,Wx,Ib (66),(v)
76805: vpermilpd Vx,Wx,Ib (66),(v) 80605: vpermilpd Vx,Wx,Ib (66),(v)
76906: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v) 80706: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v)
77007: 80807:
77108: vroundps Vx,Wx,Ib (66) 80908: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo)
77209: vroundpd Vx,Wx,Ib (66) 81009: vroundpd Vx,Wx,Ib (66) | vrndscalepd Vx,Wx,Ib (66),(evo)
7730a: vroundss Vss,Wss,Ib (66),(v1) 8110a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo)
7740b: vroundsd Vsd,Wsd,Ib (66),(v1) 8120b: vroundsd Vsd,Wsd,Ib (66),(v1) | vrndscalesd Vx,Hx,Wx,Ib (66),(evo)
7750c: vblendps Vx,Hx,Wx,Ib (66) 8130c: vblendps Vx,Hx,Wx,Ib (66)
7760d: vblendpd Vx,Hx,Wx,Ib (66) 8140d: vblendpd Vx,Hx,Wx,Ib (66)
7770e: vpblendw Vx,Hx,Wx,Ib (66),(v1) 8150e: vpblendw Vx,Hx,Wx,Ib (66),(v1)
@@ -780,26 +818,51 @@ AVXcode: 3
78015: vpextrw Rd/Mw,Vdq,Ib (66),(v1) 81815: vpextrw Rd/Mw,Vdq,Ib (66),(v1)
78116: vpextrd/q Ey,Vdq,Ib (66),(v1) 81916: vpextrd/q Ey,Vdq,Ib (66),(v1)
78217: vextractps Ed,Vdq,Ib (66),(v1) 82017: vextractps Ed,Vdq,Ib (66),(v1)
78318: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) 82118: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) | vinsertf32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo)
78419: vextractf128 Wdq,Vqq,Ib (66),(v) 82219: vextractf128 Wdq,Vqq,Ib (66),(v) | vextractf32x4/64x2 Wdq,Vqq,Ib (66),(evo)
8231a: vinsertf32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev)
8241b: vextractf32x8/64x4 Wdq,Vqq,Ib (66),(ev)
7851d: vcvtps2ph Wx,Vx,Ib (66),(v) 8251d: vcvtps2ph Wx,Vx,Ib (66),(v)
8261e: vpcmpud/q Vk,Hd,Wd,Ib (66),(ev)
8271f: vpcmpd/q Vk,Hd,Wd,Ib (66),(ev)
78620: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1) 82820: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1)
78721: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1) 82921: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1)
78822: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1) 83022: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1)
78938: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) 83123: vshuff32x4/64x2 Vx,Hx,Wx,Ib (66),(ev)
79039: vextracti128 Wdq,Vqq,Ib (66),(v) 83225: vpternlogd/q Vx,Hx,Wx,Ib (66),(ev)
83326: vgetmantps/d Vx,Wx,Ib (66),(ev)
83427: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev)
83530: kshiftrb/w Vk,Uk,Ib (66),(v)
83631: kshiftrd/q Vk,Uk,Ib (66),(v)
83732: kshiftlb/w Vk,Uk,Ib (66),(v)
83833: kshiftld/q Vk,Uk,Ib (66),(v)
83938: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) | vinserti32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo)
84039: vextracti128 Wdq,Vqq,Ib (66),(v) | vextracti32x4/64x2 Wdq,Vqq,Ib (66),(evo)
8413a: vinserti32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev)
8423b: vextracti32x8/64x4 Wdq,Vqq,Ib (66),(ev)
8433e: vpcmpub/w Vk,Hk,Wx,Ib (66),(ev)
8443f: vpcmpb/w Vk,Hk,Wx,Ib (66),(ev)
79140: vdpps Vx,Hx,Wx,Ib (66) 84540: vdpps Vx,Hx,Wx,Ib (66)
79241: vdppd Vdq,Hdq,Wdq,Ib (66),(v1) 84641: vdppd Vdq,Hdq,Wdq,Ib (66),(v1)
79342: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) 84742: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) | vdbpsadbw Vx,Hx,Wx,Ib (66),(evo)
84843: vshufi32x4/64x2 Vx,Hx,Wx,Ib (66),(ev)
79444: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1) 84944: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1)
79546: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v) 85046: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v)
7964a: vblendvps Vx,Hx,Wx,Lx (66),(v) 8514a: vblendvps Vx,Hx,Wx,Lx (66),(v)
7974b: vblendvpd Vx,Hx,Wx,Lx (66),(v) 8524b: vblendvpd Vx,Hx,Wx,Lx (66),(v)
7984c: vpblendvb Vx,Hx,Wx,Lx (66),(v1) 8534c: vpblendvb Vx,Hx,Wx,Lx (66),(v1)
85450: vrangeps/d Vx,Hx,Wx,Ib (66),(ev)
85551: vrangess/d Vx,Hx,Wx,Ib (66),(ev)
85654: vfixupimmps/d Vx,Hx,Wx,Ib (66),(ev)
85755: vfixupimmss/d Vx,Hx,Wx,Ib (66),(ev)
85856: vreduceps/d Vx,Wx,Ib (66),(ev)
85957: vreducess/d Vx,Hx,Wx,Ib (66),(ev)
79960: vpcmpestrm Vdq,Wdq,Ib (66),(v1) 86060: vpcmpestrm Vdq,Wdq,Ib (66),(v1)
80061: vpcmpestri Vdq,Wdq,Ib (66),(v1) 86161: vpcmpestri Vdq,Wdq,Ib (66),(v1)
80162: vpcmpistrm Vdq,Wdq,Ib (66),(v1) 86262: vpcmpistrm Vdq,Wdq,Ib (66),(v1)
80263: vpcmpistri Vdq,Wdq,Ib (66),(v1) 86363: vpcmpistri Vdq,Wdq,Ib (66),(v1)
86466: vfpclassps/d Vk,Wx,Ib (66),(ev)
86567: vfpclassss/d Vk,Wx,Ib (66),(ev)
803cc: sha1rnds4 Vdq,Wdq,Ib 866cc: sha1rnds4 Vdq,Wdq,Ib
804df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1) 867df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1)
805f0: RORX Gy,Ey,Ib (F2),(v) 868f0: RORX Gy,Ey,Ib (F2),(v)
@@ -927,8 +990,10 @@ GrpTable: Grp12
927EndTable 990EndTable
928 991
929GrpTable: Grp13 992GrpTable: Grp13
9930: vprord/q Hx,Wx,Ib (66),(ev)
9941: vprold/q Hx,Wx,Ib (66),(ev)
9302: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1) 9952: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1)
9314: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) 9964: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) | vpsrad/q Hx,Ux,Ib (66),(evo)
9326: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1) 9976: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1)
933EndTable 998EndTable
934 999
@@ -963,6 +1028,20 @@ GrpTable: Grp17
9633: BLSI By,Ey (v) 10283: BLSI By,Ey (v)
964EndTable 1029EndTable
965 1030
1031GrpTable: Grp18
10321: vgatherpf0dps/d Wx (66),(ev)
10332: vgatherpf1dps/d Wx (66),(ev)
10345: vscatterpf0dps/d Wx (66),(ev)
10356: vscatterpf1dps/d Wx (66),(ev)
1036EndTable
1037
1038GrpTable: Grp19
10391: vgatherpf0qps/d Wx (66),(ev)
10402: vgatherpf1qps/d Wx (66),(ev)
10415: vscatterpf0qps/d Wx (66),(ev)
10426: vscatterpf1qps/d Wx (66),(ev)
1043EndTable
1044
966# AMD's Prefetch Group 1045# AMD's Prefetch Group
967GrpTable: GrpP 1046GrpTable: GrpP
9680: PREFETCH 10470: PREFETCH