From eb13296cfaf6c699566473669a96a38a90562384 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 13 Aug 2009 16:34:13 -0400 Subject: x86: Instruction decoder API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add x86 instruction decoder to arch-specific libraries. This decoder can decode x86 instructions used in kernel into prefix, opcode, modrm, sib, displacement and immediates. This can also show the length of instructions. This version introduces instruction attributes for decoding instructions. The instruction attribute tables are generated from the opcode map file (x86-opcode-map.txt) by the generator script(gen-insn-attr-x86.awk). Currently, the opcode maps are based on opcode maps in Intel(R) 64 and IA-32 Architectures Software Developers Manual Vol.2: Appendix.A, and consist of below two types of opcode tables. 1-byte/2-bytes/3-bytes opcodes, which has 256 elements, are written as below; Table: table-name Referrer: escaped-name opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] (or) opcode: escape # escaped-name EndTable Group opcodes, which has 8 elements, are written as below; GrpTable: GrpXXX reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] EndTable These opcode maps include a few SSE and FP opcodes (for setup), because those opcodes are used in the kernel. Signed-off-by: Masami Hiramatsu Signed-off-by: Jim Keniston Acked-by: H. Peter Anvin Cc: Ananth N Mavinakayanahalli Cc: Avi Kivity Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Ingo Molnar Cc: Jason Baron Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: Przemysław Pawełczyk Cc: Roland McGrath Cc: Sam Ravnborg Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi Cc: Vegard Nossum LKML-Reference: <20090813203413.31965.49709.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker --- arch/x86/lib/Makefile | 13 + arch/x86/lib/inat.c | 78 +++++ arch/x86/lib/insn.c | 464 ++++++++++++++++++++++++++ arch/x86/lib/x86-opcode-map.txt | 719 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 1274 insertions(+) create mode 100644 arch/x86/lib/inat.c create mode 100644 arch/x86/lib/insn.c create mode 100644 arch/x86/lib/x86-opcode-map.txt (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 07c31899c9c2..c77f8a7c531d 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -2,12 +2,25 @@ # Makefile for x86 specific library files. 
# +inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk +inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt +quiet_cmd_inat_tables = GEN $@ + cmd_inat_tables = $(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ + +$(obj)/inat-tables.c: $(inat_tables_script) $(inat_tables_maps) + $(call cmd,inat_tables) + +$(obj)/inat.o: $(obj)/inat-tables.c + +clean-files := inat-tables.c + obj-$(CONFIG_SMP) := msr.o lib-y := delay.o lib-y += thunk_$(BITS).o lib-y += usercopy_$(BITS).o getuser.o putuser.o lib-y += memcpy_$(BITS).o +lib-y += insn.o inat.o ifeq ($(CONFIG_X86_32),y) obj-y += atomic64_32.o diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c new file mode 100644 index 000000000000..054656a01dfd --- /dev/null +++ b/arch/x86/lib/inat.c @@ -0,0 +1,78 @@ +/* + * x86 instruction attribute tables + * + * Written by Masami Hiramatsu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ +#include + +/* Attribute tables are generated from opcode map */ +#include "inat-tables.c" + +/* Attribute search APIs */ +insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode) +{ + return inat_primary_table[opcode]; +} + +insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, insn_byte_t last_pfx, + insn_attr_t esc_attr) +{ + const insn_attr_t *table; + insn_attr_t lpfx_attr; + int n, m = 0; + + n = inat_escape_id(esc_attr); + if (last_pfx) { + lpfx_attr = inat_get_opcode_attribute(last_pfx); + m = inat_last_prefix_id(lpfx_attr); + } + table = inat_escape_tables[n][0]; + if (!table) + return 0; + if (inat_has_variant(table[opcode]) && m) { + table = inat_escape_tables[n][m]; + if (!table) + return 0; + } + return table[opcode]; +} + +insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx, + insn_attr_t grp_attr) +{ + const insn_attr_t *table; + insn_attr_t lpfx_attr; + int n, m = 0; + + n = inat_group_id(grp_attr); + if (last_pfx) { + lpfx_attr = inat_get_opcode_attribute(last_pfx); + m = inat_last_prefix_id(lpfx_attr); + } + table = inat_group_tables[n][0]; + if (!table) + return inat_group_common_attribute(grp_attr); + if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && m) { + table = inat_escape_tables[n][m]; + if (!table) + return inat_group_common_attribute(grp_attr); + } + return table[X86_MODRM_REG(modrm)] | + inat_group_common_attribute(grp_attr); +} + diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c new file mode 100644 index 000000000000..dfd56a30053f --- /dev/null +++ b/arch/x86/lib/insn.c @@ -0,0 +1,464 @@ +/* + * x86 instruction analysis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2002, 2004, 2009 + */ + +#include +#include +#include + +#define get_next(t, insn) \ + ({t r; r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) + +#define peek_next(t, insn) \ + ({t r; r = *(t*)insn->next_byte; r; }) + +/** + * insn_init() - initialize struct insn + * @insn: &struct insn to be initialized + * @kaddr: address (in kernel memory) of instruction (or copy thereof) + * @x86_64: !0 for 64-bit kernel or 64-bit app + */ +void insn_init(struct insn *insn, const void *kaddr, int x86_64) +{ + memset(insn, 0, sizeof(*insn)); + insn->kaddr = kaddr; + insn->next_byte = kaddr; + insn->x86_64 = x86_64 ? 1 : 0; + insn->opnd_bytes = 4; + if (x86_64) + insn->addr_bytes = 8; + else + insn->addr_bytes = 4; +} + +/** + * insn_get_prefixes - scan x86 instruction prefix bytes + * @insn: &struct insn containing instruction + * + * Populates the @insn->prefixes bitmap, and updates @insn->next_byte + * to point to the (first) opcode. No effect if @insn->prefixes.got + * is already set. + */ +void insn_get_prefixes(struct insn *insn) +{ + struct insn_field *prefixes = &insn->prefixes; + insn_attr_t attr; + insn_byte_t b, lb; + int i, nb; + + if (prefixes->got) + return; + + nb = 0; + lb = 0; + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + while (inat_is_prefix(attr)) { + /* Skip if same prefix */ + for (i = 0; i < nb; i++) + if (prefixes->bytes[i] == b) + goto found; + if (nb == 4) + /* Invalid instruction */ + break; + prefixes->bytes[nb++] = b; + if (inat_is_address_size_prefix(attr)) { + /* address size switches 2/4 or 4/8 */ + if (insn->x86_64) + insn->addr_bytes ^= 12; + else + insn->addr_bytes ^= 6; + } else if (inat_is_operand_size_prefix(attr)) { + /* oprand size switches 2/4 */ + insn->opnd_bytes ^= 6; + } +found: + prefixes->nbytes++; + insn->next_byte++; + lb = b; + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + } + /* Set the last prefix */ + if (lb && lb != insn->prefixes.bytes[3]) { + if (unlikely(insn->prefixes.bytes[3])) { + /* Swap the last prefix */ + b = insn->prefixes.bytes[3]; + for (i = 0; i < nb; i++) + if (prefixes->bytes[i] == lb) + prefixes->bytes[i] = b; + } + insn->prefixes.bytes[3] = lb; + } + + if (insn->x86_64) { + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + if (inat_is_rex_prefix(attr)) { + insn->rex_prefix.value = b; + insn->rex_prefix.nbytes = 1; + insn->next_byte++; + if (X86_REX_W(b)) + /* REX.W overrides opnd_size */ + insn->opnd_bytes = 8; + } + } + insn->rex_prefix.got = 1; + prefixes->got = 1; + return; +} + +/** + * insn_get_opcode - collect opcode(s) + * @insn: &struct insn containing instruction + * + * Populates @insn->opcode, updates @insn->next_byte to point past the + * opcode byte(s), and set @insn->attr (except for groups). + * If necessary, first collects any preceding (prefix) bytes. + * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got + * is already 1. 
+ */ +void insn_get_opcode(struct insn *insn) +{ + struct insn_field *opcode = &insn->opcode; + insn_byte_t op, pfx; + if (opcode->got) + return; + if (!insn->prefixes.got) + insn_get_prefixes(insn); + + /* Get first opcode */ + op = get_next(insn_byte_t, insn); + opcode->bytes[0] = op; + opcode->nbytes = 1; + insn->attr = inat_get_opcode_attribute(op); + while (inat_is_escape(insn->attr)) { + /* Get escaped opcode */ + op = get_next(insn_byte_t, insn); + opcode->bytes[opcode->nbytes++] = op; + pfx = insn_last_prefix(insn); + insn->attr = inat_get_escape_attribute(op, pfx, insn->attr); + } + opcode->got = 1; +} + +/** + * insn_get_modrm - collect ModRM byte, if any + * @insn: &struct insn containing instruction + * + * Populates @insn->modrm and updates @insn->next_byte to point past the + * ModRM byte, if any. If necessary, first collects the preceding bytes + * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1. + */ +void insn_get_modrm(struct insn *insn) +{ + struct insn_field *modrm = &insn->modrm; + insn_byte_t pfx, mod; + if (modrm->got) + return; + if (!insn->opcode.got) + insn_get_opcode(insn); + + if (inat_has_modrm(insn->attr)) { + mod = get_next(insn_byte_t, insn); + modrm->value = mod; + modrm->nbytes = 1; + if (inat_is_group(insn->attr)) { + pfx = insn_last_prefix(insn); + insn->attr = inat_get_group_attribute(mod, pfx, + insn->attr); + } + } + + if (insn->x86_64 && inat_is_force64(insn->attr)) + insn->opnd_bytes = 8; + modrm->got = 1; +} + + +/** + * insn_rip_relative() - Does instruction use RIP-relative addressing mode? + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * ModRM byte. No effect if @insn->x86_64 is 0. + */ +int insn_rip_relative(struct insn *insn) +{ + struct insn_field *modrm = &insn->modrm; + + if (!insn->x86_64) + return 0; + if (!modrm->got) + insn_get_modrm(insn); + /* + * For rip-relative instructions, the mod field (top 2 bits) + * is zero and the r/m field (bottom 3 bits) is 0x5. + */ + return (modrm->nbytes && (modrm->value & 0xc7) == 0x5); +} + +/** + * insn_get_sib() - Get the SIB byte of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * ModRM byte. + */ +void insn_get_sib(struct insn *insn) +{ + insn_byte_t modrm; + + if (insn->sib.got) + return; + if (!insn->modrm.got) + insn_get_modrm(insn); + if (insn->modrm.nbytes) { + modrm = (insn_byte_t)insn->modrm.value; + if (insn->addr_bytes != 2 && + X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) { + insn->sib.value = get_next(insn_byte_t, insn); + insn->sib.nbytes = 1; + } + } + insn->sib.got = 1; +} + + +/** + * insn_get_displacement() - Get the displacement of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * SIB byte. + * Displacement value is sign-expanded. + */ +void insn_get_displacement(struct insn *insn) +{ + insn_byte_t mod, rm, base; + + if (insn->displacement.got) + return; + if (!insn->sib.got) + insn_get_sib(insn); + if (insn->modrm.nbytes) { + /* + * Interpreting the modrm byte: + * mod = 00 - no displacement fields (exceptions below) + * mod = 01 - 1-byte displacement field + * mod = 10 - displacement field is 4 bytes, or 2 bytes if + * address size = 2 (0x67 prefix in 32-bit mode) + * mod = 11 - no memory operand + * + * If address size = 2... 
+ * mod = 00, r/m = 110 - displacement field is 2 bytes + * + * If address size != 2... + * mod != 11, r/m = 100 - SIB byte exists + * mod = 00, SIB base = 101 - displacement field is 4 bytes + * mod = 00, r/m = 101 - rip-relative addressing, displacement + * field is 4 bytes + */ + mod = X86_MODRM_MOD(insn->modrm.value); + rm = X86_MODRM_RM(insn->modrm.value); + base = X86_SIB_BASE(insn->sib.value); + if (mod == 3) + goto out; + if (mod == 1) { + insn->displacement.value = get_next(char, insn); + insn->displacement.nbytes = 1; + } else if (insn->addr_bytes == 2) { + if ((mod == 0 && rm == 6) || mod == 2) { + insn->displacement.value = + get_next(short, insn); + insn->displacement.nbytes = 2; + } + } else { + if ((mod == 0 && rm == 5) || mod == 2 || + (mod == 0 && base == 5)) { + insn->displacement.value = get_next(int, insn); + insn->displacement.nbytes = 4; + } + } + } +out: + insn->displacement.got = 1; +} + +/* Decode moffset16/32/64 */ +static void __get_moffset(struct insn *insn) +{ + switch (insn->addr_bytes) { + case 2: + insn->moffset1.value = get_next(short, insn); + insn->moffset1.nbytes = 2; + break; + case 4: + insn->moffset1.value = get_next(int, insn); + insn->moffset1.nbytes = 4; + break; + case 8: + insn->moffset1.value = get_next(int, insn); + insn->moffset1.nbytes = 4; + insn->moffset2.value = get_next(int, insn); + insn->moffset2.nbytes = 4; + break; + } + insn->moffset1.got = insn->moffset2.got = 1; +} + +/* Decode imm v32(Iz) */ +static void __get_immv32(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate.value = get_next(short, insn); + insn->immediate.nbytes = 2; + break; + case 4: + case 8: + insn->immediate.value = get_next(int, insn); + insn->immediate.nbytes = 4; + break; + } +} + +/* Decode imm v64(Iv/Ov) */ +static void __get_immv(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate1.value = get_next(short, insn); + insn->immediate1.nbytes = 2; + break; + case 4: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + break; + case 8: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + insn->immediate2.value = get_next(int, insn); + insn->immediate2.nbytes = 4; + break; + } + insn->immediate1.got = insn->immediate2.got = 1; +} + +/* Decode ptr16:16/32(Ap) */ +static void __get_immptr(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate1.value = get_next(short, insn); + insn->immediate1.nbytes = 2; + break; + case 4: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + break; + case 8: + /* ptr16:64 is not exist (no segment) */ + return; + } + insn->immediate2.value = get_next(unsigned short, insn); + insn->immediate2.nbytes = 2; + insn->immediate1.got = insn->immediate2.got = 1; +} + +/** + * insn_get_immediate() - Get the immediates of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * displacement bytes. + * Basically, most of immediates are sign-expanded. 
Unsigned-value can be + * get by bit masking with ((1 << (nbytes * 8)) - 1) + */ +void insn_get_immediate(struct insn *insn) +{ + if (insn->immediate.got) + return; + if (!insn->displacement.got) + insn_get_displacement(insn); + + if (inat_has_moffset(insn->attr)) { + __get_moffset(insn); + goto done; + } + + if (!inat_has_immediate(insn->attr)) + /* no immediates */ + goto done; + + switch (inat_immediate_size(insn->attr)) { + case INAT_IMM_BYTE: + insn->immediate.value = get_next(char, insn); + insn->immediate.nbytes = 1; + break; + case INAT_IMM_WORD: + insn->immediate.value = get_next(short, insn); + insn->immediate.nbytes = 2; + break; + case INAT_IMM_DWORD: + insn->immediate.value = get_next(int, insn); + insn->immediate.nbytes = 4; + break; + case INAT_IMM_QWORD: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + insn->immediate2.value = get_next(int, insn); + insn->immediate2.nbytes = 4; + break; + case INAT_IMM_PTR: + __get_immptr(insn); + break; + case INAT_IMM_VWORD32: + __get_immv32(insn); + break; + case INAT_IMM_VWORD: + __get_immv(insn); + break; + default: + break; + } + if (inat_has_second_immediate(insn->attr)) { + insn->immediate2.value = get_next(char, insn); + insn->immediate2.nbytes = 1; + } +done: + insn->immediate.got = 1; +} + +/** + * insn_get_length() - Get the length of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * immediates bytes. + */ +void insn_get_length(struct insn *insn) +{ + if (insn->length) + return; + if (!insn->immediate.got) + insn_get_immediate(insn); + insn->length = (unsigned char)((unsigned long)insn->next_byte + - (unsigned long)insn->kaddr); +} diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt new file mode 100644 index 000000000000..083dd59dd74b --- /dev/null +++ b/arch/x86/lib/x86-opcode-map.txt @@ -0,0 +1,719 @@ +# x86 Opcode Maps +# +# +# Table: table-name +# Referrer: escaped-name +# opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] +# (or) +# opcode: escape # escaped-name +# EndTable +# +# +# GrpTable: GrpXXX +# reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] 
+# EndTable +# + +Table: one byte opcode +Referrer: +# 0x00 - 0x0f +00: ADD Eb,Gb +01: ADD Ev,Gv +02: ADD Gb,Eb +03: ADD Gv,Ev +04: ADD AL,Ib +05: ADD rAX,Iz +06: PUSH ES (i64) +07: POP ES (i64) +08: OR Eb,Gb +09: OR Ev,Gv +0a: OR Gb,Eb +0b: OR Gv,Ev +0c: OR AL,Ib +0d: OR rAX,Iz +0e: PUSH CS (i64) +0f: escape # 2-byte escape +# 0x10 - 0x1f +10: ADC Eb,Gb +11: ADC Ev,Gv +12: ADC Gb,Eb +13: ADC Gv,Ev +14: ADC AL,Ib +15: ADC rAX,Iz +16: PUSH SS (i64) +17: POP SS (i64) +18: SBB Eb,Gb +19: SBB Ev,Gv +1a: SBB Gb,Eb +1b: SBB Gv,Ev +1c: SBB AL,Ib +1d: SBB rAX,Iz +1e: PUSH DS (i64) +1f: POP DS (i64) +# 0x20 - 0x2f +20: AND Eb,Gb +21: AND Ev,Gv +22: AND Gb,Eb +23: AND Gv,Ev +24: AND AL,Ib +25: AND rAx,Iz +26: SEG=ES (Prefix) +27: DAA (i64) +28: SUB Eb,Gb +29: SUB Ev,Gv +2a: SUB Gb,Eb +2b: SUB Gv,Ev +2c: SUB AL,Ib +2d: SUB rAX,Iz +2e: SEG=CS (Prefix) +2f: DAS (i64) +# 0x30 - 0x3f +30: XOR Eb,Gb +31: XOR Ev,Gv +32: XOR Gb,Eb +33: XOR Gv,Ev +34: XOR AL,Ib +35: XOR rAX,Iz +36: SEG=SS (Prefix) +37: AAA (i64) +38: CMP Eb,Gb +39: CMP Ev,Gv +3a: CMP Gb,Eb +3b: CMP Gv,Ev +3c: CMP AL,Ib +3d: CMP rAX,Iz +3e: SEG=DS (Prefix) +3f: AAS (i64) +# 0x40 - 0x4f +40: INC eAX (i64) | REX (o64) +41: INC eCX (i64) | REX.B (o64) +42: INC eDX (i64) | REX.X (o64) +43: INC eBX (i64) | REX.XB (o64) +44: INC eSP (i64) | REX.R (o64) +45: INC eBP (i64) | REX.RB (o64) +46: INC eSI (i64) | REX.RX (o64) +47: INC eDI (i64) | REX.RXB (o64) +48: DEC eAX (i64) | REX.W (o64) +49: DEC eCX (i64) | REX.WB (o64) +4a: DEC eDX (i64) | REX.WX (o64) +4b: DEC eBX (i64) | REX.WXB (o64) +4c: DEC eSP (i64) | REX.WR (o64) +4d: DEC eBP (i64) | REX.WRB (o64) +4e: DEC eSI (i64) | REX.WRX (o64) +4f: DEC eDI (i64) | REX.WRXB (o64) +# 0x50 - 0x5f +50: PUSH rAX/r8 (d64) +51: PUSH rCX/r9 (d64) +52: PUSH rDX/r10 (d64) +53: PUSH rBX/r11 (d64) +54: PUSH rSP/r12 (d64) +55: PUSH rBP/r13 (d64) +56: PUSH rSI/r14 (d64) +57: PUSH rDI/r15 (d64) +58: POP rAX/r8 (d64) +59: POP rCX/r9 (d64) +5a: POP rDX/r10 (d64) +5b: POP rBX/r11 (d64) +5c: POP rSP/r12 (d64) +5d: POP rBP/r13 (d64) +5e: POP rSI/r14 (d64) +5f: POP rDI/r15 (d64) +# 0x60 - 0x6f +60: PUSHA/PUSHAD (i64) +61: POPA/POPAD (i64) +62: BOUND Gv,Ma (i64) +63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64) +64: SEG=FS (Prefix) +65: SEG=GS (Prefix) +66: Operand-Size (Prefix) +67: Address-Size (Prefix) +68: PUSH Iz (d64) +69: IMUL Gv,Ev,Iz +6a: PUSH Ib (d64) +6b: IMUL Gv,Ev,Ib +6c: INS/INSB Yb,DX +6d: INS/INSW/INSD Yz,DX +6e: OUTS/OUTSB DX,Xb +6f: OUTS/OUTSW/OUTSD DX,Xz +# 0x70 - 0x7f +70: JO Jb +71: JNO Jb +72: JB/JNAE/JC Jb +73: JNB/JAE/JNC Jb +74: JZ/JE Jb +75: JNZ/JNE Jb +76: JBE/JNA Jb +77: JNBE/JA Jb +78: JS Jb +79: JNS Jb +7a: JP/JPE Jb +7b: JNP/JPO Jb +7c: JL/JNGE Jb +7d: JNL/JGE Jb +7e: JLE/JNG Jb +7f: JNLE/JG Jb +# 0x80 - 0x8f +80: Grp1 Eb,Ib (1A) +81: Grp1 Ev,Iz (1A) +82: Grp1 Eb,Ib (1A),(i64) +83: Grp1 Ev,Ib (1A) +84: TEST Eb,Gb +85: TEST Ev,Gv +86: XCHG Eb,Gb +87: XCHG Ev,Gv +88: MOV Eb,Gb +89: MOV Ev,Gv +8a: MOV Gb,Eb +8b: MOV Gv,Ev +8c: MOV Ev,Sw +8d: LEA Gv,M +8e: MOV Sw,Ew +8f: Grp1A (1A) | POP Ev (d64) +# 0x90 - 0x9f +90: NOP | PAUSE (F3) | XCHG r8,rAX +91: XCHG rCX/r9,rAX +92: XCHG rDX/r10,rAX +93: XCHG rBX/r11,rAX +94: XCHG rSP/r12,rAX +95: XCHG rBP/r13,rAX +96: XCHG rSI/r14,rAX +97: XCHG rDI/r15,rAX +98: CBW/CWDE/CDQE +99: CWD/CDQ/CQO +9a: CALLF Ap (i64) +9b: FWAIT/WAIT +9c: PUSHF/D/Q Fv (d64) +9d: POPF/D/Q Fv (d64) +9e: SAHF +9f: LAHF +# 0xa0 - 0xaf +a0: MOV AL,Ob +a1: MOV rAX,Ov +a2: MOV Ob,AL +a3: MOV Ov,rAX +a4: MOVS/B Xb,Yb +a5: MOVS/W/D/Q Xv,Yv +a6: CMPS/B Xb,Yb +a7: CMPS/W/D Xv,Yv +a8: TEST AL,Ib +a9: 
TEST rAX,Iz +aa: STOS/B Yb,AL +ab: STOS/W/D/Q Yv,rAX +ac: LODS/B AL,Xb +ad: LODS/W/D/Q rAX,Xv +ae: SCAS/B AL,Yb +af: SCAS/W/D/Q rAX,Xv +# 0xb0 - 0xbf +b0: MOV AL/R8L,Ib +b1: MOV CL/R9L,Ib +b2: MOV DL/R10L,Ib +b3: MOV BL/R11L,Ib +b4: MOV AH/R12L,Ib +b5: MOV CH/R13L,Ib +b6: MOV DH/R14L,Ib +b7: MOV BH/R15L,Ib +b8: MOV rAX/r8,Iv +b9: MOV rCX/r9,Iv +ba: MOV rDX/r10,Iv +bb: MOV rBX/r11,Iv +bc: MOV rSP/r12,Iv +bd: MOV rBP/r13,Iv +be: MOV rSI/r14,Iv +bf: MOV rDI/r15,Iv +# 0xc0 - 0xcf +c0: Grp2 Eb,Ib (1A) +c1: Grp2 Ev,Ib (1A) +c2: RETN Iw (f64) +c3: RETN +c4: LES Gz,Mp (i64) +c5: LDS Gz,Mp (i64) +c6: Grp11 Eb,Ib (1A) +c7: Grp11 Ev,Iz (1A) +c8: ENTER Iw,Ib +c9: LEAVE (d64) +ca: RETF Iw +cb: RETF +cc: INT3 +cd: INT Ib +ce: INTO (i64) +cf: IRET/D/Q +# 0xd0 - 0xdf +d0: Grp2 Eb,1 (1A) +d1: Grp2 Ev,1 (1A) +d2: Grp2 Eb,CL (1A) +d3: Grp2 Ev,CL (1A) +d4: AAM Ib (i64) +d5: AAD Ib (i64) +d6: +d7: XLAT/XLATB +d8: ESC +d9: ESC +da: ESC +db: ESC +dc: ESC +dd: ESC +de: ESC +df: ESC +# 0xe0 - 0xef +e0: LOOPNE/LOOPNZ Jb (f64) +e1: LOOPE/LOOPZ Jb (f64) +e2: LOOP Jb (f64) +e3: JrCXZ Jb (f64) +e4: IN AL,Ib +e5: IN eAX,Ib +e6: OUT Ib,AL +e7: OUT Ib,eAX +e8: CALL Jz (f64) +e9: JMP-near Jz (f64) +ea: JMP-far Ap (i64) +eb: JMP-short Jb (f64) +ec: IN AL,DX +ed: IN eAX,DX +ee: OUT DX,AL +ef: OUT DX,eAX +# 0xf0 - 0xff +f0: LOCK (Prefix) +f1: +f2: REPNE (Prefix) +f3: REP/REPE (Prefix) +f4: HLT +f5: CMC +f6: Grp3_1 Eb (1A) +f7: Grp3_2 Ev (1A) +f8: CLC +f9: STC +fa: CLI +fb: STI +fc: CLD +fd: STD +fe: Grp4 (1A) +ff: Grp5 (1A) +EndTable + +Table: 2-byte opcode # First Byte is 0x0f +Referrer: 2-byte escape +# 0x0f 0x00-0x0f +00: Grp6 (1A) +01: Grp7 (1A) +02: LAR Gv,Ew +03: LSL Gv,Ew +04: +05: SYSCALL (o64) +06: CLTS +07: SYSRET (o64) +08: INVD +09: WBINVD +0a: +0b: UD2 (1B) +0c: +0d: NOP Ev +0e: +0f: +# 0x0f 0x10-0x1f +10: +11: +12: +13: +14: +15: +16: +17: +18: Grp16 (1A) +19: +1a: +1b: +1c: +1d: +1e: +1f: NOP Ev +# 0x0f 0x20-0x2f +20: MOV Rd,Cd +21: MOV Rd,Dd +22: MOV Cd,Rd +23: MOV Dd,Rd +24: +25: +26: +27: +28: movaps Vps,Wps | movapd Vpd,Wpd (66) +29: movaps Wps,Vps | movapd Wpd,Vpd (66) +2a: +2b: +2c: +2d: +2e: +2f: +# 0x0f 0x30-0x3f +30: WRMSR +31: RDTSC +32: RDMSR +33: RDPMC +34: SYSENTER +35: SYSEXIT +36: +37: GETSEC +38: escape # 3-byte escape 1 +39: +3a: escape # 3-byte escape 2 +3b: +3c: +3d: +3e: +3f: +# 0x0f 0x40-0x4f +40: CMOVO Gv,Ev +41: CMOVNO Gv,Ev +42: CMOVB/C/NAE Gv,Ev +43: CMOVAE/NB/NC Gv,Ev +44: CMOVE/Z Gv,Ev +45: CMOVNE/NZ Gv,Ev +46: CMOVBE/NA Gv,Ev +47: CMOVA/NBE Gv,Ev +48: CMOVS Gv,Ev +49: CMOVNS Gv,Ev +4a: CMOVP/PE Gv,Ev +4b: CMOVNP/PO Gv,Ev +4c: CMOVL/NGE Gv,Ev +4d: CMOVNL/GE Gv,Ev +4e: CMOVLE/NG Gv,Ev +4f: CMOVNLE/G Gv,Ev +# 0x0f 0x50-0x5f +50: +51: +52: +53: +54: +55: +56: +57: +58: +59: +5a: +5b: +5c: +5d: +5e: +5f: +# 0x0f 0x60-0x6f +60: +61: +62: +63: +64: +65: +66: +67: +68: +69: +6a: +6b: +6c: +6d: +6e: +6f: +# 0x0f 0x70-0x7f +70: +71: Grp12 (1A) +72: Grp13 (1A) +73: Grp14 (1A) +74: +75: +76: +77: +78: VMREAD Ed/q,Gd/q +79: VMWRITE Gd/q,Ed/q +7a: +7b: +7c: +7d: +7e: +7f: +# 0x0f 0x80-0x8f +80: JO Jz (f64) +81: JNO Jz (f64) +82: JB/JNAE/JC Jz (f64) +83: JNB/JAE/JNC Jz (f64) +84: JZ/JE Jz (f64) +85: JNZ/JNE Jz (f64) +86: JBE/JNA Jz (f64) +87: JNBE/JA Jz (f64) +88: JS Jz (f64) +89: JNS Jz (f64) +8a: JP/JPE Jz (f64) +8b: JNP/JPO Jz (f64) +8c: JL/JNGE Jz (f64) +8d: JNL/JGE Jz (f64) +8e: JLE/JNG Jz (f64) +8f: JNLE/JG Jz (f64) +# 0x0f 0x90-0x9f +90: SETO Eb +91: SETNO Eb +92: SETB/C/NAE Eb +93: SETAE/NB/NC Eb +94: SETE/Z Eb +95: SETNE/NZ Eb +96: SETBE/NA Eb +97: SETA/NBE Eb +98: SETS Eb +99: SETNS Eb 
+9a: SETP/PE Eb +9b: SETNP/PO Eb +9c: SETL/NGE Eb +9d: SETNL/GE Eb +9e: SETLE/NG Eb +9f: SETNLE/G Eb +# 0x0f 0xa0-0xaf +a0: PUSH FS (d64) +a1: POP FS (d64) +a2: CPUID +a3: BT Ev,Gv +a4: SHLD Ev,Gv,Ib +a5: SHLD Ev,Gv,CL +a6: +a7: GrpRNG +a8: PUSH GS (d64) +a9: POP GS (d64) +aa: RSM +ab: BTS Ev,Gv +ac: SHRD Ev,Gv,Ib +ad: SHRD Ev,Gv,CL +ae: Grp15 (1A),(1C) +af: IMUL Gv,Ev +# 0x0f 0xb0-0xbf +b0: CMPXCHG Eb,Gb +b1: CMPXCHG Ev,Gv +b2: LSS Gv,Mp +b3: BTR Ev,Gv +b4: LFS Gv,Mp +b5: LGS Gv,Mp +b6: MOVZX Gv,Eb +b7: MOVZX Gv,Ew +b8: JMPE | POPCNT Gv,Ev (F3) +b9: Grp10 (1A) +ba: Grp8 Ev,Ib (1A) +bb: BTC Ev,Gv +bc: BSF Gv,Ev +bd: BSR Gv,Ev +be: MOVSX Gv,Eb +bf: MOVSX Gv,Ew +# 0x0f 0xc0-0xcf +c0: XADD Eb,Gb +c1: XADD Ev,Gv +c2: +c3: movnti Md/q,Gd/q +c4: +c5: +c6: +c7: Grp9 (1A) +c8: BSWAP RAX/EAX/R8/R8D +c9: BSWAP RCX/ECX/R9/R9D +ca: BSWAP RDX/EDX/R10/R10D +cb: BSWAP RBX/EBX/R11/R11D +cc: BSWAP RSP/ESP/R12/R12D +cd: BSWAP RBP/EBP/R13/R13D +ce: BSWAP RSI/ESI/R14/R14D +cf: BSWAP RDI/EDI/R15/R15D +# 0x0f 0xd0-0xdf +d0: +d1: +d2: +d3: +d4: +d5: +d6: +d7: +d8: +d9: +da: +db: +dc: +dd: +de: +df: +# 0x0f 0xe0-0xef +e0: +e1: +e2: +e3: +e4: +e5: +e6: +e7: +e8: +e9: +ea: +eb: +ec: +ed: +ee: +ef: +# 0x0f 0xf0-0xff +f0: +f1: +f2: +f3: +f4: +f5: +f6: +f7: +f8: +f9: +fa: +fb: +fc: +fd: +fe: +ff: +EndTable + +Table: 3-byte opcode 1 +Referrer: 3-byte escape 1 +80: INVEPT Gd/q,Mdq (66) +81: INVPID Gd/q,Mdq (66) +f0: MOVBE Gv,Mv | CRC32 Gd,Eb (F2) +f1: MOVBE Mv,Gv | CRC32 Gd,Ev (F2) +EndTable + +Table: 3-byte opcode 2 +Referrer: 3-byte escape 2 +# all opcode is for SSE +EndTable + +GrpTable: Grp1 +0: ADD +1: OR +2: ADC +3: SBB +4: AND +5: SUB +6: XOR +7: CMP +EndTable + +GrpTable: Grp1A +0: POP +EndTable + +GrpTable: Grp2 +0: ROL +1: ROR +2: RCL +3: RCR +4: SHL/SAL +5: SHR +6: +7: SAR +EndTable + +GrpTable: Grp3_1 +0: TEST Eb,Ib +1: +2: NOT Eb +3: NEG Eb +4: MUL AL,Eb +5: IMUL AL,Eb +6: DIV AL,Eb +7: IDIV AL,Eb +EndTable + +GrpTable: Grp3_2 +0: TEST Ev,Iz +1: +2: NOT Ev +3: NEG Ev +4: MUL rAX,Ev +5: IMUL rAX,Ev +6: DIV rAX,Ev +7: IDIV rAX,Ev +EndTable + +GrpTable: Grp4 +0: INC Eb +1: DEC Eb +EndTable + +GrpTable: Grp5 +0: INC Ev +1: DEC Ev +2: CALLN Ev (f64) +3: CALLF Ep +4: JMPN Ev (f64) +5: JMPF Ep +6: PUSH Ev (d64) +7: +EndTable + +GrpTable: Grp6 +0: SLDT Rv/Mw +1: STR Rv/Mw +2: LLDT Ew +3: LTR Ew +4: VERR Ew +5: VERW Ew +EndTable + +GrpTable: Grp7 +0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) +1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001) +2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) +3: LIDT Ms +4: SMSW Mw/Rv +5: +6: LMSW Ew +7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B) +EndTable + +GrpTable: Grp8 +4: BT +5: BTS +6: BTR +7: BTC +EndTable + +GrpTable: Grp9 +1: CMPXCHG8B/16B Mq/Mdq +6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) +7: VMPTRST Mq +EndTable + +GrpTable: Grp10 +EndTable + +GrpTable: Grp11 +0: MOV +EndTable + +GrpTable: Grp12 +EndTable + +GrpTable: Grp13 +EndTable + +GrpTable: Grp14 +EndTable + +GrpTable: Grp15 +0: fxsave +1: fxstor +2: ldmxcsr +3: stmxcsr +4: XSAVE +5: XRSTOR | lfence (11B) +6: mfence (11B) +7: clflush | sfence (11B) +EndTable + +GrpTable: Grp16 +0: prefetch NTA +1: prefetch T0 +2: prefetch T1 +3: prefetch T2 +EndTable + +GrpTable: GrpRNG +0: xstore-rng +1: xcrypt-ecb +2: xcrypt-cbc +4: xcrypt-cfb +5: xcrypt-ofb +EndTable -- cgit v1.2.2 From f12b4f546b4e327d5620a544a2bddab68de66027 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 8 Sep 2009 12:32:46 -0400 Subject: x86: Add MMX support for instruction 
decoder Add MMX/SSE instructions to x86 opcode maps, since some of those instructions are used in the kernel. This also fixes failures in the x86 instruction decoder seftest. Signed-off-by: Masami Hiramatsu Cc: Jim Keniston Cc: H. Peter Anvin Cc: Sam Ravnborg Cc: Frederic Weisbecker Cc: Ingo Molnar LKML-Reference: <20090908163246.23516.78835.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Frederic Weisbecker --- arch/x86/lib/x86-opcode-map.txt | 307 ++++++++++++++++++++++++++-------------- 1 file changed, 200 insertions(+), 107 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 083dd59dd74b..59e20d5c2a52 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -310,14 +310,14 @@ Referrer: 2-byte escape 0e: 0f: # 0x0f 0x10-0x1f -10: -11: -12: -13: -14: -15: -16: -17: +10: movups Vps,Wps | movss Vss,Wss (F3) | movupd Vpd,Wpd (66) | movsd Vsd,Wsd (F2) +11: movups Wps,Vps | movss Wss,Vss (F3) | movupd Wpd,Vpd (66) | movsd Wsd,Vsd (F2) +12: movlps Vq,Mq | movlpd Vq,Mq (66) | movhlps Vq,Uq | movddup Vq,Wq (F2) | movsldup Vq,Wq (F3) +13: mpvlps Mq,Vq | movlpd Mq,Vq (66) +14: unpcklps Vps,Wq | unpcklpd Vpd,Wq (66) +15: unpckhps Vps,Wq | unpckhpd Vpd,Wq (66) +16: movhps Vq,Mq | movhpd Vq,Mq (66) | movlsps Vq,Uq | movshdup Vq,Wq (F3) +17: movhps Mq,Vq | movhpd Mq,Vq (66) 18: Grp16 (1A) 19: 1a: @@ -337,12 +337,12 @@ Referrer: 2-byte escape 27: 28: movaps Vps,Wps | movapd Vpd,Wpd (66) 29: movaps Wps,Vps | movapd Wpd,Vpd (66) -2a: -2b: -2c: -2d: -2e: -2f: +2a: cvtpi2ps Vps,Qpi | cvtsi2ss Vss,Ed/q (F3) | cvtpi2pd Vpd,Qpi (66) | cvtsi2sd Vsd,Ed/q (F2) +2b: movntps Mps,Vps | movntpd Mpd,Vpd (66) +2c: cvttps2pi Ppi,Wps | cvttss2si Gd/q,Wss (F3) | cvttpd2pi Ppi,Wpd (66) | cvttsd2si Gd/q,Wsd (F2) +2d: cvtps2pi Ppi,Wps | cvtss2si Gd/q,Wss (F3) | cvtpd2pi Qpi,Wpd (66) | cvtsd2si Gd/q,Wsd (F2) +2e: ucomiss Vss,Wss | ucomisd Vsd,Wsd (66) +2f: comiss Vss,Wss | comisd Vsd,Wsd (66) # 0x0f 0x30-0x3f 30: WRMSR 31: RDTSC @@ -378,56 +378,56 @@ Referrer: 2-byte escape 4e: CMOVLE/NG Gv,Ev 4f: CMOVNLE/G Gv,Ev # 0x0f 0x50-0x5f -50: -51: -52: -53: -54: -55: -56: -57: -58: -59: -5a: -5b: -5c: -5d: -5e: -5f: +50: movmskps Gd/q,Ups | movmskpd Gd/q,Upd (66) +51: sqrtps Vps,Wps | sqrtss Vss,Wss (F3) | sqrtpd Vpd,Wpd (66) | sqrtsd Vsd,Wsd (F2) +52: rsqrtps Vps,Wps | rsqrtss Vss,Wss (F3) +53: rcpps Vps,Wps | rcpss Vss,Wss (F3) +54: andps Vps,Wps | andpd Vpd,Wpd (66) +55: andnps Vps,Wps | andnpd Vpd,Wpd (66) +56: orps Vps,Wps | orpd Vpd,Wpd (66) +57: xorps Vps,Wps | xorpd Vpd,Wpd (66) +58: addps Vps,Wps | addss Vss,Wss (F3) | addpd Vpd,Wpd (66) | addsd Vsd,Wsd (F2) +59: mulps Vps,Wps | mulss Vss,Wss (F3) | mulpd Vpd,Wpd (66) | mulsd Vsd,Wsd (F2) +5a: cvtps2pd Vpd,Wps | cvtss2sd Vsd,Wss (F3) | cvtpd2ps Vps,Wpd (66) | cvtsd2ss Vsd,Wsd (F2) +5b: cvtdq2ps Vps,Wdq | cvtps2dq Vdq,Wps (66) | cvttps2dq Vdq,Wps (F3) +5c: subps Vps,Wps | subss Vss,Wss (F3) | subpd Vpd,Wpd (66) | subsd Vsd,Wsd (F2) +5d: minps Vps,Wps | minss Vss,Wss (F3) | minpd Vpd,Wpd (66) | minsd Vsd,Wsd (F2) +5e: divps Vps,Wps | divss Vss,Wss (F3) | divpd Vpd,Wpd (66) | divsd Vsd,Wsd (F2) +5f: maxps Vps,Wps | maxss Vss,Wss (F3) | maxpd Vpd,Wpd (66) | maxsd Vsd,Wsd (F2) # 0x0f 0x60-0x6f -60: -61: -62: -63: -64: -65: -66: -67: -68: -69: -6a: -6b: -6c: -6d: -6e: -6f: +60: punpcklbw Pq,Qd | punpcklbw Vdq,Wdq (66) +61: punpcklwd Pq,Qd | punpcklwd Vdq,Wdq (66) +62: punpckldq Pq,Qd | punpckldq Vdq,Wdq (66) +63: packsswb Pq,Qq | packsswb Vdq,Wdq (66) +64: pcmpgtb Pq,Qq | 
pcmpgtb Vdq,Wdq (66) +65: pcmpgtw Pq,Qq | pcmpgtw(66) Vdq,Wdq +66: pcmpgtd Pq,Qq | pcmpgtd Vdq,Wdq (66) +67: packuswb Pq,Qq | packuswb(66) Vdq,Wdq +68: punpckhbw Pq,Qd | punpckhbw Vdq,Wdq (66) +69: punpckhwd Pq,Qd | punpckhwd Vdq,Wdq (66) +6a: punpckhdq Pq,Qd | punpckhdq Vdq,Wdq (66) +6b: packssdw Pq,Qd | packssdw Vdq,Wdq (66) +6c: punpcklqdq Vdq,Wdq (66) +6d: punpckhqdq Vdq,Wdq (66) +6e: movd/q/ Pd,Ed/q | movd/q Vdq,Ed/q (66) +6f: movq Pq,Qq | movdqa Vdq,Wdq (66) | movdqu Vdq,Wdq (F3) # 0x0f 0x70-0x7f -70: +70: pshufw Pq,Qq,Ib | pshufd Vdq,Wdq,Ib (66) | pshufhw Vdq,Wdq,Ib (F3) | pshuflw VdqWdq,Ib (F2) 71: Grp12 (1A) 72: Grp13 (1A) 73: Grp14 (1A) -74: -75: -76: -77: +74: pcmpeqb Pq,Qq | pcmpeqb Vdq,Wdq (66) +75: pcmpeqw Pq,Qq | pcmpeqw Vdq,Wdq (66) +76: pcmpeqd Pq,Qq | pcmpeqd Vdq,Wdq (66) +77: emms 78: VMREAD Ed/q,Gd/q 79: VMWRITE Gd/q,Ed/q 7a: 7b: -7c: -7d: -7e: -7f: +7c: haddps(F2) Vps,Wps | haddpd(66) Vpd,Wpd +7d: hsubps(F2) Vps,Wps | hsubpd(66) Vpd,Wpd +7e: movd/q Ed/q,Pd | movd/q Ed/q,Vdq (66) | movq Vq,Wq (F3) +7f: movq Qq,Pq | movdqa Wdq,Vdq (66) | movdqu Wdq,Vdq (F3) # 0x0f 0x80-0x8f 80: JO Jz (f64) 81: JNO Jz (f64) @@ -499,11 +499,11 @@ bf: MOVSX Gv,Ew # 0x0f 0xc0-0xcf c0: XADD Eb,Gb c1: XADD Ev,Gv -c2: +c2: cmpps Vps,Wps,Ib | cmpss Vss,Wss,Ib (F3) | cmppd Vpd,Wpd,Ib (66) | cmpsd Vsd,Wsd,Ib (F2) c3: movnti Md/q,Gd/q -c4: -c5: -c6: +c4: pinsrw Pq,Rd/q/Mw,Ib | pinsrw Vdq,Rd/q/Mw,Ib (66) +c5: pextrw Gd,Nq,Ib | pextrw Gd,Udq,Ib (66) +c6: shufps Vps,Wps,Ib | shufpd Vpd,Wpd,Ib (66) c7: Grp9 (1A) c8: BSWAP RAX/EAX/R8/R8D c9: BSWAP RCX/ECX/R9/R9D @@ -514,60 +514,131 @@ cd: BSWAP RBP/EBP/R13/R13D ce: BSWAP RSI/ESI/R14/R14D cf: BSWAP RDI/EDI/R15/R15D # 0x0f 0xd0-0xdf -d0: -d1: -d2: -d3: -d4: -d5: -d6: -d7: -d8: -d9: -da: -db: -dc: -dd: -de: -df: +d0: addsubps Vps,Wps (F2) | addsubpd Vpd,Wpd (66) +d1: psrlw Pq,Qq | psrlw Vdq,Wdq (66) +d2: psrld Pq,Qq | psrld Vdq,Wdq (66) +d3: psrlq Pq,Qq | psrlq Vdq,Wdq (66) +d4: paddq Pq,Qq | paddq Vdq,Wdq (66) +d5: pmullw Pq,Qq | pmullw Vdq,Wdq (66) +d6: movq Wq,Vq (66) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) +d7: pmovmskb Gd,Nq | pmovmskb Gd,Udq (66) +d8: psubusb Pq,Qq | psubusb Vdq,Wdq (66) +d9: psubusw Pq,Qq | psubusw Vdq,Wdq (66) +da: pminub Pq,Qq | pminub Vdq,Wdq (66) +db: pand Pq,Qq | pand Vdq,Wdq (66) +dc: paddusb Pq,Qq | paddusb Vdq,Wdq (66) +dd: paddusw Pq,Qq | paddusw Vdq,Wdq (66) +de: pmaxub Pq,Qq | pmaxub Vdq,Wdq (66) +df: pandn Pq,Qq | pandn Vdq,Wdq (66) # 0x0f 0xe0-0xef -e0: -e1: -e2: -e3: -e4: -e5: -e6: -e7: -e8: -e9: -ea: -eb: -ec: -ed: -ee: -ef: +e0: pavgb Pq,Qq | pavgb Vdq,Wdq (66) +e1: psraw Pq,Qq | psraw Vdq,Wdq (66) +e2: psrad Pq,Qq | psrad Vdq,Wdq (66) +e3: pavgw Pq,Qq | pavgw Vdq,Wdq (66) +e4: pmulhuw Pq,Qq | pmulhuw Vdq,Wdq (66) +e5: pmulhw Pq,Qq | pmulhw Vdq,Wdq (66) +e6: cvtpd2dq Vdq,Wpd (F2) | cvttpd2dq Vdq,Wpd (66) | cvtdq2pd Vpd,Wdq (F3) +e7: movntq Mq,Pq | movntdq Mdq,Vdq (66) +e8: psubsb Pq,Qq | psubsb Vdq,Wdq (66) +e9: psubsw Pq,Qq | psubsw Vdq,Wdq (66) +ea: pminsw Pq,Qq | pminsw Vdq,Wdq (66) +eb: por Pq,Qq | por Vdq,Wdq (66) +ec: paddsb Pq,Qq | paddsb Vdq,Wdq (66) +ed: paddsw Pq,Qq | paddsw Vdq,Wdq (66) +ee: pmaxsw Pq,Qq | pmaxsw Vdq,Wdq (66) +ef: pxor Pq,Qq | pxor Vdq,Wdq (66) # 0x0f 0xf0-0xff -f0: -f1: -f2: -f3: -f4: -f5: -f6: -f7: -f8: -f9: -fa: -fb: -fc: -fd: -fe: +f0: lddqu Vdq,Mdq (F2) +f1: psllw Pq,Qq | psllw Vdq,Wdq (66) +f2: pslld Pq,Qq | pslld Vdq,Wdq (66) +f3: psllq Pq,Qq | psllq Vdq,Wdq (66) +f4: pmuludq Pq,Qq | pmuludq Vdq,Wdq (66) +f5: pmaddwd Pq,Qq | pmaddwd Vdq,Wdq (66) +f6: psadbw Pq,Qq | psadbw Vdq,Wdq 
(66) +f7: maskmovq Pq,Nq | maskmovdqu Vdq,Udq (66) +f8: psubb Pq,Qq | psubb Vdq,Wdq (66) +f9: psubw Pq,Qq | psubw Vdq,Wdq (66) +fa: psubd Pq,Qq | psubd Vdq,Wdq (66) +fb: psubq Pq,Qq | psubq Vdq,Wdq (66) +fc: paddb Pq,Qq | paddb Vdq,Wdq (66) +fd: paddw Pq,Qq | paddw Vdq,Wdq (66) +fe: paddd Pq,Qq | paddd Vdq,Wdq (66) ff: EndTable Table: 3-byte opcode 1 Referrer: 3-byte escape 1 +# 0x0f 0x38 0x00-0x0f +00: pshufb Pq,Qq | pshufb Vdq,Wdq (66) +01: phaddw Pq,Qq | phaddw Vdq,Wdq (66) +02: phaddd Pq,Qq | phaddd Vdq,Wdq (66) +03: phaddsw Pq,Qq | phaddsw Vdq,Wdq (66) +04: pmaddubsw Pq,Qq | pmaddubsw (66)Vdq,Wdq +05: phsubw Pq,Qq | phsubw Vdq,Wdq (66) +06: phsubd Pq,Qq | phsubd Vdq,Wdq (66) +07: phsubsw Pq,Qq | phsubsw Vdq,Wdq (66) +08: psignb Pq,Qq | psignb Vdq,Wdq (66) +09: psignw Pq,Qq | psignw Vdq,Wdq (66) +0a: psignd Pq,Qq | psignd Vdq,Wdq (66) +0b: pmulhrsw Pq,Qq | pmulhrsw Vdq,Wdq (66) +0c: +0d: +0e: +0f: +# 0x0f 0x38 0x10-0x1f +10: pblendvb Vdq,Wdq (66) +11: +12: +13: +14: blendvps Vdq,Wdq (66) +15: blendvpd Vdq,Wdq (66) +16: +17: ptest Vdq,Wdq (66) +18: +19: +1a: +1b: +1c: pabsb Pq,Qq | pabsb Vdq,Wdq (66) +1d: pabsw Pq,Qq | pabsw Vdq,Wdq (66) +1e: pabsd Pq,Qq | pabsd Vdq,Wdq (66) +1f: +# 0x0f 0x38 0x20-0x2f +20: pmovsxbw Vdq,Udq/Mq (66) +21: pmovsxbd Vdq,Udq/Md (66) +22: pmovsxbq Vdq,Udq/Mw (66) +23: pmovsxwd Vdq,Udq/Mq (66) +24: pmovsxwq Vdq,Udq/Md (66) +25: pmovsxdq Vdq,Udq/Mq (66) +26: +27: +28: pmuldq Vdq,Wdq (66) +29: pcmpeqq Vdq,Wdq (66) +2a: movntdqa Vdq,Mdq (66) +2b: packusdw Vdq,Wdq (66) +2c: +2d: +2e: +2f: +# 0x0f 0x38 0x30-0x3f +30: pmovzxbw Vdq,Udq/Mq (66) +31: pmovzxbd Vdq,Udq/Md (66) +32: pmovzxbq Vdq,Udq/Mw (66) +33: pmovzxwd Vdq,Udq/Mq (66) +34: pmovzxwq Vdq,Udq/Md (66) +35: pmovzxdq Vdq,Udq/Mq (66) +36: +37: pcmpgtq Vdq,Wdq (66) +38: pminsb Vdq,Wdq (66) +39: pminsd Vdq,Wdq (66) +3a: pminuw Vdq,Wdq (66) +3b: pminud Vdq,Wdq (66) +3c: pmaxsb Vdq,Wdq (66) +3d: pmaxsd Vdq,Wdq (66) +3e: pmaxuw Vdq,Wdq (66) +3f: pmaxud Vdq,Wdq (66) +# 0x0f 0x38 0x4f-0xff +40: pmulld Vdq,Wdq (66) +41: phminposuw Vdq,Wdq (66) 80: INVEPT Gd/q,Mdq (66) 81: INVPID Gd/q,Mdq (66) f0: MOVBE Gv,Mv | CRC32 Gd,Eb (F2) @@ -576,7 +647,29 @@ EndTable Table: 3-byte opcode 2 Referrer: 3-byte escape 2 -# all opcode is for SSE +# 0x0f 0x3a 0x00-0xff +08: roundps Vdq,Wdq,Ib (66) +09: roundpd Vdq,Wdq,Ib (66) +0a: roundss Vss,Wss,Ib (66) +0b: roundsd Vsd,Wsd,Ib (66) +0c: blendps Vdq,Wdq,Ib (66) +0d: blendpd Vdq,Wdq,Ib (66) +0e: pblendw Vdq,Wdq,Ib (66) +0f: palignr Pq,Qq,Ib | palignr Vdq,Wdq,Ib (66) +14: pextrb Rd/Mb,Vdq,Ib (66) +15: pextrw Rd/Mw,Vdq,Ib (66) +16: pextrd/pextrq Ed/q,Vdq,Ib (66) +17: extractps Ed,Vdq,Ib (66) +20: pinsrb Vdq,Rd/q/Mb,Ib (66) +21: insertps Vdq,Udq/Md,Ib (66) +22: pinsrd/pinsrq Vdq,Ed/q,Ib (66) +40: dpps Vdq,Wdq,Ib (66) +41: dppd Vdq,Wdq,Ib (66) +42: mpsadbw Vdq,Wdq,Ib (66) +60: pcmpestrm Vdq,Wdq,Ib (66) +61: pcmpestri Vdq,Wdq,Ib (66) +62: pcmpistrm Vdq,Wdq,Ib (66) +63: pcmpistri Vdq,Wdq,Ib (66) EndTable GrpTable: Grp1 -- cgit v1.2.2 From b8a4754147d61f5359a765a3afd3eb03012aa052 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 30 Jul 2009 11:10:02 +0200 Subject: x86, msr: Unify rdmsr_on_cpus/wrmsr_on_cpus Since rdmsr_on_cpus and wrmsr_on_cpus are almost identical, unify them into a common __rwmsr_on_cpus helper thus avoiding code duplication. While at it, convert cpumask_t's to const struct cpumask *. Signed-off-by: Borislav Petkov Signed-off-by: H. 
Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/lib/msr.c | 46 +++++++++++++++++++--------------------------- 1 file changed, 19 insertions(+), 27 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c index 33a1e3ca22d8..41628b104b9e 100644 --- a/arch/x86/lib/msr.c +++ b/arch/x86/lib/msr.c @@ -71,14 +71,9 @@ int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) } EXPORT_SYMBOL(wrmsr_on_cpu); -/* rdmsr on a bunch of CPUs - * - * @mask: which CPUs - * @msr_no: which MSR - * @msrs: array of MSR values - * - */ -void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs) +static void __rwmsr_on_cpus(const struct cpumask *mask, u32 msr_no, + struct msr *msrs, + void (*msr_func) (void *info)) { struct msr_info rv; int this_cpu; @@ -92,11 +87,23 @@ void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs) this_cpu = get_cpu(); if (cpumask_test_cpu(this_cpu, mask)) - __rdmsr_on_cpu(&rv); + msr_func(&rv); - smp_call_function_many(mask, __rdmsr_on_cpu, &rv, 1); + smp_call_function_many(mask, msr_func, &rv, 1); put_cpu(); } + +/* rdmsr on a bunch of CPUs + * + * @mask: which CPUs + * @msr_no: which MSR + * @msrs: array of MSR values + * + */ +void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) +{ + __rwmsr_on_cpus(mask, msr_no, msrs, __rdmsr_on_cpu); +} EXPORT_SYMBOL(rdmsr_on_cpus); /* @@ -107,24 +114,9 @@ EXPORT_SYMBOL(rdmsr_on_cpus); * @msrs: array of MSR values * */ -void wrmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs) +void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) { - struct msr_info rv; - int this_cpu; - - memset(&rv, 0, sizeof(rv)); - - rv.off = cpumask_first(mask); - rv.msrs = msrs; - rv.msr_no = msr_no; - - this_cpu = get_cpu(); - - if (cpumask_test_cpu(this_cpu, mask)) - __wrmsr_on_cpu(&rv); - - smp_call_function_many(mask, __wrmsr_on_cpu, &rv, 1); - put_cpu(); + __rwmsr_on_cpus(mask, msr_no, msrs, __wrmsr_on_cpu); } EXPORT_SYMBOL(wrmsr_on_cpus); -- cgit v1.2.2 From 9f0cf4adb6aa0bfccf675c938124e68f7f06349d Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 26 Sep 2009 14:33:01 +0200 Subject: x86: Use __builtin_object_size() to validate the buffer size for copy_from_user() gcc (4.x) supports the __builtin_object_size() builtin, which reports the size of an object that a pointer point to, when known at compile time. If the buffer size is not known at compile time, a constant -1 is returned. This patch uses this feature to add a sanity check to copy_from_user(); if the target buffer is known to be smaller than the copy size, the copy is aborted and a WARNing is emitted in memory debug mode. These extra checks compile away when the object size is not known, or if both the buffer size and the copy length are constants. 
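To make the mechanism concrete, the header-side wrapper implied by this patch pairs the compile-time buffer size with the runtime length roughly as below. This is a hedged sketch of the pattern, not the verbatim uaccess header; _copy_from_user() and copy_from_user_overflow() are the names this series introduces.

static inline unsigned long
copy_from_user(void *to, const void __user *from, unsigned long n)
{
	int sz = __builtin_object_size(to, 0);	/* -1 if size is unknown */

	if (likely(sz == -1 || sz >= n))	/* unknowable, or provably safe */
		n = _copy_from_user(to, from, n);
	else
		copy_from_user_overflow();	/* WARNs in memory debug mode */

	return n;
}

When both sz and n are compile-time constants, gcc folds the branch away entirely, which is why the check costs nothing in the common case.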
Signed-off-by: Arjan van de Ven LKML-Reference: <20090926143301.2c396b94@infradead.org> Signed-off-by: Ingo Molnar --- arch/x86/lib/copy_user_64.S | 4 ++-- arch/x86/lib/usercopy_32.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 6ba0f7bb85ea..4be3c415b3e9 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -78,7 +78,7 @@ ENTRY(copy_to_user) ENDPROC(copy_to_user) /* Standard copy_from_user with segment limit checking */ -ENTRY(copy_from_user) +ENTRY(_copy_from_user) CFI_STARTPROC GET_THREAD_INFO(%rax) movq %rsi,%rcx @@ -88,7 +88,7 @@ ENTRY(copy_from_user) jae bad_from_user ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string CFI_ENDPROC -ENDPROC(copy_from_user) +ENDPROC(_copy_from_user) ENTRY(copy_user_generic) CFI_STARTPROC diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 1f118d462acc..8498684e45b0 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -874,7 +874,7 @@ EXPORT_SYMBOL(copy_to_user); * data to the requested size using zero bytes. */ unsigned long -copy_from_user(void *to, const void __user *from, unsigned long n) +_copy_from_user(void *to, const void __user *from, unsigned long n) { if (access_ok(VERIFY_READ, from, n)) n = __copy_from_user(to, from, n); @@ -882,4 +882,4 @@ copy_from_user(void *to, const void __user *from, unsigned long n) memset(to, 0, n); return n; } -EXPORT_SYMBOL(copy_from_user); +EXPORT_SYMBOL(_copy_from_user); -- cgit v1.2.2 From 4a3127693001c61a21d1ce680db6340623f52e93 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 30 Sep 2009 13:05:23 +0200 Subject: x86: Turn the copy_from_user check into an (optional) compile time warning A previous patch added the buffer size check to copy_from_user(). One of the things learned from analyzing the result of the previous patch is that in general, gcc is really good at proving that the code contains sufficient security checks to not need to do a runtime check. But that for those cases where gcc could not prove this, there was a relatively high percentage of real security issues. This patch turns the case of "gcc cannot prove" into a compile time warning, as long as a sufficiently new gcc is in use that supports this. The objective is that these warnings will trigger developers checking new cases out before a security hole enters a linux kernel release. Signed-off-by: Arjan van de Ven Cc: Linus Torvalds Cc: "David S. Miller" Cc: James Morris Cc: Jan Beulich LKML-Reference: <20090930130523.348ae6c4@infradead.org> Signed-off-by: Ingo Molnar --- arch/x86/lib/usercopy_32.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 8498684e45b0..e218d5df85ff 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -883,3 +883,9 @@ _copy_from_user(void *to, const void __user *from, unsigned long n) return n; } EXPORT_SYMBOL(_copy_from_user); + +void copy_from_user_overflow(void) +{ + WARN(1, "Buffer overflow detected!\n"); +} +EXPORT_SYMBOL(copy_from_user_overflow); -- cgit v1.2.2 From c0b11d3af164947c71e2491912c5b8418900dafb Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 25 Sep 2009 11:20:38 -0700 Subject: x86: Add VIA processor instructions in opcodes decoder Add VIA processor's Padlock instructions(MONTMUL, XSHA1, XSHA256) as parts of the kernel may use them. 
This fixes the following crash in opcodes decoder selftests: make[2]: `scripts/unifdef' is up to date. TEST posttest Error: c145cf71: f3 0f a6 d0 repz xsha256 Error: objdump says 4 bytes, but insn_get_length() says 3 (attr:0) make[1]: *** [posttest] Error 2 make: *** [bzImage] Error 2 Reported-by: Ingo Molnar Signed-off-by: Masami Hiramatsu Acked-by: Steven Rostedt Acked-by: Ingo Molnar Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: H. Peter Anvin Cc: Jason Baron Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Tom Zanussi LKML-Reference: <20090925182037.10157.3180.stgit@omoto> Signed-off-by: Frederic Weisbecker --- arch/x86/lib/x86-opcode-map.txt | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 59e20d5c2a52..78a0daf12e15 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -469,7 +469,7 @@ a2: CPUID a3: BT Ev,Gv a4: SHLD Ev,Gv,Ib a5: SHLD Ev,Gv,CL -a6: +a6: GrpPDLK a7: GrpRNG a8: PUSH GS (d64) a9: POP GS (d64) @@ -803,6 +803,12 @@ GrpTable: Grp16 3: prefetch T2 EndTable +GrpTable: GrpPDLK +0: MONTMUL +1: XSHA1 +2: XSHA2 +EndTable + GrpTable: GrpRNG 0: xstore-rng 1: xcrypt-ecb -- cgit v1.2.2 From 8c95bc3e206cff7a55edd2fc5f0e2b305d57903f Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 16 Oct 2009 20:07:36 -0400 Subject: x86: Add MMX/SSE opcode groups to opcode map Add missing MMX/SSE opcode groups to x86 opcode map. Signed-off-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <20091017000736.16556.29061.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/lib/x86-opcode-map.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 78a0daf12e15..e7285d8379e3 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -777,12 +777,22 @@ GrpTable: Grp11 EndTable GrpTable: Grp12 +2: psrlw Nq,Ib (11B) | psrlw Udq,Ib (66),(11B) +4: psraw Nq,Ib (11B) | psraw Udq,Ib (66),(11B) +6: psllw Nq,Ib (11B) | psllw Udq,Ib (66),(11B) EndTable GrpTable: Grp13 +2: psrld Nq,Ib (11B) | psrld Udq,Ib (66),(11B) +4: psrad Nq,Ib (11B) | psrad Udq,Ib (66),(11B) +6: pslld Nq,Ib (11B) | pslld Udq,Ib (66),(11B) EndTable GrpTable: Grp14 +2: psrlq Nq,Ib (11B) | psrlq Udq,Ib (66),(11B) +3: psrldq Udq,Ib (66),(11B) +6: psllq Nq,Ib (11B) | psllq Udq,Ib (66),(11B) +7: pslldq Udq,Ib (66),(11B) EndTable GrpTable: Grp15 -- cgit v1.2.2 From d1baf5a5a6088e2991b7dbbd370ff200bd6615ce Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 16 Oct 2009 20:07:44 -0400 Subject: x86: Add AMD prefetch and 3DNow! opcodes to opcode map Add AMD prefetch and 3DNow! opcode including FEMMS. Since 3DNow! uses the last immediate byte as an opcode extension byte, x86 insn just treats the extenstion byte as an immediate byte instead of a part of opcode (insn_get_opcode() decodes first "0x0f 0x0f" bytes.) Users who are interested in analyzing 3DNow! opcode still can decode it by analyzing the immediate byte. 
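A caller that does want the 3DNow! operation can therefore recover it from the decoded immediate. A minimal sketch using the decoder API (the helper name here is made up for illustration):

#include <asm/insn.h>	/* struct insn, insn_init(), insn_get_length() */

/* Return the 3DNow! extension opcode (the trailing imm byte),
 * or -1 if this is not a 3DNow! (0x0f 0x0f) instruction. */
static int get_3dnow_ext(const void *kaddr, int x86_64)
{
	struct insn insn;

	insn_init(&insn, kaddr, x86_64);
	insn_get_length(&insn);		/* decodes up to and incl. the imm byte */

	if (insn.opcode.nbytes == 2 &&
	    insn.opcode.bytes[0] == 0x0f && insn.opcode.bytes[1] == 0x0f)
		return insn.immediate.value & 0xff;	/* undo sign-extension */
	return -1;
}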
Signed-off-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <20091017000744.16556.27881.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/lib/x86-opcode-map.txt | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index e7285d8379e3..894497f77808 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -306,9 +306,10 @@ Referrer: 2-byte escape 0a: 0b: UD2 (1B) 0c: -0d: NOP Ev -0e: -0f: +0d: NOP Ev | GrpP +0e: FEMMS +# 3DNow! uses the last imm byte as opcode extension. +0f: 3DNow! Pq,Qq,Ib # 0x0f 0x10-0x1f 10: movups Vps,Wps | movss Vss,Wss (F3) | movupd Vpd,Wpd (66) | movsd Vsd,Wsd (F2) 11: movups Wps,Vps | movss Wss,Vss (F3) | movupd Wpd,Vpd (66) | movsd Wsd,Vsd (F2) @@ -813,6 +814,12 @@ GrpTable: Grp16 3: prefetch T2 EndTable +# AMD's Prefetch Group +GrpTable: GrpP +0: PREFETCH +1: PREFETCHW +EndTable + GrpTable: GrpPDLK 0: MONTMUL 1: XSHA1 -- cgit v1.2.2 From 06ed6ba5ecb771cc3a967838a4bb1d9cbd8786b9 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 20 Oct 2009 12:55:24 -0400 Subject: x86: Fix group attribute decoding bug Fix a typo in inat_get_group_attribute() which should refer inat_group_tables, not inat_escape_tables. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Jim Keniston Cc: Frederic Weisbecker LKML-Reference: <20091020165524.4145.97333.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/lib/inat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c index 054656a01dfd..3fb5998b823e 100644 --- a/arch/x86/lib/inat.c +++ b/arch/x86/lib/inat.c @@ -68,7 +68,7 @@ insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx, if (!table) return inat_group_common_attribute(grp_attr); if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && m) { - table = inat_escape_tables[n][m]; + table = inat_group_tables[n][m]; if (!table) return inat_group_common_attribute(grp_attr); } -- cgit v1.2.2 From 9983d60d74db9e544c6cb6f65351849fe8e9c1de Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 20 Oct 2009 12:55:31 -0400 Subject: x86: Add AES opcodes to opcode map Add Intel AES opcodes to x86 opcode map. These opcodes are used in arch/x86/crypt/aesni-intel_asm.S. 
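As a quick sanity check that the new entries decode, a hypothetical snippet in the style of the decoder selftests (the bytes are hand-assembled here, not taken from the kernel sources):

#include <linux/bug.h>
#include <asm/insn.h>

static void check_aesenc_length(void)
{
	/* aesenc %xmm1,%xmm0 encodes as 66 0f 38 dc c1 */
	static const insn_byte_t buf[] = { 0x66, 0x0f, 0x38, 0xdc, 0xc1 };
	struct insn insn;

	insn_init(&insn, buf, 1);	/* decode as 64-bit code */
	insn_get_length(&insn);
	BUG_ON(insn.length != sizeof(buf));	/* expect 5 bytes */
}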
Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Jim Keniston Cc: Frederic Weisbecker LKML-Reference: <20091020165531.4145.21872.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/lib/x86-opcode-map.txt | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 894497f77808..701c4678687b 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -567,7 +567,7 @@ fe: paddd Pq,Qq | paddd Vdq,Wdq (66) ff: EndTable -Table: 3-byte opcode 1 +Table: 3-byte opcode 1 (0x0f 0x38) Referrer: 3-byte escape 1 # 0x0f 0x38 0x00-0x0f 00: pshufb Pq,Qq | pshufb Vdq,Wdq (66) @@ -642,11 +642,16 @@ Referrer: 3-byte escape 1 41: phminposuw Vdq,Wdq (66) 80: INVEPT Gd/q,Mdq (66) 81: INVPID Gd/q,Mdq (66) +db: aesimc Vdq,Wdq (66) +dc: aesenc Vdq,Wdq (66) +dd: aesenclast Vdq,Wdq (66) +de: aesdec Vdq,Wdq (66) +df: aesdeclast Vdq,Wdq (66) f0: MOVBE Gv,Mv | CRC32 Gd,Eb (F2) f1: MOVBE Mv,Gv | CRC32 Gd,Ev (F2) EndTable -Table: 3-byte opcode 2 +Table: 3-byte opcode 2 (0x0f 0x3a) Referrer: 3-byte escape 2 # 0x0f 0x3a 0x00-0xff 08: roundps Vdq,Wdq,Ib (66) @@ -671,6 +676,7 @@ Referrer: 3-byte escape 2 61: pcmpestri Vdq,Wdq,Ib (66) 62: pcmpistrm Vdq,Wdq,Ib (66) 63: pcmpistri Vdq,Wdq,Ib (66) +df: aeskeygenassist Vdq,Wdq,Ib (66) EndTable GrpTable: Grp1 -- cgit v1.2.2 From 7f387d3f2421781610588faa2f49ae5f1737b137 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 27 Oct 2009 16:42:04 -0400 Subject: x86: Fix SSE opcode map bug Fix superscripts position because some superscripts of SSE opcode are not put in correct position. Signed-off-by: Masami Hiramatsu Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. 
Eigler Cc: Frederic Weisbecker Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju LKML-Reference: <20091027204204.30545.97296.stgit@harusame> Signed-off-by: Ingo Molnar --- arch/x86/lib/x86-opcode-map.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 701c4678687b..efef3cada84e 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -401,9 +401,9 @@ Referrer: 2-byte escape 62: punpckldq Pq,Qd | punpckldq Vdq,Wdq (66) 63: packsswb Pq,Qq | packsswb Vdq,Wdq (66) 64: pcmpgtb Pq,Qq | pcmpgtb Vdq,Wdq (66) -65: pcmpgtw Pq,Qq | pcmpgtw(66) Vdq,Wdq +65: pcmpgtw Pq,Qq | pcmpgtw Vdq,Wdq (66) 66: pcmpgtd Pq,Qq | pcmpgtd Vdq,Wdq (66) -67: packuswb Pq,Qq | packuswb(66) Vdq,Wdq +67: packuswb Pq,Qq | packuswb Vdq,Wdq (66) 68: punpckhbw Pq,Qd | punpckhbw Vdq,Wdq (66) 69: punpckhwd Pq,Qd | punpckhwd Vdq,Wdq (66) 6a: punpckhdq Pq,Qd | punpckhdq Vdq,Wdq (66) @@ -425,8 +425,8 @@ Referrer: 2-byte escape 79: VMWRITE Gd/q,Ed/q 7a: 7b: -7c: haddps(F2) Vps,Wps | haddpd(66) Vpd,Wpd -7d: hsubps(F2) Vps,Wps | hsubpd(66) Vpd,Wpd +7c: haddps Vps,Wps (F2) | haddpd Vpd,Wpd (66) +7d: hsubps Vps,Wps (F2) | hsubpd Vpd,Wpd (66) 7e: movd/q Ed/q,Pd | movd/q Ed/q,Vdq (66) | movq Vq,Wq (F3) 7f: movq Qq,Pq | movdqa Wdq,Vdq (66) | movdqu Wdq,Vdq (F3) # 0x0f 0x80-0x8f @@ -574,7 +574,7 @@ Referrer: 3-byte escape 1 01: phaddw Pq,Qq | phaddw Vdq,Wdq (66) 02: phaddd Pq,Qq | phaddd Vdq,Wdq (66) 03: phaddsw Pq,Qq | phaddsw Vdq,Wdq (66) -04: pmaddubsw Pq,Qq | pmaddubsw (66)Vdq,Wdq +04: pmaddubsw Pq,Qq | pmaddubsw Vdq,Wdq (66) 05: phsubw Pq,Qq | phsubw Vdq,Wdq (66) 06: phsubd Pq,Qq | phsubd Vdq,Wdq (66) 07: phsubsw Pq,Qq | phsubsw Vdq,Wdq (66) -- cgit v1.2.2 From 04d46c1b13b02e1e5c24eb270a01cf3f94ee4d04 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 27 Oct 2009 16:42:11 -0400 Subject: x86: Merge INAT_REXPFX into INAT_PFX_* Merge INAT_REXPFX into INAT_PFX_* macro and rename it to INAT_PFX_REX. Signed-off-by: Masami Hiramatsu Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju LKML-Reference: <20091027204211.30545.58090.stgit@harusame> Signed-off-by: Ingo Molnar --- arch/x86/lib/insn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index dfd56a30053f..9f483179a8a6 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c @@ -69,7 +69,7 @@ void insn_get_prefixes(struct insn *insn) lb = 0; b = peek_next(insn_byte_t, insn); attr = inat_get_opcode_attribute(b); - while (inat_is_prefix(attr)) { + while (inat_is_legacy_prefix(attr)) { /* Skip if same prefix */ for (i = 0; i < nb; i++) if (prefixes->bytes[i] == b) -- cgit v1.2.2 From 82cb57028c864822c5a260f806d051e2ce28c86a Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 27 Oct 2009 16:42:19 -0400 Subject: x86: Add pclmulq to x86 opcode map Add pclmulq opcode to x86 opcode map. Signed-off-by: Masami Hiramatsu Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. 
Eigler Cc: Frederic Weisbecker Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju LKML-Reference: <20091027204219.30545.82039.stgit@harusame> Signed-off-by: Ingo Molnar --- arch/x86/lib/x86-opcode-map.txt | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index efef3cada84e..1f41246e6e3c 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -672,6 +672,7 @@ Referrer: 3-byte escape 2 40: dpps Vdq,Wdq,Ib (66) 41: dppd Vdq,Wdq,Ib (66) 42: mpsadbw Vdq,Wdq,Ib (66) +44: pclmulq Vdq,Wdq,Ib (66) 60: pcmpestrm Vdq,Wdq,Ib (66) 61: pcmpestri Vdq,Wdq,Ib (66) 62: pcmpistrm Vdq,Wdq,Ib (66) -- cgit v1.2.2 From e0e492e99b372c6990a5daca9e4683c341f1330e Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 27 Oct 2009 16:42:27 -0400 Subject: x86: AVX instruction set decoder support Add Intel AVX(Advanced Vector Extensions) instruction set support to x86 instruction decoder. This adds insn.vex_prefix field for storing VEX prefixes, and introduces some original tags for expressing opcodes attributes. Signed-off-by: Masami Hiramatsu Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju LKML-Reference: <20091027204226.30545.23451.stgit@harusame> Signed-off-by: Ingo Molnar --- arch/x86/lib/inat.c | 12 ++ arch/x86/lib/insn.c | 52 +++++ arch/x86/lib/x86-opcode-map.txt | 431 +++++++++++++++++++++------------------- 3 files changed, 289 insertions(+), 206 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c index 3fb5998b823e..46fc4ee09fc4 100644 --- a/arch/x86/lib/inat.c +++ b/arch/x86/lib/inat.c @@ -76,3 +76,15 @@ insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx, inat_group_common_attribute(grp_attr); } +insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m, + insn_byte_t vex_p) +{ + const insn_attr_t *table; + if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX) + return 0; + table = inat_avx_tables[vex_m][vex_p]; + if (!table) + return 0; + return table[opcode]; +} + diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 9f483179a8a6..9f33b984d0ef 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c @@ -28,6 +28,9 @@ #define peek_next(t, insn) \ ({t r; r = *(t*)insn->next_byte; r; }) +#define peek_nbyte_next(t, insn, n) \ + ({t r; r = *(t*)((insn)->next_byte + n); r; }) + /** * insn_init() - initialize struct insn * @insn: &struct insn to be initialized @@ -107,6 +110,7 @@ found: insn->prefixes.bytes[3] = lb; } + /* Decode REX prefix */ if (insn->x86_64) { b = peek_next(insn_byte_t, insn); attr = inat_get_opcode_attribute(b); @@ -120,6 +124,39 @@ found: } } insn->rex_prefix.got = 1; + + /* Decode VEX prefix */ + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + if (inat_is_vex_prefix(attr)) { + insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1); + if (!insn->x86_64) { + /* + * In 32-bits mode, if the [7:6] bits (mod bits of + * ModRM) on the second byte are not 11b, it is + * LDS or LES. 
+ */ + if (X86_MODRM_MOD(b2) != 3) + goto vex_end; + } + insn->vex_prefix.bytes[0] = b; + insn->vex_prefix.bytes[1] = b2; + if (inat_is_vex3_prefix(attr)) { + b2 = peek_nbyte_next(insn_byte_t, insn, 2); + insn->vex_prefix.bytes[2] = b2; + insn->vex_prefix.nbytes = 3; + insn->next_byte += 3; + if (insn->x86_64 && X86_VEX_W(b2)) + /* VEX.W overrides opnd_size */ + insn->opnd_bytes = 8; + } else { + insn->vex_prefix.nbytes = 2; + insn->next_byte += 2; + } + } +vex_end: + insn->vex_prefix.got = 1; + prefixes->got = 1; return; } @@ -147,6 +184,18 @@ void insn_get_opcode(struct insn *insn) op = get_next(insn_byte_t, insn); opcode->bytes[0] = op; opcode->nbytes = 1; + + /* Check if there is VEX prefix or not */ + if (insn_is_avx(insn)) { + insn_byte_t m, p; + m = insn_vex_m_bits(insn); + p = insn_vex_p_bits(insn); + insn->attr = inat_get_avx_attribute(op, m, p); + if (!inat_accept_vex(insn->attr)) + insn->attr = 0; /* This instruction is bad */ + goto end; /* VEX has only 1 byte for opcode */ + } + insn->attr = inat_get_opcode_attribute(op); while (inat_is_escape(insn->attr)) { /* Get escaped opcode */ @@ -155,6 +204,9 @@ void insn_get_opcode(struct insn *insn) pfx = insn_last_prefix(insn); insn->attr = inat_get_escape_attribute(op, pfx, insn->attr); } + if (inat_must_vex(insn->attr)) + insn->attr = 0; /* This instruction is bad */ +end: opcode->got = 1; } diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 1f41246e6e3c..9887bfeeb2db 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -3,6 +3,7 @@ # # Table: table-name # Referrer: escaped-name +# AVXcode: avx-code # opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] # (or) # opcode: escape # escaped-name @@ -13,9 +14,16 @@ # reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] # EndTable # +# AVX Superscripts +# (VEX): this opcode can accept VEX prefix. +# (oVEX): this opcode requires VEX prefix. +# (o128): this opcode only supports 128bit VEX. +# (o256): this opcode only supports 256bit VEX. +# Table: one byte opcode Referrer: +AVXcode: # 0x00 - 0x0f 00: ADD Eb,Gb 01: ADD Ev,Gv @@ -225,8 +233,8 @@ c0: Grp2 Eb,Ib (1A) c1: Grp2 Ev,Ib (1A) c2: RETN Iw (f64) c3: RETN -c4: LES Gz,Mp (i64) -c5: LDS Gz,Mp (i64) +c4: LES Gz,Mp (i64) | 3bytes-VEX (Prefix) +c5: LDS Gz,Mp (i64) | 2bytes-VEX (Prefix) c6: Grp11 Eb,Ib (1A) c7: Grp11 Ev,Iz (1A) c8: ENTER Iw,Ib @@ -290,8 +298,9 @@ fe: Grp4 (1A) ff: Grp5 (1A) EndTable -Table: 2-byte opcode # First Byte is 0x0f +Table: 2-byte opcode (0x0f) Referrer: 2-byte escape +AVXcode: 1 # 0x0f 0x00-0x0f 00: Grp6 (1A) 01: Grp7 (1A) @@ -311,14 +320,14 @@ Referrer: 2-byte escape # 3DNow! uses the last imm byte as opcode extension. 0f: 3DNow! 
Pq,Qq,Ib # 0x0f 0x10-0x1f -10: movups Vps,Wps | movss Vss,Wss (F3) | movupd Vpd,Wpd (66) | movsd Vsd,Wsd (F2) -11: movups Wps,Vps | movss Wss,Vss (F3) | movupd Wpd,Vpd (66) | movsd Wsd,Vsd (F2) -12: movlps Vq,Mq | movlpd Vq,Mq (66) | movhlps Vq,Uq | movddup Vq,Wq (F2) | movsldup Vq,Wq (F3) -13: mpvlps Mq,Vq | movlpd Mq,Vq (66) -14: unpcklps Vps,Wq | unpcklpd Vpd,Wq (66) -15: unpckhps Vps,Wq | unpckhpd Vpd,Wq (66) -16: movhps Vq,Mq | movhpd Vq,Mq (66) | movlsps Vq,Uq | movshdup Vq,Wq (F3) -17: movhps Mq,Vq | movhpd Mq,Vq (66) +10: movups Vps,Wps (VEX) | movss Vss,Wss (F3),(VEX),(o128) | movupd Vpd,Wpd (66),(VEX) | movsd Vsd,Wsd (F2),(VEX),(o128) +11: movups Wps,Vps (VEX) | movss Wss,Vss (F3),(VEX),(o128) | movupd Wpd,Vpd (66),(VEX) | movsd Wsd,Vsd (F2),(VEX),(o128) +12: movlps Vq,Mq (VEX),(o128) | movlpd Vq,Mq (66),(VEX),(o128) | movhlps Vq,Uq (VEX),(o128) | movddup Vq,Wq (F2),(VEX) | movsldup Vq,Wq (F3),(VEX) +13: mpvlps Mq,Vq (VEX),(o128) | movlpd Mq,Vq (66),(VEX),(o128) +14: unpcklps Vps,Wq (VEX) | unpcklpd Vpd,Wq (66),(VEX) +15: unpckhps Vps,Wq (VEX) | unpckhpd Vpd,Wq (66),(VEX) +16: movhps Vq,Mq (VEX),(o128) | movhpd Vq,Mq (66),(VEX),(o128) | movlsps Vq,Uq (VEX),(o128) | movshdup Vq,Wq (F3),(VEX) +17: movhps Mq,Vq (VEX),(o128) | movhpd Mq,Vq (66),(VEX),(o128) 18: Grp16 (1A) 19: 1a: @@ -336,14 +345,14 @@ Referrer: 2-byte escape 25: 26: 27: -28: movaps Vps,Wps | movapd Vpd,Wpd (66) -29: movaps Wps,Vps | movapd Wpd,Vpd (66) -2a: cvtpi2ps Vps,Qpi | cvtsi2ss Vss,Ed/q (F3) | cvtpi2pd Vpd,Qpi (66) | cvtsi2sd Vsd,Ed/q (F2) -2b: movntps Mps,Vps | movntpd Mpd,Vpd (66) -2c: cvttps2pi Ppi,Wps | cvttss2si Gd/q,Wss (F3) | cvttpd2pi Ppi,Wpd (66) | cvttsd2si Gd/q,Wsd (F2) -2d: cvtps2pi Ppi,Wps | cvtss2si Gd/q,Wss (F3) | cvtpd2pi Qpi,Wpd (66) | cvtsd2si Gd/q,Wsd (F2) -2e: ucomiss Vss,Wss | ucomisd Vsd,Wsd (66) -2f: comiss Vss,Wss | comisd Vsd,Wsd (66) +28: movaps Vps,Wps (VEX) | movapd Vpd,Wpd (66),(VEX) +29: movaps Wps,Vps (VEX) | movapd Wpd,Vpd (66),(VEX) +2a: cvtpi2ps Vps,Qpi | cvtsi2ss Vss,Ed/q (F3),(VEX),(o128) | cvtpi2pd Vpd,Qpi (66) | cvtsi2sd Vsd,Ed/q (F2),(VEX),(o128) +2b: movntps Mps,Vps (VEX) | movntpd Mpd,Vpd (66),(VEX) +2c: cvttps2pi Ppi,Wps | cvttss2si Gd/q,Wss (F3),(VEX),(o128) | cvttpd2pi Ppi,Wpd (66) | cvttsd2si Gd/q,Wsd (F2),(VEX),(o128) +2d: cvtps2pi Ppi,Wps | cvtss2si Gd/q,Wss (F3),(VEX),(o128) | cvtpd2pi Qpi,Wpd (66) | cvtsd2si Gd/q,Wsd (F2),(VEX),(o128) +2e: ucomiss Vss,Wss (VEX),(o128) | ucomisd Vsd,Wsd (66),(VEX),(o128) +2f: comiss Vss,Wss (VEX),(o128) | comisd Vsd,Wsd (66),(VEX),(o128) # 0x0f 0x30-0x3f 30: WRMSR 31: RDTSC @@ -379,56 +388,56 @@ Referrer: 2-byte escape 4e: CMOVLE/NG Gv,Ev 4f: CMOVNLE/G Gv,Ev # 0x0f 0x50-0x5f -50: movmskps Gd/q,Ups | movmskpd Gd/q,Upd (66) -51: sqrtps Vps,Wps | sqrtss Vss,Wss (F3) | sqrtpd Vpd,Wpd (66) | sqrtsd Vsd,Wsd (F2) -52: rsqrtps Vps,Wps | rsqrtss Vss,Wss (F3) -53: rcpps Vps,Wps | rcpss Vss,Wss (F3) -54: andps Vps,Wps | andpd Vpd,Wpd (66) -55: andnps Vps,Wps | andnpd Vpd,Wpd (66) -56: orps Vps,Wps | orpd Vpd,Wpd (66) -57: xorps Vps,Wps | xorpd Vpd,Wpd (66) -58: addps Vps,Wps | addss Vss,Wss (F3) | addpd Vpd,Wpd (66) | addsd Vsd,Wsd (F2) -59: mulps Vps,Wps | mulss Vss,Wss (F3) | mulpd Vpd,Wpd (66) | mulsd Vsd,Wsd (F2) -5a: cvtps2pd Vpd,Wps | cvtss2sd Vsd,Wss (F3) | cvtpd2ps Vps,Wpd (66) | cvtsd2ss Vsd,Wsd (F2) -5b: cvtdq2ps Vps,Wdq | cvtps2dq Vdq,Wps (66) | cvttps2dq Vdq,Wps (F3) -5c: subps Vps,Wps | subss Vss,Wss (F3) | subpd Vpd,Wpd (66) | subsd Vsd,Wsd (F2) -5d: minps Vps,Wps | minss Vss,Wss (F3) | minpd Vpd,Wpd (66) | minsd Vsd,Wsd (F2) 
-5e: divps Vps,Wps | divss Vss,Wss (F3) | divpd Vpd,Wpd (66) | divsd Vsd,Wsd (F2) -5f: maxps Vps,Wps | maxss Vss,Wss (F3) | maxpd Vpd,Wpd (66) | maxsd Vsd,Wsd (F2) +50: movmskps Gd/q,Ups (VEX) | movmskpd Gd/q,Upd (66),(VEX) +51: sqrtps Vps,Wps (VEX) | sqrtss Vss,Wss (F3),(VEX),(o128) | sqrtpd Vpd,Wpd (66),(VEX) | sqrtsd Vsd,Wsd (F2),(VEX),(o128) +52: rsqrtps Vps,Wps (VEX) | rsqrtss Vss,Wss (F3),(VEX),(o128) +53: rcpps Vps,Wps (VEX) | rcpss Vss,Wss (F3),(VEX),(o128) +54: andps Vps,Wps (VEX) | andpd Vpd,Wpd (66),(VEX) +55: andnps Vps,Wps (VEX) | andnpd Vpd,Wpd (66),(VEX) +56: orps Vps,Wps (VEX) | orpd Vpd,Wpd (66),(VEX) +57: xorps Vps,Wps (VEX) | xorpd Vpd,Wpd (66),(VEX) +58: addps Vps,Wps (VEX) | addss Vss,Wss (F3),(VEX),(o128) | addpd Vpd,Wpd (66),(VEX) | addsd Vsd,Wsd (F2),(VEX),(o128) +59: mulps Vps,Wps (VEX) | mulss Vss,Wss (F3),(VEX),(o128) | mulpd Vpd,Wpd (66),(VEX) | mulsd Vsd,Wsd (F2),(VEX),(o128) +5a: cvtps2pd Vpd,Wps (VEX) | cvtss2sd Vsd,Wss (F3),(VEX),(o128) | cvtpd2ps Vps,Wpd (66),(VEX) | cvtsd2ss Vsd,Wsd (F2),(VEX),(o128) +5b: cvtdq2ps Vps,Wdq (VEX) | cvtps2dq Vdq,Wps (66),(VEX) | cvttps2dq Vdq,Wps (F3),(VEX) +5c: subps Vps,Wps (VEX) | subss Vss,Wss (F3),(VEX),(o128) | subpd Vpd,Wpd (66),(VEX) | subsd Vsd,Wsd (F2),(VEX),(o128) +5d: minps Vps,Wps (VEX) | minss Vss,Wss (F3),(VEX),(o128) | minpd Vpd,Wpd (66),(VEX) | minsd Vsd,Wsd (F2),(VEX),(o128) +5e: divps Vps,Wps (VEX) | divss Vss,Wss (F3),(VEX),(o128) | divpd Vpd,Wpd (66),(VEX) | divsd Vsd,Wsd (F2),(VEX),(o128) +5f: maxps Vps,Wps (VEX) | maxss Vss,Wss (F3),(VEX),(o128) | maxpd Vpd,Wpd (66),(VEX) | maxsd Vsd,Wsd (F2),(VEX),(o128) # 0x0f 0x60-0x6f -60: punpcklbw Pq,Qd | punpcklbw Vdq,Wdq (66) -61: punpcklwd Pq,Qd | punpcklwd Vdq,Wdq (66) -62: punpckldq Pq,Qd | punpckldq Vdq,Wdq (66) -63: packsswb Pq,Qq | packsswb Vdq,Wdq (66) -64: pcmpgtb Pq,Qq | pcmpgtb Vdq,Wdq (66) -65: pcmpgtw Pq,Qq | pcmpgtw Vdq,Wdq (66) -66: pcmpgtd Pq,Qq | pcmpgtd Vdq,Wdq (66) -67: packuswb Pq,Qq | packuswb Vdq,Wdq (66) -68: punpckhbw Pq,Qd | punpckhbw Vdq,Wdq (66) -69: punpckhwd Pq,Qd | punpckhwd Vdq,Wdq (66) -6a: punpckhdq Pq,Qd | punpckhdq Vdq,Wdq (66) -6b: packssdw Pq,Qd | packssdw Vdq,Wdq (66) -6c: punpcklqdq Vdq,Wdq (66) -6d: punpckhqdq Vdq,Wdq (66) -6e: movd/q/ Pd,Ed/q | movd/q Vdq,Ed/q (66) -6f: movq Pq,Qq | movdqa Vdq,Wdq (66) | movdqu Vdq,Wdq (F3) +60: punpcklbw Pq,Qd | punpcklbw Vdq,Wdq (66),(VEX),(o128) +61: punpcklwd Pq,Qd | punpcklwd Vdq,Wdq (66),(VEX),(o128) +62: punpckldq Pq,Qd | punpckldq Vdq,Wdq (66),(VEX),(o128) +63: packsswb Pq,Qq | packsswb Vdq,Wdq (66),(VEX),(o128) +64: pcmpgtb Pq,Qq | pcmpgtb Vdq,Wdq (66),(VEX),(o128) +65: pcmpgtw Pq,Qq | pcmpgtw Vdq,Wdq (66),(VEX),(o128) +66: pcmpgtd Pq,Qq | pcmpgtd Vdq,Wdq (66),(VEX),(o128) +67: packuswb Pq,Qq | packuswb Vdq,Wdq (66),(VEX),(o128) +68: punpckhbw Pq,Qd | punpckhbw Vdq,Wdq (66),(VEX),(o128) +69: punpckhwd Pq,Qd | punpckhwd Vdq,Wdq (66),(VEX),(o128) +6a: punpckhdq Pq,Qd | punpckhdq Vdq,Wdq (66),(VEX),(o128) +6b: packssdw Pq,Qd | packssdw Vdq,Wdq (66),(VEX),(o128) +6c: punpcklqdq Vdq,Wdq (66),(VEX),(o128) +6d: punpckhqdq Vdq,Wdq (66),(VEX),(o128) +6e: movd/q/ Pd,Ed/q | movd/q Vdq,Ed/q (66),(VEX),(o128) +6f: movq Pq,Qq | movdqa Vdq,Wdq (66),(VEX) | movdqu Vdq,Wdq (F3),(VEX) # 0x0f 0x70-0x7f -70: pshufw Pq,Qq,Ib | pshufd Vdq,Wdq,Ib (66) | pshufhw Vdq,Wdq,Ib (F3) | pshuflw VdqWdq,Ib (F2) +70: pshufw Pq,Qq,Ib | pshufd Vdq,Wdq,Ib (66),(VEX),(o128) | pshufhw Vdq,Wdq,Ib (F3),(VEX),(o128) | pshuflw VdqWdq,Ib (F2),(VEX),(o128) 71: Grp12 (1A) 72: Grp13 (1A) 73: Grp14 (1A) -74: pcmpeqb Pq,Qq | 
pcmpeqb Vdq,Wdq (66) -75: pcmpeqw Pq,Qq | pcmpeqw Vdq,Wdq (66) -76: pcmpeqd Pq,Qq | pcmpeqd Vdq,Wdq (66) -77: emms +74: pcmpeqb Pq,Qq | pcmpeqb Vdq,Wdq (66),(VEX),(o128) +75: pcmpeqw Pq,Qq | pcmpeqw Vdq,Wdq (66),(VEX),(o128) +76: pcmpeqd Pq,Qq | pcmpeqd Vdq,Wdq (66),(VEX),(o128) +77: emms/vzeroupper/vzeroall (VEX) 78: VMREAD Ed/q,Gd/q 79: VMWRITE Gd/q,Ed/q 7a: 7b: -7c: haddps Vps,Wps (F2) | haddpd Vpd,Wpd (66) -7d: hsubps Vps,Wps (F2) | hsubpd Vpd,Wpd (66) -7e: movd/q Ed/q,Pd | movd/q Ed/q,Vdq (66) | movq Vq,Wq (F3) -7f: movq Qq,Pq | movdqa Wdq,Vdq (66) | movdqu Wdq,Vdq (F3) +7c: haddps Vps,Wps (F2),(VEX) | haddpd Vpd,Wpd (66),(VEX) +7d: hsubps Vps,Wps (F2),(VEX) | hsubpd Vpd,Wpd (66),(VEX) +7e: movd/q Ed/q,Pd | movd/q Ed/q,Vdq (66),(VEX),(o128) | movq Vq,Wq (F3),(VEX),(o128) +7f: movq Qq,Pq | movdqa Wdq,Vdq (66),(VEX) | movdqu Wdq,Vdq (F3),(VEX) # 0x0f 0x80-0x8f 80: JO Jz (f64) 81: JNO Jz (f64) @@ -500,11 +509,11 @@ bf: MOVSX Gv,Ew # 0x0f 0xc0-0xcf c0: XADD Eb,Gb c1: XADD Ev,Gv -c2: cmpps Vps,Wps,Ib | cmpss Vss,Wss,Ib (F3) | cmppd Vpd,Wpd,Ib (66) | cmpsd Vsd,Wsd,Ib (F2) +c2: cmpps Vps,Wps,Ib (VEX) | cmpss Vss,Wss,Ib (F3),(VEX),(o128) | cmppd Vpd,Wpd,Ib (66),(VEX) | cmpsd Vsd,Wsd,Ib (F2),(VEX) c3: movnti Md/q,Gd/q -c4: pinsrw Pq,Rd/q/Mw,Ib | pinsrw Vdq,Rd/q/Mw,Ib (66) -c5: pextrw Gd,Nq,Ib | pextrw Gd,Udq,Ib (66) -c6: shufps Vps,Wps,Ib | shufpd Vpd,Wpd,Ib (66) +c4: pinsrw Pq,Rd/q/Mw,Ib | pinsrw Vdq,Rd/q/Mw,Ib (66),(VEX),(o128) +c5: pextrw Gd,Nq,Ib | pextrw Gd,Udq,Ib (66),(VEX),(o128) +c6: shufps Vps,Wps,Ib (VEX) | shufpd Vpd,Wpd,Ib (66),(VEX) c7: Grp9 (1A) c8: BSWAP RAX/EAX/R8/R8D c9: BSWAP RCX/ECX/R9/R9D @@ -515,77 +524,78 @@ cd: BSWAP RBP/EBP/R13/R13D ce: BSWAP RSI/ESI/R14/R14D cf: BSWAP RDI/EDI/R15/R15D # 0x0f 0xd0-0xdf -d0: addsubps Vps,Wps (F2) | addsubpd Vpd,Wpd (66) -d1: psrlw Pq,Qq | psrlw Vdq,Wdq (66) -d2: psrld Pq,Qq | psrld Vdq,Wdq (66) -d3: psrlq Pq,Qq | psrlq Vdq,Wdq (66) -d4: paddq Pq,Qq | paddq Vdq,Wdq (66) -d5: pmullw Pq,Qq | pmullw Vdq,Wdq (66) -d6: movq Wq,Vq (66) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) -d7: pmovmskb Gd,Nq | pmovmskb Gd,Udq (66) -d8: psubusb Pq,Qq | psubusb Vdq,Wdq (66) -d9: psubusw Pq,Qq | psubusw Vdq,Wdq (66) -da: pminub Pq,Qq | pminub Vdq,Wdq (66) -db: pand Pq,Qq | pand Vdq,Wdq (66) -dc: paddusb Pq,Qq | paddusb Vdq,Wdq (66) -dd: paddusw Pq,Qq | paddusw Vdq,Wdq (66) -de: pmaxub Pq,Qq | pmaxub Vdq,Wdq (66) -df: pandn Pq,Qq | pandn Vdq,Wdq (66) +d0: addsubps Vps,Wps (F2),(VEX) | addsubpd Vpd,Wpd (66),(VEX) +d1: psrlw Pq,Qq | psrlw Vdq,Wdq (66),(VEX),(o128) +d2: psrld Pq,Qq | psrld Vdq,Wdq (66),(VEX),(o128) +d3: psrlq Pq,Qq | psrlq Vdq,Wdq (66),(VEX),(o128) +d4: paddq Pq,Qq | paddq Vdq,Wdq (66),(VEX),(o128) +d5: pmullw Pq,Qq | pmullw Vdq,Wdq (66),(VEX),(o128) +d6: movq Wq,Vq (66),(VEX),(o128) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) +d7: pmovmskb Gd,Nq | pmovmskb Gd,Udq (66),(VEX),(o128) +d8: psubusb Pq,Qq | psubusb Vdq,Wdq (66),(VEX),(o128) +d9: psubusw Pq,Qq | psubusw Vdq,Wdq (66),(VEX),(o128) +da: pminub Pq,Qq | pminub Vdq,Wdq (66),(VEX),(o128) +db: pand Pq,Qq | pand Vdq,Wdq (66),(VEX),(o128) +dc: paddusb Pq,Qq | paddusb Vdq,Wdq (66),(VEX),(o128) +dd: paddusw Pq,Qq | paddusw Vdq,Wdq (66),(VEX),(o128) +de: pmaxub Pq,Qq | pmaxub Vdq,Wdq (66),(VEX),(o128) +df: pandn Pq,Qq | pandn Vdq,Wdq (66),(VEX),(o128) # 0x0f 0xe0-0xef -e0: pavgb Pq,Qq | pavgb Vdq,Wdq (66) -e1: psraw Pq,Qq | psraw Vdq,Wdq (66) -e2: psrad Pq,Qq | psrad Vdq,Wdq (66) -e3: pavgw Pq,Qq | pavgw Vdq,Wdq (66) -e4: pmulhuw Pq,Qq | pmulhuw Vdq,Wdq (66) -e5: pmulhw Pq,Qq | pmulhw Vdq,Wdq (66) 
-e6: cvtpd2dq Vdq,Wpd (F2) | cvttpd2dq Vdq,Wpd (66) | cvtdq2pd Vpd,Wdq (F3) -e7: movntq Mq,Pq | movntdq Mdq,Vdq (66) -e8: psubsb Pq,Qq | psubsb Vdq,Wdq (66) -e9: psubsw Pq,Qq | psubsw Vdq,Wdq (66) -ea: pminsw Pq,Qq | pminsw Vdq,Wdq (66) -eb: por Pq,Qq | por Vdq,Wdq (66) -ec: paddsb Pq,Qq | paddsb Vdq,Wdq (66) -ed: paddsw Pq,Qq | paddsw Vdq,Wdq (66) -ee: pmaxsw Pq,Qq | pmaxsw Vdq,Wdq (66) -ef: pxor Pq,Qq | pxor Vdq,Wdq (66) +e0: pavgb Pq,Qq | pavgb Vdq,Wdq (66),(VEX),(o128) +e1: psraw Pq,Qq | psraw Vdq,Wdq (66),(VEX),(o128) +e2: psrad Pq,Qq | psrad Vdq,Wdq (66),(VEX),(o128) +e3: pavgw Pq,Qq | pavgw Vdq,Wdq (66),(VEX),(o128) +e4: pmulhuw Pq,Qq | pmulhuw Vdq,Wdq (66),(VEX),(o128) +e5: pmulhw Pq,Qq | pmulhw Vdq,Wdq (66),(VEX),(o128) +e6: cvtpd2dq Vdq,Wpd (F2),(VEX) | cvttpd2dq Vdq,Wpd (66),(VEX) | cvtdq2pd Vpd,Wdq (F3),(VEX) +e7: movntq Mq,Pq | movntdq Mdq,Vdq (66),(VEX) +e8: psubsb Pq,Qq | psubsb Vdq,Wdq (66),(VEX),(o128) +e9: psubsw Pq,Qq | psubsw Vdq,Wdq (66),(VEX),(o128) +ea: pminsw Pq,Qq | pminsw Vdq,Wdq (66),(VEX),(o128) +eb: por Pq,Qq | por Vdq,Wdq (66),(VEX),(o128) +ec: paddsb Pq,Qq | paddsb Vdq,Wdq (66),(VEX),(o128) +ed: paddsw Pq,Qq | paddsw Vdq,Wdq (66),(VEX),(o128) +ee: pmaxsw Pq,Qq | pmaxsw Vdq,Wdq (66),(VEX),(o128) +ef: pxor Pq,Qq | pxor Vdq,Wdq (66),(VEX),(o128) # 0x0f 0xf0-0xff -f0: lddqu Vdq,Mdq (F2) -f1: psllw Pq,Qq | psllw Vdq,Wdq (66) -f2: pslld Pq,Qq | pslld Vdq,Wdq (66) -f3: psllq Pq,Qq | psllq Vdq,Wdq (66) -f4: pmuludq Pq,Qq | pmuludq Vdq,Wdq (66) -f5: pmaddwd Pq,Qq | pmaddwd Vdq,Wdq (66) -f6: psadbw Pq,Qq | psadbw Vdq,Wdq (66) -f7: maskmovq Pq,Nq | maskmovdqu Vdq,Udq (66) -f8: psubb Pq,Qq | psubb Vdq,Wdq (66) -f9: psubw Pq,Qq | psubw Vdq,Wdq (66) -fa: psubd Pq,Qq | psubd Vdq,Wdq (66) -fb: psubq Pq,Qq | psubq Vdq,Wdq (66) -fc: paddb Pq,Qq | paddb Vdq,Wdq (66) -fd: paddw Pq,Qq | paddw Vdq,Wdq (66) -fe: paddd Pq,Qq | paddd Vdq,Wdq (66) +f0: lddqu Vdq,Mdq (F2),(VEX) +f1: psllw Pq,Qq | psllw Vdq,Wdq (66),(VEX),(o128) +f2: pslld Pq,Qq | pslld Vdq,Wdq (66),(VEX),(o128) +f3: psllq Pq,Qq | psllq Vdq,Wdq (66),(VEX),(o128) +f4: pmuludq Pq,Qq | pmuludq Vdq,Wdq (66),(VEX),(o128) +f5: pmaddwd Pq,Qq | pmaddwd Vdq,Wdq (66),(VEX),(o128) +f6: psadbw Pq,Qq | psadbw Vdq,Wdq (66),(VEX),(o128) +f7: maskmovq Pq,Nq | maskmovdqu Vdq,Udq (66),(VEX),(o128) +f8: psubb Pq,Qq | psubb Vdq,Wdq (66),(VEX),(o128) +f9: psubw Pq,Qq | psubw Vdq,Wdq (66),(VEX),(o128) +fa: psubd Pq,Qq | psubd Vdq,Wdq (66),(VEX),(o128) +fb: psubq Pq,Qq | psubq Vdq,Wdq (66),(VEX),(o128) +fc: paddb Pq,Qq | paddb Vdq,Wdq (66),(VEX),(o128) +fd: paddw Pq,Qq | paddw Vdq,Wdq (66),(VEX),(o128) +fe: paddd Pq,Qq | paddd Vdq,Wdq (66),(VEX),(o128) ff: EndTable Table: 3-byte opcode 1 (0x0f 0x38) Referrer: 3-byte escape 1 +AVXcode: 2 # 0x0f 0x38 0x00-0x0f -00: pshufb Pq,Qq | pshufb Vdq,Wdq (66) -01: phaddw Pq,Qq | phaddw Vdq,Wdq (66) -02: phaddd Pq,Qq | phaddd Vdq,Wdq (66) -03: phaddsw Pq,Qq | phaddsw Vdq,Wdq (66) -04: pmaddubsw Pq,Qq | pmaddubsw Vdq,Wdq (66) -05: phsubw Pq,Qq | phsubw Vdq,Wdq (66) -06: phsubd Pq,Qq | phsubd Vdq,Wdq (66) -07: phsubsw Pq,Qq | phsubsw Vdq,Wdq (66) -08: psignb Pq,Qq | psignb Vdq,Wdq (66) -09: psignw Pq,Qq | psignw Vdq,Wdq (66) -0a: psignd Pq,Qq | psignd Vdq,Wdq (66) -0b: pmulhrsw Pq,Qq | pmulhrsw Vdq,Wdq (66) -0c: -0d: -0e: -0f: +00: pshufb Pq,Qq | pshufb Vdq,Wdq (66),(VEX),(o128) +01: phaddw Pq,Qq | phaddw Vdq,Wdq (66),(VEX),(o128) +02: phaddd Pq,Qq | phaddd Vdq,Wdq (66),(VEX),(o128) +03: phaddsw Pq,Qq | phaddsw Vdq,Wdq (66),(VEX),(o128) +04: pmaddubsw Pq,Qq | pmaddubsw Vdq,Wdq (66),(VEX),(o128) +05: phsubw 
Pq,Qq | phsubw Vdq,Wdq (66),(VEX),(o128) +06: phsubd Pq,Qq | phsubd Vdq,Wdq (66),(VEX),(o128) +07: phsubsw Pq,Qq | phsubsw Vdq,Wdq (66),(VEX),(o128) +08: psignb Pq,Qq | psignb Vdq,Wdq (66),(VEX),(o128) +09: psignw Pq,Qq | psignw Vdq,Wdq (66),(VEX),(o128) +0a: psignd Pq,Qq | psignd Vdq,Wdq (66),(VEX),(o128) +0b: pmulhrsw Pq,Qq | pmulhrsw Vdq,Wdq (66),(VEX),(o128) +0c: Vpermilps /r (66),(oVEX) +0d: Vpermilpd /r (66),(oVEX) +0e: vtestps /r (66),(oVEX) +0f: vtestpd /r (66),(oVEX) # 0x0f 0x38 0x10-0x1f 10: pblendvb Vdq,Wdq (66) 11: @@ -594,90 +604,99 @@ Referrer: 3-byte escape 1 14: blendvps Vdq,Wdq (66) 15: blendvpd Vdq,Wdq (66) 16: -17: ptest Vdq,Wdq (66) -18: -19: -1a: +17: ptest Vdq,Wdq (66),(VEX) +18: vbroadcastss /r (66),(oVEX) +19: vbroadcastsd /r (66),(oVEX),(o256) +1a: vbroadcastf128 /r (66),(oVEX),(o256) 1b: -1c: pabsb Pq,Qq | pabsb Vdq,Wdq (66) -1d: pabsw Pq,Qq | pabsw Vdq,Wdq (66) -1e: pabsd Pq,Qq | pabsd Vdq,Wdq (66) +1c: pabsb Pq,Qq | pabsb Vdq,Wdq (66),(VEX),(o128) +1d: pabsw Pq,Qq | pabsw Vdq,Wdq (66),(VEX),(o128) +1e: pabsd Pq,Qq | pabsd Vdq,Wdq (66),(VEX),(o128) 1f: # 0x0f 0x38 0x20-0x2f -20: pmovsxbw Vdq,Udq/Mq (66) -21: pmovsxbd Vdq,Udq/Md (66) -22: pmovsxbq Vdq,Udq/Mw (66) -23: pmovsxwd Vdq,Udq/Mq (66) -24: pmovsxwq Vdq,Udq/Md (66) -25: pmovsxdq Vdq,Udq/Mq (66) +20: pmovsxbw Vdq,Udq/Mq (66),(VEX),(o128) +21: pmovsxbd Vdq,Udq/Md (66),(VEX),(o128) +22: pmovsxbq Vdq,Udq/Mw (66),(VEX),(o128) +23: pmovsxwd Vdq,Udq/Mq (66),(VEX),(o128) +24: pmovsxwq Vdq,Udq/Md (66),(VEX),(o128) +25: pmovsxdq Vdq,Udq/Mq (66),(VEX),(o128) 26: 27: -28: pmuldq Vdq,Wdq (66) -29: pcmpeqq Vdq,Wdq (66) -2a: movntdqa Vdq,Mdq (66) -2b: packusdw Vdq,Wdq (66) -2c: -2d: -2e: -2f: +28: pmuldq Vdq,Wdq (66),(VEX),(o128) +29: pcmpeqq Vdq,Wdq (66),(VEX),(o128) +2a: movntdqa Vdq,Mdq (66),(VEX),(o128) +2b: packusdw Vdq,Wdq (66),(VEX),(o128) +2c: vmaskmovps(ld) /r (66),(oVEX) +2d: vmaskmovpd(ld) /r (66),(oVEX) +2e: vmaskmovps(st) /r (66),(oVEX) +2f: vmaskmovpd(st) /r (66),(oVEX) # 0x0f 0x38 0x30-0x3f -30: pmovzxbw Vdq,Udq/Mq (66) -31: pmovzxbd Vdq,Udq/Md (66) -32: pmovzxbq Vdq,Udq/Mw (66) -33: pmovzxwd Vdq,Udq/Mq (66) -34: pmovzxwq Vdq,Udq/Md (66) -35: pmovzxdq Vdq,Udq/Mq (66) +30: pmovzxbw Vdq,Udq/Mq (66),(VEX),(o128) +31: pmovzxbd Vdq,Udq/Md (66),(VEX),(o128) +32: pmovzxbq Vdq,Udq/Mw (66),(VEX),(o128) +33: pmovzxwd Vdq,Udq/Mq (66),(VEX),(o128) +34: pmovzxwq Vdq,Udq/Md (66),(VEX),(o128) +35: pmovzxdq Vdq,Udq/Mq (66),(VEX),(o128) 36: -37: pcmpgtq Vdq,Wdq (66) -38: pminsb Vdq,Wdq (66) -39: pminsd Vdq,Wdq (66) -3a: pminuw Vdq,Wdq (66) -3b: pminud Vdq,Wdq (66) -3c: pmaxsb Vdq,Wdq (66) -3d: pmaxsd Vdq,Wdq (66) -3e: pmaxuw Vdq,Wdq (66) -3f: pmaxud Vdq,Wdq (66) +37: pcmpgtq Vdq,Wdq (66),(VEX),(o128) +38: pminsb Vdq,Wdq (66),(VEX),(o128) +39: pminsd Vdq,Wdq (66),(VEX),(o128) +3a: pminuw Vdq,Wdq (66),(VEX),(o128) +3b: pminud Vdq,Wdq (66),(VEX),(o128) +3c: pmaxsb Vdq,Wdq (66),(VEX),(o128) +3d: pmaxsd Vdq,Wdq (66),(VEX),(o128) +3e: pmaxuw Vdq,Wdq (66),(VEX),(o128) +3f: pmaxud Vdq,Wdq (66),(VEX),(o128) # 0x0f 0x38 0x4f-0xff -40: pmulld Vdq,Wdq (66) -41: phminposuw Vdq,Wdq (66) +40: pmulld Vdq,Wdq (66),(VEX),(o128) +41: phminposuw Vdq,Wdq (66),(VEX),(o128) 80: INVEPT Gd/q,Mdq (66) 81: INVPID Gd/q,Mdq (66) -db: aesimc Vdq,Wdq (66) -dc: aesenc Vdq,Wdq (66) -dd: aesenclast Vdq,Wdq (66) -de: aesdec Vdq,Wdq (66) -df: aesdeclast Vdq,Wdq (66) +db: aesimc Vdq,Wdq (66),(VEX),(o128) +dc: aesenc Vdq,Wdq (66),(VEX),(o128) +dd: aesenclast Vdq,Wdq (66),(VEX),(o128) +de: aesdec Vdq,Wdq (66),(VEX),(o128) +df: aesdeclast Vdq,Wdq 
(66),(VEX),(o128) f0: MOVBE Gv,Mv | CRC32 Gd,Eb (F2) f1: MOVBE Mv,Gv | CRC32 Gd,Ev (F2) EndTable Table: 3-byte opcode 2 (0x0f 0x3a) Referrer: 3-byte escape 2 +AVXcode: 3 # 0x0f 0x3a 0x00-0xff -08: roundps Vdq,Wdq,Ib (66) -09: roundpd Vdq,Wdq,Ib (66) -0a: roundss Vss,Wss,Ib (66) -0b: roundsd Vsd,Wsd,Ib (66) -0c: blendps Vdq,Wdq,Ib (66) -0d: blendpd Vdq,Wdq,Ib (66) -0e: pblendw Vdq,Wdq,Ib (66) -0f: palignr Pq,Qq,Ib | palignr Vdq,Wdq,Ib (66) -14: pextrb Rd/Mb,Vdq,Ib (66) -15: pextrw Rd/Mw,Vdq,Ib (66) -16: pextrd/pextrq Ed/q,Vdq,Ib (66) -17: extractps Ed,Vdq,Ib (66) -20: pinsrb Vdq,Rd/q/Mb,Ib (66) -21: insertps Vdq,Udq/Md,Ib (66) -22: pinsrd/pinsrq Vdq,Ed/q,Ib (66) -40: dpps Vdq,Wdq,Ib (66) -41: dppd Vdq,Wdq,Ib (66) -42: mpsadbw Vdq,Wdq,Ib (66) -44: pclmulq Vdq,Wdq,Ib (66) -60: pcmpestrm Vdq,Wdq,Ib (66) -61: pcmpestri Vdq,Wdq,Ib (66) -62: pcmpistrm Vdq,Wdq,Ib (66) -63: pcmpistri Vdq,Wdq,Ib (66) -df: aeskeygenassist Vdq,Wdq,Ib (66) +04: vpermilps /r,Ib (66),(oVEX) +05: vpermilpd /r,Ib (66),(oVEX) +06: vperm2f128 /r,Ib (66),(oVEX),(o256) +08: roundps Vdq,Wdq,Ib (66),(VEX) +09: roundpd Vdq,Wdq,Ib (66),(VEX) +0a: roundss Vss,Wss,Ib (66),(VEX),(o128) +0b: roundsd Vsd,Wsd,Ib (66),(VEX),(o128) +0c: blendps Vdq,Wdq,Ib (66),(VEX) +0d: blendpd Vdq,Wdq,Ib (66),(VEX) +0e: pblendw Vdq,Wdq,Ib (66),(VEX),(o128) +0f: palignr Pq,Qq,Ib | palignr Vdq,Wdq,Ib (66),(VEX),(o128) +14: pextrb Rd/Mb,Vdq,Ib (66),(VEX),(o128) +15: pextrw Rd/Mw,Vdq,Ib (66),(VEX),(o128) +16: pextrd/pextrq Ed/q,Vdq,Ib (66),(VEX),(o128) +17: extractps Ed,Vdq,Ib (66),(VEX),(o128) +18: vinsertf128 /r,Ib (66),(oVEX),(o256) +19: vextractf128 /r,Ib (66),(oVEX),(o256) +20: pinsrb Vdq,Rd/q/Mb,Ib (66),(VEX),(o128) +21: insertps Vdq,Udq/Md,Ib (66),(VEX),(o128) +22: pinsrd/pinsrq Vdq,Ed/q,Ib (66),(VEX),(o128) +40: dpps Vdq,Wdq,Ib (66),(VEX) +41: dppd Vdq,Wdq,Ib (66),(VEX),(o128) +42: mpsadbw Vdq,Wdq,Ib (66),(VEX),(o128) +44: pclmulq Vdq,Wdq,Ib (66),(VEX),(o128) +4a: vblendvps /r,Ib (66),(oVEX) +4b: vblendvpd /r,Ib (66),(oVEX) +4c: vpblendvb /r,Ib (66),(oVEX),(o128) +60: pcmpestrm Vdq,Wdq,Ib (66),(VEX),(o128) +61: pcmpestri Vdq,Wdq,Ib (66),(VEX),(o128) +62: pcmpistrm Vdq,Wdq,Ib (66),(VEX),(o128) +63: pcmpistri Vdq,Wdq,Ib (66),(VEX),(o128) +df: aeskeygenassist Vdq,Wdq,Ib (66),(VEX),(o128) EndTable GrpTable: Grp1 @@ -785,29 +804,29 @@ GrpTable: Grp11 EndTable GrpTable: Grp12 -2: psrlw Nq,Ib (11B) | psrlw Udq,Ib (66),(11B) -4: psraw Nq,Ib (11B) | psraw Udq,Ib (66),(11B) -6: psllw Nq,Ib (11B) | psllw Udq,Ib (66),(11B) +2: psrlw Nq,Ib (11B) | psrlw Udq,Ib (66),(11B),(VEX),(o128) +4: psraw Nq,Ib (11B) | psraw Udq,Ib (66),(11B),(VEX),(o128) +6: psllw Nq,Ib (11B) | psllw Udq,Ib (66),(11B),(VEX),(o128) EndTable GrpTable: Grp13 -2: psrld Nq,Ib (11B) | psrld Udq,Ib (66),(11B) -4: psrad Nq,Ib (11B) | psrad Udq,Ib (66),(11B) -6: pslld Nq,Ib (11B) | pslld Udq,Ib (66),(11B) +2: psrld Nq,Ib (11B) | psrld Udq,Ib (66),(11B),(VEX),(o128) +4: psrad Nq,Ib (11B) | psrad Udq,Ib (66),(11B),(VEX),(o128) +6: pslld Nq,Ib (11B) | pslld Udq,Ib (66),(11B),(VEX),(o128) EndTable GrpTable: Grp14 -2: psrlq Nq,Ib (11B) | psrlq Udq,Ib (66),(11B) -3: psrldq Udq,Ib (66),(11B) -6: psllq Nq,Ib (11B) | psllq Udq,Ib (66),(11B) -7: pslldq Udq,Ib (66),(11B) +2: psrlq Nq,Ib (11B) | psrlq Udq,Ib (66),(11B),(VEX),(o128) +3: psrldq Udq,Ib (66),(11B),(VEX),(o128) +6: psllq Nq,Ib (11B) | psllq Udq,Ib (66),(11B),(VEX),(o128) +7: pslldq Udq,Ib (66),(11B),(VEX),(o128) EndTable GrpTable: Grp15 0: fxsave 1: fxstor -2: ldmxcsr -3: stmxcsr +2: ldmxcsr (VEX) +3: stmxcsr (VEX) 4: XSAVE 5: XRSTOR | lfence (11B) 
6: mfence (11B) -- cgit v1.2.2 From 3f7e454af1dd8b9cea410d9380d3f71477e94f2b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 27 Oct 2009 16:42:35 -0400 Subject: x86: Add Intel FMA instructions to x86 opcode map Add Intel FMA (FUSED-MULTIPLY-ADD) instructions to x86 opcode map for x86 instruction decoder. Signed-off-by: Masami Hiramatsu Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju LKML-Reference: <20091027204235.30545.33997.stgit@harusame> Signed-off-by: Ingo Molnar --- arch/x86/lib/x86-opcode-map.txt | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 9887bfeeb2db..a793da5e560e 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -647,11 +647,43 @@ AVXcode: 2 3d: pmaxsd Vdq,Wdq (66),(VEX),(o128) 3e: pmaxuw Vdq,Wdq (66),(VEX),(o128) 3f: pmaxud Vdq,Wdq (66),(VEX),(o128) -# 0x0f 0x38 0x4f-0xff +# 0x0f 0x38 0x40-0x8f 40: pmulld Vdq,Wdq (66),(VEX),(o128) 41: phminposuw Vdq,Wdq (66),(VEX),(o128) 80: INVEPT Gd/q,Mdq (66) 81: INVPID Gd/q,Mdq (66) +# 0x0f 0x38 0x90-0xbf (FMA) +96: vfmaddsub132pd/ps /r (66),(VEX) +97: vfmsubadd132pd/ps /r (66),(VEX) +98: vfmadd132pd/ps /r (66),(VEX) +99: vfmadd132sd/ss /r (66),(VEX),(o128) +9a: vfmsub132pd/ps /r (66),(VEX) +9b: vfmsub132sd/ss /r (66),(VEX),(o128) +9c: vfnmadd132pd/ps /r (66),(VEX) +9d: vfnmadd132sd/ss /r (66),(VEX),(o128) +9e: vfnmsub132pd/ps /r (66),(VEX) +9f: vfnmsub132sd/ss /r (66),(VEX),(o128) +a6: vfmaddsub213pd/ps /r (66),(VEX) +a7: vfmsubadd213pd/ps /r (66),(VEX) +a8: vfmadd213pd/ps /r (66),(VEX) +a9: vfmadd213sd/ss /r (66),(VEX),(o128) +aa: vfmsub213pd/ps /r (66),(VEX) +ab: vfmsub213sd/ss /r (66),(VEX),(o128) +ac: vfnmadd213pd/ps /r (66),(VEX) +ad: vfnmadd213sd/ss /r (66),(VEX),(o128) +ae: vfnmsub213pd/ps /r (66),(VEX) +af: vfnmsub213sd/ss /r (66),(VEX),(o128) +b6: vfmaddsub231pd/ps /r (66),(VEX) +b7: vfmsubadd231pd/ps /r (66),(VEX) +b8: vfmadd231pd/ps /r (66),(VEX) +b9: vfmadd231sd/ss /r (66),(VEX),(o128) +ba: vfmsub231pd/ps /r (66),(VEX) +bb: vfmsub231sd/ss /r (66),(VEX),(o128) +bc: vfnmadd231pd/ps /r (66),(VEX) +bd: vfnmadd231sd/ss /r (66),(VEX),(o128) +be: vfnmsub231pd/ps /r (66),(VEX) +bf: vfnmsub231sd/ss /r (66),(VEX),(o128) +# 0x0f 0x38 0xc0-0xff db: aesimc Vdq,Wdq (66),(VEX),(o128) dc: aesenc Vdq,Wdq (66),(VEX),(o128) dd: aesenclast Vdq,Wdq (66),(VEX),(o128) -- cgit v1.2.2 From 09879b99d44d701c603935ef2549004405d7f8f9 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 4 Nov 2009 12:58:15 +0900 Subject: x86: Gitignore: arch/x86/lib/inat-tables.c Ignore generated file arch/x86/lib/inat-tables.c.
Signed-off-by: Hiroshi Shimamoto Acked-by: Masami Hiramatsu LKML-Reference: <4AF0FBD7.7000501@ct.jp.nec.com> Signed-off-by: Ingo Molnar --- arch/x86/lib/.gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 arch/x86/lib/.gitignore (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/.gitignore b/arch/x86/lib/.gitignore new file mode 100644 index 000000000000..8df89f0a3fe6 --- /dev/null +++ b/arch/x86/lib/.gitignore @@ -0,0 +1 @@ +inat-tables.c -- cgit v1.2.2 From 14722485830fe6baba738b91d96f06fbd6cf7a18 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 13 Nov 2009 11:56:24 +0000 Subject: x86-64: __copy_from_user_inatomic() adjustments This v2.6.26 commit: ad2fc2c: x86: fix copy_user on x86 rendered __copy_from_user_inatomic() identical to copy_user_generic(), yet didn't make the former just call the latter from an inline function. Furthermore, this v2.6.19 commit: b885808: [PATCH] Add proper sparse __user casts to __copy_to_user_inatomic converted the return type of __copy_to_user_inatomic() from unsigned long to int, but didn't do the same to __copy_from_user_inatomic(). Signed-off-by: Jan Beulich Cc: Linus Torvalds Cc: Alexander Viro Cc: Arjan van de Ven Cc: Andi Kleen Cc: LKML-Reference: <4AFD5778020000780001F8F4@vpn.id2.novell.com> Signed-off-by: Ingo Molnar --- arch/x86/lib/copy_user_64.S | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 4be3c415b3e9..39369985f1cb 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -96,12 +96,6 @@ ENTRY(copy_user_generic) CFI_ENDPROC ENDPROC(copy_user_generic) -ENTRY(__copy_from_user_inatomic) - CFI_STARTPROC - ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string - CFI_ENDPROC -ENDPROC(__copy_from_user_inatomic) - .section .fixup,"ax" /* must zero dest */ ENTRY(bad_from_user) -- cgit v1.2.2 From 3c93ca00eeeb774c7dd666cc7286a9e90c53e998 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 16 Nov 2009 15:42:18 +0100 Subject: x86: Add missing might_fault() checks to copy_{to,from}_user() On x86-64, copy_[to|from]_user() rely on assembly routines that never call might_fault(), making us miss various lockdep checks. This doesn't apply to __copy_from,to_user() that explicitly handle these calls, neither is it a problem in x86-32 where copy_to,from_user() rely on the "__" prefixed versions that also call might_fault().
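The header side of this patch lives outside arch/x86/lib and is therefore not shown in this listing; a minimal sketch of what the checking wrapper roughly looks like (assuming the uaccess_64.h of this era, simplified):

static inline int copy_to_user(void __user *dst, const void *src, unsigned size)
{
	might_fault();	/* lockdep/might_sleep() check that the bare asm routine skips */
	return _copy_to_user(dst, src, size);	/* asm entry point, renamed in the hunk below */
}

This is also why the assembly entry points in the hunk below gain a leading underscore: the unprefixed names are freed up for wrappers that can call might_fault() before dropping into assembly.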
Signed-off-by: Frederic Weisbecker Cc: Arjan van de Ven Cc: Linus Torvalds Cc: Nick Piggin Cc: Peter Zijlstra LKML-Reference: <1258382538-30979-1-git-send-email-fweisbec@gmail.com> [ v2: fix module export ] Signed-off-by: Ingo Molnar --- arch/x86/lib/copy_user_64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 39369985f1cb..cf889d4e076a 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -65,7 +65,7 @@ .endm /* Standard copy_to_user with segment limit checking */ -ENTRY(copy_to_user) +ENTRY(_copy_to_user) CFI_STARTPROC GET_THREAD_INFO(%rax) movq %rdi,%rcx @@ -75,7 +75,7 @@ ENTRY(copy_to_user) jae bad_to_user ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string CFI_ENDPROC -ENDPROC(copy_to_user) +ENDPROC(_copy_to_user) /* Standard copy_from_user with segment limit checking */ ENTRY(_copy_from_user) -- cgit v1.2.2 From cbe5c34c8c1f8ed1afbe6273f4ad57fcfad7822f Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Sun, 6 Dec 2009 20:14:29 +0900 Subject: x86: Compile insn.c and inat.c only for KPROBES At least, insn.c and inat.c are needed for kprobes for now. So, compile those only if KPROBES is enabled. Signed-off-by: OGAWA Hirofumi Cc: Masami Hiramatsu LKML-Reference: <878wdg8icq.fsf@devron.myhome.or.jp> Signed-off-by: Ingo Molnar --- arch/x86/lib/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index a2d6472895fb..442b3b3b2d80 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -20,7 +20,7 @@ lib-y := delay.o lib-y += thunk_$(BITS).o lib-y += usercopy_$(BITS).o getuser.o putuser.o lib-y += memcpy_$(BITS).o -lib-y += insn.o inat.o +lib-$(CONFIG_KPROBES) += insn.o inat.o obj-y += msr-reg.o msr-reg-export.o -- cgit v1.2.2 From d32ba45503acf9c23b301eba2397ca2ee322627b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 7 Dec 2009 12:00:33 -0500 Subject: x86 insn: Delete empty or incomplete inat-tables.c Delete empty or incomplete inat-tables.c if gen-insn-attr-x86.awk failed, because it causes a build error if the user tries to build the kernel next time. Reported-by: Arkadiusz Miskiewicz Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Jens Axboe Cc: Frederic Weisbecker LKML-Reference: <20091207170033.19230.37688.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/lib/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 442b3b3b2d80..45b20e486c2f 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -5,7 +5,7 @@ inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt quiet_cmd_inat_tables = GEN $@ - cmd_inat_tables = $(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ + cmd_inat_tables = $(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@ $(obj)/inat-tables.c: $(inat_tables_script) $(inat_tables_maps) $(call cmd,inat_tables) -- cgit v1.2.2 From 505422517d3f126bb939439e9d15dece94e11d2c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 11 Dec 2009 18:14:40 +0100 Subject: x86, msr: Add support for non-contiguous cpumasks The current rd/wrmsr_on_cpus helpers assume that the supplied cpumasks are contiguous.
However, there are machines out there like some K8 multinode Opterons which have a non-contiguous core enumeration on each node (e.g. cores 0,2 on node 0 instead of 0,1), see http://www.gossamer-threads.com/lists/linux/kernel/1160268. This patch fixes out-of-bounds writes (see URL above) by adding per-CPU msr structs which are used on the respective cores. Additionally, two helpers, msrs_{alloc,free}, are provided for use by the callers of the MSR accessors. Cc: H. Peter Anvin Cc: Mauro Carvalho Chehab Cc: Aristeu Rozanski Cc: Randy Dunlap Cc: Doug Thompson Signed-off-by: Borislav Petkov LKML-Reference: <20091211171440.GD31998@aftab> Signed-off-by: H. Peter Anvin --- arch/x86/lib/msr.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c index 41628b104b9e..872834177937 100644 --- a/arch/x86/lib/msr.c +++ b/arch/x86/lib/msr.c @@ -7,7 +7,6 @@ struct msr_info { u32 msr_no; struct msr reg; struct msr *msrs; - int off; int err; }; @@ -18,7 +17,7 @@ static void __rdmsr_on_cpu(void *info) int this_cpu = raw_smp_processor_id(); if (rv->msrs) - reg = &rv->msrs[this_cpu - rv->off]; + reg = per_cpu_ptr(rv->msrs, this_cpu); else reg = &rv->reg; @@ -32,7 +31,7 @@ static void __wrmsr_on_cpu(void *info) int this_cpu = raw_smp_processor_id(); if (rv->msrs) - reg = &rv->msrs[this_cpu - rv->off]; + reg = per_cpu_ptr(rv->msrs, this_cpu); else reg = &rv->reg; @@ -80,7 +79,6 @@ static void __rwmsr_on_cpus(const struct cpumask *mask, u32 msr_no, memset(&rv, 0, sizeof(rv)); - rv.off = cpumask_first(mask); rv.msrs = msrs; rv.msr_no = msr_no; @@ -120,6 +118,26 @@ void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) } EXPORT_SYMBOL(wrmsr_on_cpus); +struct msr *msrs_alloc(void) +{ + struct msr *msrs = NULL; + + msrs = alloc_percpu(struct msr); + if (!msrs) { + pr_warning("%s: error allocating msrs\n", __func__); + return NULL; + } + + return msrs; +} +EXPORT_SYMBOL(msrs_alloc); + +void msrs_free(struct msr *msrs) +{ + free_percpu(msrs); +} +EXPORT_SYMBOL(msrs_free); + /* These "safe" variants are slower and should be used when the target MSR may not actually exist. */ static void __rdmsr_safe_on_cpu(void *info) -- cgit v1.2.2 From 6ede31e03084ee084bcee073ef3d1136f68d0906 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 17 Dec 2009 00:16:25 +0100 Subject: x86, msr: msrs_alloc/free for CONFIG_SMP=n Randy Dunlap reported the following build error: "When CONFIG_SMP=n, CONFIG_X86_MSR=m: ERROR: "msrs_free" [drivers/edac/amd64_edac_mod.ko] undefined! ERROR: "msrs_alloc" [drivers/edac/amd64_edac_mod.ko] undefined!" This is due to the fact that this code is conditioned on CONFIG_SMP and in the UP case we have only the stubs in the header. Fork off SMP functionality into a new file (msr-smp.c) and build msrs_{alloc,free} unconditionally. Reported-by: Randy Dunlap Cc: H. Peter Anvin Signed-off-by: Borislav Petkov LKML-Reference: <20091216231625.GD27228@liondog.tnic> Signed-off-by: H.
Peter Anvin --- arch/x86/lib/Makefile | 4 +- arch/x86/lib/msr-smp.c | 204 ++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/lib/msr.c | 213 ------------------------------------------------- 3 files changed, 206 insertions(+), 215 deletions(-) create mode 100644 arch/x86/lib/msr-smp.c (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index a2d6472895fb..706be8bf967b 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -14,7 +14,7 @@ $(obj)/inat.o: $(obj)/inat-tables.c clean-files := inat-tables.c -obj-$(CONFIG_SMP) := msr.o +obj-$(CONFIG_SMP) += msr-smp.o lib-y := delay.o lib-y += thunk_$(BITS).o @@ -22,7 +22,7 @@ lib-y += usercopy_$(BITS).o getuser.o putuser.o lib-y += memcpy_$(BITS).o lib-y += insn.o inat.o -obj-y += msr-reg.o msr-reg-export.o +obj-y += msr.o msr-reg.o msr-reg-export.o ifeq ($(CONFIG_X86_32),y) obj-y += atomic64_32.o diff --git a/arch/x86/lib/msr-smp.c b/arch/x86/lib/msr-smp.c new file mode 100644 index 000000000000..a6b1b86d2253 --- /dev/null +++ b/arch/x86/lib/msr-smp.c @@ -0,0 +1,204 @@ +#include +#include +#include +#include + +static void __rdmsr_on_cpu(void *info) +{ + struct msr_info *rv = info; + struct msr *reg; + int this_cpu = raw_smp_processor_id(); + + if (rv->msrs) + reg = per_cpu_ptr(rv->msrs, this_cpu); + else + reg = &rv->reg; + + rdmsr(rv->msr_no, reg->l, reg->h); +} + +static void __wrmsr_on_cpu(void *info) +{ + struct msr_info *rv = info; + struct msr *reg; + int this_cpu = raw_smp_processor_id(); + + if (rv->msrs) + reg = per_cpu_ptr(rv->msrs, this_cpu); + else + reg = &rv->reg; + + wrmsr(rv->msr_no, reg->l, reg->h); +} + +int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) +{ + int err; + struct msr_info rv; + + memset(&rv, 0, sizeof(rv)); + + rv.msr_no = msr_no; + err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1); + *l = rv.reg.l; + *h = rv.reg.h; + + return err; +} +EXPORT_SYMBOL(rdmsr_on_cpu); + +int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) +{ + int err; + struct msr_info rv; + + memset(&rv, 0, sizeof(rv)); + + rv.msr_no = msr_no; + rv.reg.l = l; + rv.reg.h = h; + err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1); + + return err; +} +EXPORT_SYMBOL(wrmsr_on_cpu); + +static void __rwmsr_on_cpus(const struct cpumask *mask, u32 msr_no, + struct msr *msrs, + void (*msr_func) (void *info)) +{ + struct msr_info rv; + int this_cpu; + + memset(&rv, 0, sizeof(rv)); + + rv.msrs = msrs; + rv.msr_no = msr_no; + + this_cpu = get_cpu(); + + if (cpumask_test_cpu(this_cpu, mask)) + msr_func(&rv); + + smp_call_function_many(mask, msr_func, &rv, 1); + put_cpu(); +} + +/* rdmsr on a bunch of CPUs + * + * @mask: which CPUs + * @msr_no: which MSR + * @msrs: array of MSR values + * + */ +void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) +{ + __rwmsr_on_cpus(mask, msr_no, msrs, __rdmsr_on_cpu); +} +EXPORT_SYMBOL(rdmsr_on_cpus); + +/* + * wrmsr on a bunch of CPUs + * + * @mask: which CPUs + * @msr_no: which MSR + * @msrs: array of MSR values + * + */ +void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) +{ + __rwmsr_on_cpus(mask, msr_no, msrs, __wrmsr_on_cpu); +} +EXPORT_SYMBOL(wrmsr_on_cpus); + +/* These "safe" variants are slower and should be used when the target MSR + may not actually exist. 
*/ +static void __rdmsr_safe_on_cpu(void *info) +{ + struct msr_info *rv = info; + + rv->err = rdmsr_safe(rv->msr_no, &rv->reg.l, &rv->reg.h); +} + +static void __wrmsr_safe_on_cpu(void *info) +{ + struct msr_info *rv = info; + + rv->err = wrmsr_safe(rv->msr_no, rv->reg.l, rv->reg.h); +} + +int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) +{ + int err; + struct msr_info rv; + + memset(&rv, 0, sizeof(rv)); + + rv.msr_no = msr_no; + err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1); + *l = rv.reg.l; + *h = rv.reg.h; + + return err ? err : rv.err; +} +EXPORT_SYMBOL(rdmsr_safe_on_cpu); + +int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) +{ + int err; + struct msr_info rv; + + memset(&rv, 0, sizeof(rv)); + + rv.msr_no = msr_no; + rv.reg.l = l; + rv.reg.h = h; + err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1); + + return err ? err : rv.err; +} +EXPORT_SYMBOL(wrmsr_safe_on_cpu); + +/* + * These variants are significantly slower, but allows control over + * the entire 32-bit GPR set. + */ +static void __rdmsr_safe_regs_on_cpu(void *info) +{ + struct msr_regs_info *rv = info; + + rv->err = rdmsr_safe_regs(rv->regs); +} + +static void __wrmsr_safe_regs_on_cpu(void *info) +{ + struct msr_regs_info *rv = info; + + rv->err = wrmsr_safe_regs(rv->regs); +} + +int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs) +{ + int err; + struct msr_regs_info rv; + + rv.regs = regs; + rv.err = -EIO; + err = smp_call_function_single(cpu, __rdmsr_safe_regs_on_cpu, &rv, 1); + + return err ? err : rv.err; +} +EXPORT_SYMBOL(rdmsr_safe_regs_on_cpu); + +int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs) +{ + int err; + struct msr_regs_info rv; + + rv.regs = regs; + rv.err = -EIO; + err = smp_call_function_single(cpu, __wrmsr_safe_regs_on_cpu, &rv, 1); + + return err ? 
err : rv.err; +} +EXPORT_SYMBOL(wrmsr_safe_regs_on_cpu); diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c index 872834177937..8f8eebdca7d4 100644 --- a/arch/x86/lib/msr.c +++ b/arch/x86/lib/msr.c @@ -1,123 +1,7 @@ #include #include -#include #include -struct msr_info { - u32 msr_no; - struct msr reg; - struct msr *msrs; - int err; -}; - -static void __rdmsr_on_cpu(void *info) -{ - struct msr_info *rv = info; - struct msr *reg; - int this_cpu = raw_smp_processor_id(); - - if (rv->msrs) - reg = per_cpu_ptr(rv->msrs, this_cpu); - else - reg = &rv->reg; - - rdmsr(rv->msr_no, reg->l, reg->h); -} - -static void __wrmsr_on_cpu(void *info) -{ - struct msr_info *rv = info; - struct msr *reg; - int this_cpu = raw_smp_processor_id(); - - if (rv->msrs) - reg = per_cpu_ptr(rv->msrs, this_cpu); - else - reg = &rv->reg; - - wrmsr(rv->msr_no, reg->l, reg->h); -} - -int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) -{ - int err; - struct msr_info rv; - - memset(&rv, 0, sizeof(rv)); - - rv.msr_no = msr_no; - err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1); - *l = rv.reg.l; - *h = rv.reg.h; - - return err; -} -EXPORT_SYMBOL(rdmsr_on_cpu); - -int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) -{ - int err; - struct msr_info rv; - - memset(&rv, 0, sizeof(rv)); - - rv.msr_no = msr_no; - rv.reg.l = l; - rv.reg.h = h; - err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1); - - return err; -} -EXPORT_SYMBOL(wrmsr_on_cpu); - -static void __rwmsr_on_cpus(const struct cpumask *mask, u32 msr_no, - struct msr *msrs, - void (*msr_func) (void *info)) -{ - struct msr_info rv; - int this_cpu; - - memset(&rv, 0, sizeof(rv)); - - rv.msrs = msrs; - rv.msr_no = msr_no; - - this_cpu = get_cpu(); - - if (cpumask_test_cpu(this_cpu, mask)) - msr_func(&rv); - - smp_call_function_many(mask, msr_func, &rv, 1); - put_cpu(); -} - -/* rdmsr on a bunch of CPUs - * - * @mask: which CPUs - * @msr_no: which MSR - * @msrs: array of MSR values - * - */ -void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) -{ - __rwmsr_on_cpus(mask, msr_no, msrs, __rdmsr_on_cpu); -} -EXPORT_SYMBOL(rdmsr_on_cpus); - -/* - * wrmsr on a bunch of CPUs - * - * @mask: which CPUs - * @msr_no: which MSR - * @msrs: array of MSR values - * - */ -void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) -{ - __rwmsr_on_cpus(mask, msr_no, msrs, __wrmsr_on_cpu); -} -EXPORT_SYMBOL(wrmsr_on_cpus); - struct msr *msrs_alloc(void) { struct msr *msrs = NULL; @@ -137,100 +21,3 @@ void msrs_free(struct msr *msrs) free_percpu(msrs); } EXPORT_SYMBOL(msrs_free); - -/* These "safe" variants are slower and should be used when the target MSR - may not actually exist. */ -static void __rdmsr_safe_on_cpu(void *info) -{ - struct msr_info *rv = info; - - rv->err = rdmsr_safe(rv->msr_no, &rv->reg.l, &rv->reg.h); -} - -static void __wrmsr_safe_on_cpu(void *info) -{ - struct msr_info *rv = info; - - rv->err = wrmsr_safe(rv->msr_no, rv->reg.l, rv->reg.h); -} - -int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) -{ - int err; - struct msr_info rv; - - memset(&rv, 0, sizeof(rv)); - - rv.msr_no = msr_no; - err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1); - *l = rv.reg.l; - *h = rv.reg.h; - - return err ? 
err : rv.err; -} -EXPORT_SYMBOL(rdmsr_safe_on_cpu); - -int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) -{ - int err; - struct msr_info rv; - - memset(&rv, 0, sizeof(rv)); - - rv.msr_no = msr_no; - rv.reg.l = l; - rv.reg.h = h; - err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1); - - return err ? err : rv.err; -} -EXPORT_SYMBOL(wrmsr_safe_on_cpu); - -/* - * These variants are significantly slower, but allows control over - * the entire 32-bit GPR set. - */ -struct msr_regs_info { - u32 *regs; - int err; -}; - -static void __rdmsr_safe_regs_on_cpu(void *info) -{ - struct msr_regs_info *rv = info; - - rv->err = rdmsr_safe_regs(rv->regs); -} - -static void __wrmsr_safe_regs_on_cpu(void *info) -{ - struct msr_regs_info *rv = info; - - rv->err = wrmsr_safe_regs(rv->regs); -} - -int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs) -{ - int err; - struct msr_regs_info rv; - - rv.regs = regs; - rv.err = -EIO; - err = smp_call_function_single(cpu, __rdmsr_safe_regs_on_cpu, &rv, 1); - - return err ? err : rv.err; -} -EXPORT_SYMBOL(rdmsr_safe_regs_on_cpu); - -int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs) -{ - int err; - struct msr_regs_info rv; - - rv.regs = regs; - rv.err = -EIO; - err = smp_call_function_single(cpu, __wrmsr_safe_regs_on_cpu, &rv, 1); - - return err ? err : rv.err; -} -EXPORT_SYMBOL(wrmsr_safe_regs_on_cpu); -- cgit v1.2.2 From 1b1d9258181bae199dc940f4bd0298126b9a73d9 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 18 Dec 2009 16:12:56 +0000 Subject: x86-64: Modify copy_user_generic() alternatives mechanism In order to avoid unnecessary chains of branches, rather than implementing copy_user_generic() as a function consisting of just a single (possibly patched) branch, instead properly deal with patching call instructions in the alternative instructions framework, and move the patching into the callers. As a follow-on, one could also introduce something like __EXPORT_SYMBOL_ALT() to avoid patching call sites in modules. Signed-off-by: Jan Beulich Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton LKML-Reference: <4B2BB8180200007800026AE7@vpn.id2.novell.com> Signed-off-by: Ingo Molnar --- arch/x86/lib/copy_user_64.S | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index cf889d4e076a..71100c98e337 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -90,12 +90,6 @@ ENTRY(_copy_from_user) CFI_ENDPROC ENDPROC(_copy_from_user) -ENTRY(copy_user_generic) - CFI_STARTPROC - ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string - CFI_ENDPROC -ENDPROC(copy_user_generic) - .section .fixup,"ax" /* must zero dest */ ENTRY(bad_from_user) -- cgit v1.2.2 From 7269e8812a59f74fb1ce134465d0bcf5683b93a1 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 18 Dec 2009 16:16:03 +0000 Subject: x86-64: Modify memcpy()/memset() alternatives mechanism In order to avoid unnecessary chains of branches, rather than implementing memcpy()/memset()'s access to their alternative implementations via a jump, patch the (larger) original function directly. The memcpy() part of this is slightly subtle: while alternative instruction patching does itself use memcpy(), with the replacement block being less than 64-bytes in size the main loop of the original function doesn't get used for copying memcpy_c() over memcpy(), and hence we can safely write over its beginning. 
Also note that the CFI annotations are fine for both variants of each of the functions. Signed-off-by: Jan Beulich Cc: Nick Piggin Cc: Linus Torvalds Cc: Andrew Morton LKML-Reference: <4B2BB8D30200007800026AF2@vpn.id2.novell.com> Signed-off-by: Ingo Molnar --- arch/x86/lib/memcpy_64.S | 23 ++++++++--------------- arch/x86/lib/memset_64.S | 18 ++++++------------ 2 files changed, 14 insertions(+), 27 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index ad5441ed1b57..f82e884928af 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -20,12 +20,11 @@ /* * memcpy_c() - fast string ops (REP MOVSQ) based variant. * - * Calls to this get patched into the kernel image via the + * This gets patched over the unrolled variant (below) via the * alternative instructions framework: */ - ALIGN -memcpy_c: - CFI_STARTPROC + .section .altinstr_replacement, "ax", @progbits +.Lmemcpy_c: movq %rdi, %rax movl %edx, %ecx @@ -35,8 +34,8 @@ memcpy_c: movl %edx, %ecx rep movsb ret - CFI_ENDPROC -ENDPROC(memcpy_c) +.Lmemcpy_e: + .previous ENTRY(__memcpy) ENTRY(memcpy) @@ -128,16 +127,10 @@ ENDPROC(__memcpy) * It is also a lot simpler. Use this when possible: */ - .section .altinstr_replacement, "ax" -1: .byte 0xeb /* jmp */ - .byte (memcpy_c - memcpy) - (2f - 1b) /* offset */ -2: - .previous - .section .altinstructions, "a" .align 8 .quad memcpy - .quad 1b + .quad .Lmemcpy_c .byte X86_FEATURE_REP_GOOD /* @@ -145,6 +138,6 @@ ENDPROC(__memcpy) * so it is silly to overwrite itself with nops - reboot is the * only outcome... */ - .byte 2b - 1b - .byte 2b - 1b + .byte .Lmemcpy_e - .Lmemcpy_c + .byte .Lmemcpy_e - .Lmemcpy_c .previous diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index 2c5948116bd2..e88d3b81644a 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -12,9 +12,8 @@ * * rax original destination */ - ALIGN -memset_c: - CFI_STARTPROC + .section .altinstr_replacement, "ax", @progbits +.Lmemset_c: movq %rdi,%r9 movl %edx,%r8d andl $7,%r8d @@ -29,8 +28,8 @@ memset_c: rep stosb movq %r9,%rax ret - CFI_ENDPROC -ENDPROC(memset_c) +.Lmemset_e: + .previous ENTRY(memset) ENTRY(__memset) @@ -118,16 +117,11 @@ ENDPROC(__memset) #include - .section .altinstr_replacement,"ax" -1: .byte 0xeb /* jmp */ - .byte (memset_c - memset) - (2f - 1b) /* offset */ -2: - .previous .section .altinstructions,"a" .align 8 .quad memset - .quad 1b + .quad .Lmemset_c .byte X86_FEATURE_REP_GOOD .byte .Lfinal - memset - .byte 2b - 1b + .byte .Lmemset_e - .Lmemset_c .previous -- cgit v1.2.2 From bafaecd11df15ad5b1e598adc7736afcd38ee13d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 12 Jan 2010 18:16:42 -0800 Subject: x86-64: support native xadd rwsem implementation This one is much faster than the spinlock based fallback rwsem code, with certain artifical benchmarks having shown 300%+ improvement on threaded page faults etc. Again, note the 32767-thread limit here. So this really does need that whole "make rwsem_count_t be 64-bit and fix the BIAS values to match" extension on top of it, but that is conceptually a totally independent issue. NOT TESTED! The original patch that this all was based on were tested by KAMEZAWA Hiroyuki, but maybe I screwed up something when I created the cleaned-up series, so caveat emptor.. Also note that it _may_ be a good idea to mark some more registers clobbered on x86-64 in the inline asms instead of saving/restoring them. 
They are inline functions, but they are only used in places where there are not a lot of live registers _anyway_, so doing for example the clobbers of %r8-%r11 in the asm wouldn't make the fast-path code any worse, and would make the slow-path code smaller. (Not that the slow-path really matters to that degree. Saving a few unnecessary registers is the _least_ of our problems when we hit the slow path. The instruction/cycle counting really only matters in the fast path). Signed-off-by: Linus Torvalds LKML-Reference: Signed-off-by: H. Peter Anvin --- arch/x86/lib/Makefile | 1 + arch/x86/lib/rwsem_64.S | 81 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+) create mode 100644 arch/x86/lib/rwsem_64.S (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index cffd754f3039..c80245131fdc 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -39,4 +39,5 @@ else lib-y += thunk_64.o clear_page_64.o copy_page_64.o lib-y += memmove_64.o memset_64.o lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o + lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o endif diff --git a/arch/x86/lib/rwsem_64.S b/arch/x86/lib/rwsem_64.S new file mode 100644 index 000000000000..15acecf0d7aa --- /dev/null +++ b/arch/x86/lib/rwsem_64.S @@ -0,0 +1,81 @@ +/* + * x86-64 rwsem wrappers + * + * This interfaces the inline asm code to the slow-path + * C routines. We need to save the call-clobbered regs + * that the asm does not mark as clobbered, and move the + * argument from %rax to %rdi. + * + * NOTE! We don't need to save %rax, because the functions + * will always return the semaphore pointer in %rax (which + * is also the input argument to these helpers) + * + * The following can clobber %rdx because the asm clobbers it: + * call_rwsem_down_write_failed + * call_rwsem_wake + * but %rdi, %rsi, %rcx, %r8-r11 always need saving. + */ + +#include +#include +#include +#include +#include + +#define save_common_regs \ + pushq %rdi; \ + pushq %rsi; \ + pushq %rcx; \ + pushq %r8; \ + pushq %r9; \ + pushq %r10; \ + pushq %r11 + +#define restore_common_regs \ + popq %r11; \ + popq %r10; \ + popq %r9; \ + popq %r8; \ + popq %rcx; \ + popq %rsi; \ + popq %rdi + +/* Fix up special calling conventions */ +ENTRY(call_rwsem_down_read_failed) + save_common_regs + pushq %rdx + movq %rax,%rdi + call rwsem_down_read_failed + popq %rdx + restore_common_regs + ret + ENDPROC(call_rwsem_down_read_failed) + +ENTRY(call_rwsem_down_write_failed) + save_common_regs + movq %rax,%rdi + call rwsem_down_write_failed + restore_common_regs + ret + ENDPROC(call_rwsem_down_write_failed) + +ENTRY(call_rwsem_wake) + decw %dx /* do nothing if still outstanding active readers */ + jnz 1f + save_common_regs + movq %rax,%rdi + call rwsem_wake + restore_common_regs +1: ret + ENDPROC(call_rwsem_wake) + +/* Fix up special calling conventions */ +ENTRY(call_rwsem_downgrade_wake) + save_common_regs + pushq %rdx + movq %rax,%rdi + call rwsem_downgrade_wake + popq %rdx + restore_common_regs + ret + ENDPROC(call_rwsem_downgrade_wake) -- cgit v1.2.2 From a7b480e7f30b3813353ec009f10f2ac7a6669f3b Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 22 Jan 2010 16:01:03 +0100 Subject: x86, lib: Add wbinvd smp helpers Add wbinvd_on_cpu and wbinvd_on_all_cpus stubs for executing wbinvd on a particular CPU. [ hpa: renamed lib/smp.c to lib/cache-smp.c ] [ hpa: wbinvd_on_all_cpus() returns int, but wbinvd() returns void. 
Thus, the former cannot be a macro for the latter, replace with an inline function. ] Signed-off-by: Borislav Petkov LKML-Reference: <1264172467-25155-2-git-send-email-bp@amd64.org> Signed-off-by: H. Peter Anvin --- arch/x86/lib/Makefile | 2 +- arch/x86/lib/cache-smp.c | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 arch/x86/lib/cache-smp.c (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index cffd754f3039..d85e0e438b58 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -14,7 +14,7 @@ $(obj)/inat.o: $(obj)/inat-tables.c clean-files := inat-tables.c -obj-$(CONFIG_SMP) += msr-smp.o +obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o lib-y := delay.o lib-y += thunk_$(BITS).o diff --git a/arch/x86/lib/cache-smp.c b/arch/x86/lib/cache-smp.c new file mode 100644 index 000000000000..a3c668875038 --- /dev/null +++ b/arch/x86/lib/cache-smp.c @@ -0,0 +1,19 @@ +#include +#include + +static void __wbinvd(void *dummy) +{ + wbinvd(); +} + +void wbinvd_on_cpu(int cpu) +{ + smp_call_function_single(cpu, __wbinvd, NULL, 1); +} +EXPORT_SYMBOL(wbinvd_on_cpu); + +int wbinvd_on_all_cpus(void) +{ + return on_each_cpu(__wbinvd, NULL, 1); +} +EXPORT_SYMBOL(wbinvd_on_all_cpus); -- cgit v1.2.2 From 6175ddf06b6172046a329e3abfd9c901a43efd2e Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 5 Feb 2010 09:37:07 -0500 Subject: x86: Clean up mem*io functions. Iomem has no special significance on x86. Use the standard mem* functions instead of trying to call other versions. Some fixups are needed to match the function prototypes. Signed-off-by: Brian Gerst LKML-Reference: <1265380629-3212-6-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/lib/Makefile | 2 +- arch/x86/lib/io_64.c | 25 ------------------------- 2 files changed, 1 insertion(+), 26 deletions(-) delete mode 100644 arch/x86/lib/io_64.c (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index cffd754f3039..fff14272dbad 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -34,7 +34,7 @@ ifneq ($(CONFIG_X86_CMPXCHG64),y) endif lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o else - obj-y += io_64.o iomap_copy_64.o + obj-y += iomap_copy_64.o lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o lib-y += thunk_64.o clear_page_64.o copy_page_64.o lib-y += memmove_64.o memset_64.o diff --git a/arch/x86/lib/io_64.c b/arch/x86/lib/io_64.c deleted file mode 100644 index 3f1eb59b5f08..000000000000 --- a/arch/x86/lib/io_64.c +++ /dev/null @@ -1,25 +0,0 @@ -#include -#include -#include - -void __memcpy_toio(unsigned long dst, const void *src, unsigned len) -{ - __inline_memcpy((void *)dst, src, len); -} -EXPORT_SYMBOL(__memcpy_toio); - -void __memcpy_fromio(void *dst, unsigned long src, unsigned len) -{ - __inline_memcpy(dst, (const void *)src, len); -} -EXPORT_SYMBOL(__memcpy_fromio); - -void memset_io(volatile void __iomem *a, int b, size_t c) -{ - /* - * TODO: memset can mangle the IO patterns quite a bit. - * perhaps it would be better to use a dumb one: - */ - memset((void *)a, b, c); -} -EXPORT_SYMBOL(memset_io); -- cgit v1.2.2 From a66f6375bdeb64d7a56c532bda7c006358845820 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 4 May 2010 13:42:53 +0100 Subject: Fix the x86_64 implementation of call_rwsem_wake() The x86_64 call_rwsem_wake() treats the active state counter part of the R/W semaphore state as being 16-bit when it's actually 32-bit (it's half of the 64-bit state).
It should do "decl %edx" not "decw %dx". Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- arch/x86/lib/rwsem_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/rwsem_64.S b/arch/x86/lib/rwsem_64.S index 15acecf0d7aa..41fcf00e49df 100644 --- a/arch/x86/lib/rwsem_64.S +++ b/arch/x86/lib/rwsem_64.S @@ -60,7 +60,7 @@ ENTRY(call_rwsem_down_write_failed) ENDPROC(call_rwsem_down_write_failed) ENTRY(call_rwsem_wake) - decw %dx /* do nothing if still outstanding active readers */ + decl %edx /* do nothing if still outstanding active readers */ jnz 1f save_common_regs movq %rax,%rdi -- cgit v1.2.2
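The truncation is easy to demonstrate in plain C; a hedged illustration (hypothetical user-space code with a contrived counter value, not part of the patch):

#include <stdio.h>

int main(void)
{
	unsigned int edx = 0x00010001;	/* contrived active count with bits set above bit 15 */
	unsigned short w16 = (unsigned short)(edx - 1);	/* what "decw %dx" leaves in %dx */
	unsigned int w32 = edx - 1;	/* what "decl %edx" leaves in %edx */

	/* The jnz after the decrement tests this result: the 16-bit view is
	 * zero (so rwsem_wake would be called spuriously), while the full
	 * 32-bit value is still nonzero (readers remain outstanding). */
	printf("16-bit: %#x, 32-bit: %#x\n", (unsigned int)w16, w32);
	return 0;
}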