path: root/arch/x86
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig | 1
-rw-r--r--  arch/x86/boot/compressed/eboot.c | 26
-rw-r--r--  arch/x86/crypto/Makefile | 2
-rw-r--r--  arch/x86/crypto/aes-i586-asm_32.S | 15
-rw-r--r--  arch/x86/crypto/aes-x86_64-asm_64.S | 30
-rw-r--r--  arch/x86/crypto/aesni-intel_asm.S | 23
-rw-r--r--  arch/x86/crypto/blowfish-x86_64-asm_64.S | 39
-rw-r--r--  arch/x86/crypto/camellia-aesni-avx-asm_64.S | 38
-rw-r--r--  arch/x86/crypto/camellia-x86_64-asm_64.S | 50
-rw-r--r--  arch/x86/crypto/cast5-avx-x86_64-asm_64.S | 48
-rw-r--r--  arch/x86/crypto/cast6-avx-x86_64-asm_64.S | 35
-rw-r--r--  arch/x86/crypto/crc32-pclmul_asm.S | 246
-rw-r--r--  arch/x86/crypto/crc32-pclmul_glue.c | 201
-rw-r--r--  arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 8
-rw-r--r--  arch/x86/crypto/ghash-clmulni-intel_asm.S | 4
-rw-r--r--  arch/x86/crypto/salsa20-i586-asm_32.S | 28
-rw-r--r--  arch/x86/crypto/salsa20-x86_64-asm_64.S | 28
-rw-r--r--  arch/x86/crypto/salsa20_glue.c | 5
-rw-r--r--  arch/x86/crypto/serpent-avx-x86_64-asm_64.S | 35
-rw-r--r--  arch/x86/crypto/serpent-sse2-i586-asm_32.S | 20
-rw-r--r--  arch/x86/crypto/serpent-sse2-x86_64-asm_64.S | 20
-rw-r--r--  arch/x86/crypto/sha1_ssse3_asm.S | 10
-rw-r--r--  arch/x86/crypto/twofish-avx-x86_64-asm_64.S | 35
-rw-r--r--  arch/x86/crypto/twofish-i586-asm_32.S | 11
-rw-r--r--  arch/x86/crypto/twofish-x86_64-asm_64-3way.S | 20
-rw-r--r--  arch/x86/crypto/twofish-x86_64-asm_64.S | 11
-rw-r--r--  arch/x86/ia32/ia32_aout.c | 6
-rw-r--r--  arch/x86/include/asm/efi.h | 9
-rw-r--r--  arch/x86/include/asm/ftrace.h | 24
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 26
-rw-r--r--  arch/x86/include/asm/kvm_para.h | 2
-rw-r--r--  arch/x86/include/asm/pci.h | 3
-rw-r--r--  arch/x86/include/asm/pci_x86.h | 1
-rw-r--r--  arch/x86/include/asm/thread_info.h | 1
-rw-r--r--  arch/x86/include/asm/vmx.h | 18
-rw-r--r--  arch/x86/include/asm/xen/events.h | 3
-rw-r--r--  arch/x86/include/asm/xen/page.h | 2
-rw-r--r--  arch/x86/include/uapi/asm/vmx.h | 9
-rw-r--r--  arch/x86/kernel/apic/apic.c | 2
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 3
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/p5.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/winchip.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mtrr/generic.c | 2
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c | 23
-rw-r--r--  arch/x86/kernel/cpuid.c | 4
-rw-r--r--  arch/x86/kernel/dumpstack.c | 2
-rw-r--r--  arch/x86/kernel/head.c | 53
-rw-r--r--  arch/x86/kernel/head_64.S | 4
-rw-r--r--  arch/x86/kernel/kprobes/core.c | 8
-rw-r--r--  arch/x86/kernel/kvmclock.c | 11
-rw-r--r--  arch/x86/kernel/nmi.c | 1
-rw-r--r--  arch/x86/kernel/pvclock.c | 2
-rw-r--r--  arch/x86/kernel/setup.c | 3
-rw-r--r--  arch/x86/kvm/emulate.c | 673
-rw-r--r--  arch/x86/kvm/i8254.c | 1
-rw-r--r--  arch/x86/kvm/i8259.c | 2
-rw-r--r--  arch/x86/kvm/irq.c | 74
-rw-r--r--  arch/x86/kvm/lapic.c | 140
-rw-r--r--  arch/x86/kvm/lapic.h | 34
-rw-r--r--  arch/x86/kvm/mmu.c | 194
-rw-r--r--  arch/x86/kvm/mmutrace.h | 6
-rw-r--r--  arch/x86/kvm/paging_tmpl.h | 106
-rw-r--r--  arch/x86/kvm/svm.c | 24
-rw-r--r--  arch/x86/kvm/vmx.c | 714
-rw-r--r--  arch/x86/kvm/x86.c | 168
-rw-r--r--  arch/x86/mm/fault.c | 8
-rw-r--r--  arch/x86/mm/numa.c | 3
-rw-r--r--  arch/x86/mm/pageattr.c | 50
-rw-r--r--  arch/x86/pci/acpi.c | 9
-rw-r--r--  arch/x86/pci/common.c | 1
-rw-r--r--  arch/x86/pci/i386.c | 185
-rw-r--r--  arch/x86/pci/legacy.c | 2
-rw-r--r--  arch/x86/pci/numaq_32.c | 2
-rw-r--r--  arch/x86/platform/efi/efi.c | 10
-rw-r--r--  arch/x86/xen/smp.c | 42
-rw-r--r--  arch/x86/xen/spinlock.c | 1
77 files changed, 2432 insertions, 1234 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6a9383370311..a4f24f5b1218 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -112,6 +112,7 @@ config X86
112 select GENERIC_STRNLEN_USER 112 select GENERIC_STRNLEN_USER
113 select HAVE_CONTEXT_TRACKING if X86_64 113 select HAVE_CONTEXT_TRACKING if X86_64
114 select HAVE_IRQ_TIME_ACCOUNTING 114 select HAVE_IRQ_TIME_ACCOUNTING
115 select HAVE_VIRT_TO_BUS
115 select MODULES_USE_ELF_REL if X86_32 116 select MODULES_USE_ELF_REL if X86_32
116 select MODULES_USE_ELF_RELA if X86_64 117 select MODULES_USE_ELF_RELA if X86_64
117 select CLONE_BACKWARDS if X86_32 118 select CLONE_BACKWARDS if X86_32
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index f8fa41190c35..c205035a6b96 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -19,23 +19,28 @@
19 19
20static efi_system_table_t *sys_table; 20static efi_system_table_t *sys_table;
21 21
22static void efi_char16_printk(efi_char16_t *str)
23{
24 struct efi_simple_text_output_protocol *out;
25
26 out = (struct efi_simple_text_output_protocol *)sys_table->con_out;
27 efi_call_phys2(out->output_string, out, str);
28}
29
22static void efi_printk(char *str) 30static void efi_printk(char *str)
23{ 31{
24 char *s8; 32 char *s8;
25 33
26 for (s8 = str; *s8; s8++) { 34 for (s8 = str; *s8; s8++) {
27 struct efi_simple_text_output_protocol *out;
28 efi_char16_t ch[2] = { 0 }; 35 efi_char16_t ch[2] = { 0 };
29 36
30 ch[0] = *s8; 37 ch[0] = *s8;
31 out = (struct efi_simple_text_output_protocol *)sys_table->con_out;
32
33 if (*s8 == '\n') { 38 if (*s8 == '\n') {
34 efi_char16_t nl[2] = { '\r', 0 }; 39 efi_char16_t nl[2] = { '\r', 0 };
35 efi_call_phys2(out->output_string, out, nl); 40 efi_char16_printk(nl);
36 } 41 }
37 42
38 efi_call_phys2(out->output_string, out, ch); 43 efi_char16_printk(ch);
39 } 44 }
40} 45}
41 46
@@ -709,7 +714,12 @@ static efi_status_t handle_ramdisks(efi_loaded_image_t *image,
709 if ((u8 *)p >= (u8 *)filename_16 + sizeof(filename_16)) 714 if ((u8 *)p >= (u8 *)filename_16 + sizeof(filename_16))
710 break; 715 break;
711 716
712 *p++ = *str++; 717 if (*str == '/') {
718 *p++ = '\\';
719 *str++;
720 } else {
721 *p++ = *str++;
722 }
713 } 723 }
714 724
715 *p = '\0'; 725 *p = '\0';
@@ -737,7 +747,9 @@ static efi_status_t handle_ramdisks(efi_loaded_image_t *image,
737 status = efi_call_phys5(fh->open, fh, &h, filename_16, 747 status = efi_call_phys5(fh->open, fh, &h, filename_16,
738 EFI_FILE_MODE_READ, (u64)0); 748 EFI_FILE_MODE_READ, (u64)0);
739 if (status != EFI_SUCCESS) { 749 if (status != EFI_SUCCESS) {
740 efi_printk("Failed to open initrd file\n"); 750 efi_printk("Failed to open initrd file: ");
751 efi_char16_printk(filename_16);
752 efi_printk("\n");
741 goto close_handles; 753 goto close_handles;
742 } 754 }
743 755
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index e0ca7c9ac383..63947a8f9f0f 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -27,6 +27,7 @@ obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
27 27
28obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o 28obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o
29obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o 29obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o
30obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o
30 31
31aes-i586-y := aes-i586-asm_32.o aes_glue.o 32aes-i586-y := aes-i586-asm_32.o aes_glue.o
32twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o 33twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
@@ -52,3 +53,4 @@ ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
52sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o 53sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
53crc32c-intel-y := crc32c-intel_glue.o 54crc32c-intel-y := crc32c-intel_glue.o
54crc32c-intel-$(CONFIG_CRYPTO_CRC32C_X86_64) += crc32c-pcl-intel-asm_64.o 55crc32c-intel-$(CONFIG_CRYPTO_CRC32C_X86_64) += crc32c-pcl-intel-asm_64.o
56crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o
diff --git a/arch/x86/crypto/aes-i586-asm_32.S b/arch/x86/crypto/aes-i586-asm_32.S
index b949ec2f9af4..2849dbc59e11 100644
--- a/arch/x86/crypto/aes-i586-asm_32.S
+++ b/arch/x86/crypto/aes-i586-asm_32.S
@@ -36,6 +36,7 @@
36.file "aes-i586-asm.S" 36.file "aes-i586-asm.S"
37.text 37.text
38 38
39#include <linux/linkage.h>
39#include <asm/asm-offsets.h> 40#include <asm/asm-offsets.h>
40 41
41#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words) 42#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words)
@@ -219,14 +220,10 @@
219// AES (Rijndael) Encryption Subroutine 220// AES (Rijndael) Encryption Subroutine
220/* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */ 221/* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
221 222
222.global aes_enc_blk
223
224.extern crypto_ft_tab 223.extern crypto_ft_tab
225.extern crypto_fl_tab 224.extern crypto_fl_tab
226 225
227.align 4 226ENTRY(aes_enc_blk)
228
229aes_enc_blk:
230 push %ebp 227 push %ebp
231 mov ctx(%esp),%ebp 228 mov ctx(%esp),%ebp
232 229
@@ -290,18 +287,15 @@ aes_enc_blk:
290 mov %r0,(%ebp) 287 mov %r0,(%ebp)
291 pop %ebp 288 pop %ebp
292 ret 289 ret
290ENDPROC(aes_enc_blk)
293 291
294// AES (Rijndael) Decryption Subroutine 292// AES (Rijndael) Decryption Subroutine
295/* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */ 293/* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
296 294
297.global aes_dec_blk
298
299.extern crypto_it_tab 295.extern crypto_it_tab
300.extern crypto_il_tab 296.extern crypto_il_tab
301 297
302.align 4 298ENTRY(aes_dec_blk)
303
304aes_dec_blk:
305 push %ebp 299 push %ebp
306 mov ctx(%esp),%ebp 300 mov ctx(%esp),%ebp
307 301
@@ -365,3 +359,4 @@ aes_dec_blk:
365 mov %r0,(%ebp) 359 mov %r0,(%ebp)
366 pop %ebp 360 pop %ebp
367 ret 361 ret
362ENDPROC(aes_dec_blk)
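
For context on the ENTRY()/ENDPROC() pairs that these assembler conversions introduce: the sketch below approximates what the <linux/linkage.h> macros expand to on x86 in this era. It is a simplified reading of the header (the alignment directive is arch-configurable), not the verbatim definitions.

	/*
	 * ENTRY() emits an aligned global label; ENDPROC() marks the symbol
	 * as a function and records its size, so objdump, perf and similar
	 * tools see properly typed and sized function symbols instead of
	 * the bare .global/.align/label: sequences being removed above.
	 */
	#define ENTRY(name)			\
		.globl name;			\
		.align 4, 0x90;			\
		name:

	#define ENDPROC(name)			\
		.type name, @function;		\
		.size name, . - name
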
diff --git a/arch/x86/crypto/aes-x86_64-asm_64.S b/arch/x86/crypto/aes-x86_64-asm_64.S
index 5b577d5a059b..910565547163 100644
--- a/arch/x86/crypto/aes-x86_64-asm_64.S
+++ b/arch/x86/crypto/aes-x86_64-asm_64.S
@@ -15,6 +15,7 @@
15 15
16.text 16.text
17 17
18#include <linux/linkage.h>
18#include <asm/asm-offsets.h> 19#include <asm/asm-offsets.h>
19 20
20#define R1 %rax 21#define R1 %rax
@@ -49,10 +50,8 @@
49#define R11 %r11 50#define R11 %r11
50 51
51#define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \ 52#define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
52 .global FUNC; \ 53 ENTRY(FUNC); \
53 .type FUNC,@function; \ 54 movq r1,r2; \
54 .align 8; \
55FUNC: movq r1,r2; \
56 movq r3,r4; \ 55 movq r3,r4; \
57 leaq KEY+48(r8),r9; \ 56 leaq KEY+48(r8),r9; \
58 movq r10,r11; \ 57 movq r10,r11; \
@@ -71,14 +70,15 @@ FUNC: movq r1,r2; \
71 je B192; \ 70 je B192; \
72 leaq 32(r9),r9; 71 leaq 32(r9),r9;
73 72
74#define epilogue(r1,r2,r3,r4,r5,r6,r7,r8,r9) \ 73#define epilogue(FUNC,r1,r2,r3,r4,r5,r6,r7,r8,r9) \
75 movq r1,r2; \ 74 movq r1,r2; \
76 movq r3,r4; \ 75 movq r3,r4; \
77 movl r5 ## E,(r9); \ 76 movl r5 ## E,(r9); \
78 movl r6 ## E,4(r9); \ 77 movl r6 ## E,4(r9); \
79 movl r7 ## E,8(r9); \ 78 movl r7 ## E,8(r9); \
80 movl r8 ## E,12(r9); \ 79 movl r8 ## E,12(r9); \
81 ret; 80 ret; \
81 ENDPROC(FUNC);
82 82
83#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \ 83#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
84 movzbl r2 ## H,r5 ## E; \ 84 movzbl r2 ## H,r5 ## E; \
@@ -133,7 +133,7 @@ FUNC: movq r1,r2; \
133#define entry(FUNC,KEY,B128,B192) \ 133#define entry(FUNC,KEY,B128,B192) \
134 prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11) 134 prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)
135 135
136#define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11) 136#define return(FUNC) epilogue(FUNC,R8,R2,R9,R7,R5,R6,R3,R4,R11)
137 137
138#define encrypt_round(TAB,OFFSET) \ 138#define encrypt_round(TAB,OFFSET) \
139 round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \ 139 round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
@@ -151,12 +151,12 @@ FUNC: movq r1,r2; \
151 151
152/* void aes_enc_blk(stuct crypto_tfm *tfm, u8 *out, const u8 *in) */ 152/* void aes_enc_blk(stuct crypto_tfm *tfm, u8 *out, const u8 *in) */
153 153
154 entry(aes_enc_blk,0,enc128,enc192) 154 entry(aes_enc_blk,0,.Le128,.Le192)
155 encrypt_round(crypto_ft_tab,-96) 155 encrypt_round(crypto_ft_tab,-96)
156 encrypt_round(crypto_ft_tab,-80) 156 encrypt_round(crypto_ft_tab,-80)
157enc192: encrypt_round(crypto_ft_tab,-64) 157.Le192: encrypt_round(crypto_ft_tab,-64)
158 encrypt_round(crypto_ft_tab,-48) 158 encrypt_round(crypto_ft_tab,-48)
159enc128: encrypt_round(crypto_ft_tab,-32) 159.Le128: encrypt_round(crypto_ft_tab,-32)
160 encrypt_round(crypto_ft_tab,-16) 160 encrypt_round(crypto_ft_tab,-16)
161 encrypt_round(crypto_ft_tab, 0) 161 encrypt_round(crypto_ft_tab, 0)
162 encrypt_round(crypto_ft_tab, 16) 162 encrypt_round(crypto_ft_tab, 16)
@@ -166,16 +166,16 @@ enc128: encrypt_round(crypto_ft_tab,-32)
166 encrypt_round(crypto_ft_tab, 80) 166 encrypt_round(crypto_ft_tab, 80)
167 encrypt_round(crypto_ft_tab, 96) 167 encrypt_round(crypto_ft_tab, 96)
168 encrypt_final(crypto_fl_tab,112) 168 encrypt_final(crypto_fl_tab,112)
169 return 169 return(aes_enc_blk)
170 170
171/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */ 171/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */
172 172
173 entry(aes_dec_blk,240,dec128,dec192) 173 entry(aes_dec_blk,240,.Ld128,.Ld192)
174 decrypt_round(crypto_it_tab,-96) 174 decrypt_round(crypto_it_tab,-96)
175 decrypt_round(crypto_it_tab,-80) 175 decrypt_round(crypto_it_tab,-80)
176dec192: decrypt_round(crypto_it_tab,-64) 176.Ld192: decrypt_round(crypto_it_tab,-64)
177 decrypt_round(crypto_it_tab,-48) 177 decrypt_round(crypto_it_tab,-48)
178dec128: decrypt_round(crypto_it_tab,-32) 178.Ld128: decrypt_round(crypto_it_tab,-32)
179 decrypt_round(crypto_it_tab,-16) 179 decrypt_round(crypto_it_tab,-16)
180 decrypt_round(crypto_it_tab, 0) 180 decrypt_round(crypto_it_tab, 0)
181 decrypt_round(crypto_it_tab, 16) 181 decrypt_round(crypto_it_tab, 16)
@@ -185,4 +185,4 @@ dec128: decrypt_round(crypto_it_tab,-32)
185 decrypt_round(crypto_it_tab, 80) 185 decrypt_round(crypto_it_tab, 80)
186 decrypt_round(crypto_it_tab, 96) 186 decrypt_round(crypto_it_tab, 96)
187 decrypt_final(crypto_il_tab,112) 187 decrypt_final(crypto_il_tab,112)
188 return 188 return(aes_dec_blk)
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 3470624d7835..04b797767b9e 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -1262,7 +1262,6 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
1262* poly = x^128 + x^127 + x^126 + x^121 + 1 1262* poly = x^128 + x^127 + x^126 + x^121 + 1
1263* 1263*
1264*****************************************************************************/ 1264*****************************************************************************/
1265
1266ENTRY(aesni_gcm_dec) 1265ENTRY(aesni_gcm_dec)
1267 push %r12 1266 push %r12
1268 push %r13 1267 push %r13
@@ -1437,6 +1436,7 @@ _return_T_done_decrypt:
1437 pop %r13 1436 pop %r13
1438 pop %r12 1437 pop %r12
1439 ret 1438 ret
1439ENDPROC(aesni_gcm_dec)
1440 1440
1441 1441
1442/***************************************************************************** 1442/*****************************************************************************
@@ -1700,10 +1700,12 @@ _return_T_done_encrypt:
1700 pop %r13 1700 pop %r13
1701 pop %r12 1701 pop %r12
1702 ret 1702 ret
1703ENDPROC(aesni_gcm_enc)
1703 1704
1704#endif 1705#endif
1705 1706
1706 1707
1708.align 4
1707_key_expansion_128: 1709_key_expansion_128:
1708_key_expansion_256a: 1710_key_expansion_256a:
1709 pshufd $0b11111111, %xmm1, %xmm1 1711 pshufd $0b11111111, %xmm1, %xmm1
@@ -1715,6 +1717,8 @@ _key_expansion_256a:
1715 movaps %xmm0, (TKEYP) 1717 movaps %xmm0, (TKEYP)
1716 add $0x10, TKEYP 1718 add $0x10, TKEYP
1717 ret 1719 ret
1720ENDPROC(_key_expansion_128)
1721ENDPROC(_key_expansion_256a)
1718 1722
1719.align 4 1723.align 4
1720_key_expansion_192a: 1724_key_expansion_192a:
@@ -1739,6 +1743,7 @@ _key_expansion_192a:
1739 movaps %xmm1, 0x10(TKEYP) 1743 movaps %xmm1, 0x10(TKEYP)
1740 add $0x20, TKEYP 1744 add $0x20, TKEYP
1741 ret 1745 ret
1746ENDPROC(_key_expansion_192a)
1742 1747
1743.align 4 1748.align 4
1744_key_expansion_192b: 1749_key_expansion_192b:
@@ -1758,6 +1763,7 @@ _key_expansion_192b:
1758 movaps %xmm0, (TKEYP) 1763 movaps %xmm0, (TKEYP)
1759 add $0x10, TKEYP 1764 add $0x10, TKEYP
1760 ret 1765 ret
1766ENDPROC(_key_expansion_192b)
1761 1767
1762.align 4 1768.align 4
1763_key_expansion_256b: 1769_key_expansion_256b:
@@ -1770,6 +1776,7 @@ _key_expansion_256b:
1770 movaps %xmm2, (TKEYP) 1776 movaps %xmm2, (TKEYP)
1771 add $0x10, TKEYP 1777 add $0x10, TKEYP
1772 ret 1778 ret
1779ENDPROC(_key_expansion_256b)
1773 1780
1774/* 1781/*
1775 * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, 1782 * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
@@ -1882,6 +1889,7 @@ ENTRY(aesni_set_key)
1882 popl KEYP 1889 popl KEYP
1883#endif 1890#endif
1884 ret 1891 ret
1892ENDPROC(aesni_set_key)
1885 1893
1886/* 1894/*
1887 * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) 1895 * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
@@ -1903,6 +1911,7 @@ ENTRY(aesni_enc)
1903 popl KEYP 1911 popl KEYP
1904#endif 1912#endif
1905 ret 1913 ret
1914ENDPROC(aesni_enc)
1906 1915
1907/* 1916/*
1908 * _aesni_enc1: internal ABI 1917 * _aesni_enc1: internal ABI
@@ -1960,6 +1969,7 @@ _aesni_enc1:
1960 movaps 0x70(TKEYP), KEY 1969 movaps 0x70(TKEYP), KEY
1961 AESENCLAST KEY STATE 1970 AESENCLAST KEY STATE
1962 ret 1971 ret
1972ENDPROC(_aesni_enc1)
1963 1973
1964/* 1974/*
1965 * _aesni_enc4: internal ABI 1975 * _aesni_enc4: internal ABI
@@ -2068,6 +2078,7 @@ _aesni_enc4:
2068 AESENCLAST KEY STATE3 2078 AESENCLAST KEY STATE3
2069 AESENCLAST KEY STATE4 2079 AESENCLAST KEY STATE4
2070 ret 2080 ret
2081ENDPROC(_aesni_enc4)
2071 2082
2072/* 2083/*
2073 * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) 2084 * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
@@ -2090,6 +2101,7 @@ ENTRY(aesni_dec)
2090 popl KEYP 2101 popl KEYP
2091#endif 2102#endif
2092 ret 2103 ret
2104ENDPROC(aesni_dec)
2093 2105
2094/* 2106/*
2095 * _aesni_dec1: internal ABI 2107 * _aesni_dec1: internal ABI
@@ -2147,6 +2159,7 @@ _aesni_dec1:
2147 movaps 0x70(TKEYP), KEY 2159 movaps 0x70(TKEYP), KEY
2148 AESDECLAST KEY STATE 2160 AESDECLAST KEY STATE
2149 ret 2161 ret
2162ENDPROC(_aesni_dec1)
2150 2163
2151/* 2164/*
2152 * _aesni_dec4: internal ABI 2165 * _aesni_dec4: internal ABI
@@ -2255,6 +2268,7 @@ _aesni_dec4:
2255 AESDECLAST KEY STATE3 2268 AESDECLAST KEY STATE3
2256 AESDECLAST KEY STATE4 2269 AESDECLAST KEY STATE4
2257 ret 2270 ret
2271ENDPROC(_aesni_dec4)
2258 2272
2259/* 2273/*
2260 * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, 2274 * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
@@ -2312,6 +2326,7 @@ ENTRY(aesni_ecb_enc)
2312 popl LEN 2326 popl LEN
2313#endif 2327#endif
2314 ret 2328 ret
2329ENDPROC(aesni_ecb_enc)
2315 2330
2316/* 2331/*
2317 * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, 2332 * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
@@ -2370,6 +2385,7 @@ ENTRY(aesni_ecb_dec)
2370 popl LEN 2385 popl LEN
2371#endif 2386#endif
2372 ret 2387 ret
2388ENDPROC(aesni_ecb_dec)
2373 2389
2374/* 2390/*
2375 * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, 2391 * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
@@ -2411,6 +2427,7 @@ ENTRY(aesni_cbc_enc)
2411 popl IVP 2427 popl IVP
2412#endif 2428#endif
2413 ret 2429 ret
2430ENDPROC(aesni_cbc_enc)
2414 2431
2415/* 2432/*
2416 * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, 2433 * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
@@ -2501,6 +2518,7 @@ ENTRY(aesni_cbc_dec)
2501 popl IVP 2518 popl IVP
2502#endif 2519#endif
2503 ret 2520 ret
2521ENDPROC(aesni_cbc_dec)
2504 2522
2505#ifdef __x86_64__ 2523#ifdef __x86_64__
2506.align 16 2524.align 16
@@ -2527,6 +2545,7 @@ _aesni_inc_init:
2527 MOVQ_R64_XMM TCTR_LOW INC 2545 MOVQ_R64_XMM TCTR_LOW INC
2528 MOVQ_R64_XMM CTR TCTR_LOW 2546 MOVQ_R64_XMM CTR TCTR_LOW
2529 ret 2547 ret
2548ENDPROC(_aesni_inc_init)
2530 2549
2531/* 2550/*
2532 * _aesni_inc: internal ABI 2551 * _aesni_inc: internal ABI
@@ -2555,6 +2574,7 @@ _aesni_inc:
2555 movaps CTR, IV 2574 movaps CTR, IV
2556 PSHUFB_XMM BSWAP_MASK IV 2575 PSHUFB_XMM BSWAP_MASK IV
2557 ret 2576 ret
2577ENDPROC(_aesni_inc)
2558 2578
2559/* 2579/*
2560 * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, 2580 * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
@@ -2615,4 +2635,5 @@ ENTRY(aesni_ctr_enc)
2615 movups IV, (IVP) 2635 movups IV, (IVP)
2616.Lctr_enc_just_ret: 2636.Lctr_enc_just_ret:
2617 ret 2637 ret
2638ENDPROC(aesni_ctr_enc)
2618#endif 2639#endif
diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S b/arch/x86/crypto/blowfish-x86_64-asm_64.S
index 391d245dc086..246c67006ed0 100644
--- a/arch/x86/crypto/blowfish-x86_64-asm_64.S
+++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S
@@ -20,6 +20,8 @@
20 * 20 *
21 */ 21 */
22 22
23#include <linux/linkage.h>
24
23.file "blowfish-x86_64-asm.S" 25.file "blowfish-x86_64-asm.S"
24.text 26.text
25 27
@@ -116,11 +118,7 @@
116 bswapq RX0; \ 118 bswapq RX0; \
117 xorq RX0, (RIO); 119 xorq RX0, (RIO);
118 120
119.align 8 121ENTRY(__blowfish_enc_blk)
120.global __blowfish_enc_blk
121.type __blowfish_enc_blk,@function;
122
123__blowfish_enc_blk:
124 /* input: 122 /* input:
125 * %rdi: ctx, CTX 123 * %rdi: ctx, CTX
126 * %rsi: dst 124 * %rsi: dst
@@ -148,19 +146,16 @@ __blowfish_enc_blk:
148 146
149 movq %r10, RIO; 147 movq %r10, RIO;
150 test %cl, %cl; 148 test %cl, %cl;
151 jnz __enc_xor; 149 jnz .L__enc_xor;
152 150
153 write_block(); 151 write_block();
154 ret; 152 ret;
155__enc_xor: 153.L__enc_xor:
156 xor_block(); 154 xor_block();
157 ret; 155 ret;
156ENDPROC(__blowfish_enc_blk)
158 157
159.align 8 158ENTRY(blowfish_dec_blk)
160.global blowfish_dec_blk
161.type blowfish_dec_blk,@function;
162
163blowfish_dec_blk:
164 /* input: 159 /* input:
165 * %rdi: ctx, CTX 160 * %rdi: ctx, CTX
166 * %rsi: dst 161 * %rsi: dst
@@ -189,6 +184,7 @@ blowfish_dec_blk:
189 movq %r11, %rbp; 184 movq %r11, %rbp;
190 185
191 ret; 186 ret;
187ENDPROC(blowfish_dec_blk)
192 188
193/********************************************************************** 189/**********************************************************************
194 4-way blowfish, four blocks parallel 190 4-way blowfish, four blocks parallel
@@ -300,11 +296,7 @@ blowfish_dec_blk:
300 bswapq RX3; \ 296 bswapq RX3; \
301 xorq RX3, 24(RIO); 297 xorq RX3, 24(RIO);
302 298
303.align 8 299ENTRY(__blowfish_enc_blk_4way)
304.global __blowfish_enc_blk_4way
305.type __blowfish_enc_blk_4way,@function;
306
307__blowfish_enc_blk_4way:
308 /* input: 300 /* input:
309 * %rdi: ctx, CTX 301 * %rdi: ctx, CTX
310 * %rsi: dst 302 * %rsi: dst
@@ -336,7 +328,7 @@ __blowfish_enc_blk_4way:
336 movq %r11, RIO; 328 movq %r11, RIO;
337 329
338 test %bpl, %bpl; 330 test %bpl, %bpl;
339 jnz __enc_xor4; 331 jnz .L__enc_xor4;
340 332
341 write_block4(); 333 write_block4();
342 334
@@ -344,18 +336,15 @@ __blowfish_enc_blk_4way:
344 popq %rbp; 336 popq %rbp;
345 ret; 337 ret;
346 338
347__enc_xor4: 339.L__enc_xor4:
348 xor_block4(); 340 xor_block4();
349 341
350 popq %rbx; 342 popq %rbx;
351 popq %rbp; 343 popq %rbp;
352 ret; 344 ret;
345ENDPROC(__blowfish_enc_blk_4way)
353 346
354.align 8 347ENTRY(blowfish_dec_blk_4way)
355.global blowfish_dec_blk_4way
356.type blowfish_dec_blk_4way,@function;
357
358blowfish_dec_blk_4way:
359 /* input: 348 /* input:
360 * %rdi: ctx, CTX 349 * %rdi: ctx, CTX
361 * %rsi: dst 350 * %rsi: dst
@@ -387,4 +376,4 @@ blowfish_dec_blk_4way:
387 popq %rbp; 376 popq %rbp;
388 377
389 ret; 378 ret;
390 379ENDPROC(blowfish_dec_blk_4way)
diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
index 2306d2e4816f..cfc163469c71 100644
--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
@@ -15,6 +15,8 @@
15 * http://koti.mbnet.fi/axh/crypto/camellia-BSD-1.2.0-aesni1.tar.xz 15 * http://koti.mbnet.fi/axh/crypto/camellia-BSD-1.2.0-aesni1.tar.xz
16 */ 16 */
17 17
18#include <linux/linkage.h>
19
18#define CAMELLIA_TABLE_BYTE_LEN 272 20#define CAMELLIA_TABLE_BYTE_LEN 272
19 21
20/* struct camellia_ctx: */ 22/* struct camellia_ctx: */
@@ -190,6 +192,7 @@ roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd:
190 %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, 192 %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15,
191 %rcx, (%r9)); 193 %rcx, (%r9));
192 ret; 194 ret;
195ENDPROC(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
193 196
194.align 8 197.align 8
195roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab: 198roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab:
@@ -197,6 +200,7 @@ roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab:
197 %xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11, 200 %xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11,
198 %rax, (%r9)); 201 %rax, (%r9));
199 ret; 202 ret;
203ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
200 204
201/* 205/*
202 * IN/OUT: 206 * IN/OUT:
@@ -709,8 +713,6 @@ roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab:
709.text 713.text
710 714
711.align 8 715.align 8
712.type __camellia_enc_blk16,@function;
713
714__camellia_enc_blk16: 716__camellia_enc_blk16:
715 /* input: 717 /* input:
716 * %rdi: ctx, CTX 718 * %rdi: ctx, CTX
@@ -793,10 +795,9 @@ __camellia_enc_blk16:
793 %xmm15, %rax, %rcx, 24); 795 %xmm15, %rax, %rcx, 24);
794 796
795 jmp .Lenc_done; 797 jmp .Lenc_done;
798ENDPROC(__camellia_enc_blk16)
796 799
797.align 8 800.align 8
798.type __camellia_dec_blk16,@function;
799
800__camellia_dec_blk16: 801__camellia_dec_blk16:
801 /* input: 802 /* input:
802 * %rdi: ctx, CTX 803 * %rdi: ctx, CTX
@@ -877,12 +878,9 @@ __camellia_dec_blk16:
877 ((key_table + (24) * 8) + 4)(CTX)); 878 ((key_table + (24) * 8) + 4)(CTX));
878 879
879 jmp .Ldec_max24; 880 jmp .Ldec_max24;
881ENDPROC(__camellia_dec_blk16)
880 882
881.align 8 883ENTRY(camellia_ecb_enc_16way)
882.global camellia_ecb_enc_16way
883.type camellia_ecb_enc_16way,@function;
884
885camellia_ecb_enc_16way:
886 /* input: 884 /* input:
887 * %rdi: ctx, CTX 885 * %rdi: ctx, CTX
888 * %rsi: dst (16 blocks) 886 * %rsi: dst (16 blocks)
@@ -903,12 +901,9 @@ camellia_ecb_enc_16way:
903 %xmm8, %rsi); 901 %xmm8, %rsi);
904 902
905 ret; 903 ret;
904ENDPROC(camellia_ecb_enc_16way)
906 905
907.align 8 906ENTRY(camellia_ecb_dec_16way)
908.global camellia_ecb_dec_16way
909.type camellia_ecb_dec_16way,@function;
910
911camellia_ecb_dec_16way:
912 /* input: 907 /* input:
913 * %rdi: ctx, CTX 908 * %rdi: ctx, CTX
914 * %rsi: dst (16 blocks) 909 * %rsi: dst (16 blocks)
@@ -934,12 +929,9 @@ camellia_ecb_dec_16way:
934 %xmm8, %rsi); 929 %xmm8, %rsi);
935 930
936 ret; 931 ret;
932ENDPROC(camellia_ecb_dec_16way)
937 933
938.align 8 934ENTRY(camellia_cbc_dec_16way)
939.global camellia_cbc_dec_16way
940.type camellia_cbc_dec_16way,@function;
941
942camellia_cbc_dec_16way:
943 /* input: 935 /* input:
944 * %rdi: ctx, CTX 936 * %rdi: ctx, CTX
945 * %rsi: dst (16 blocks) 937 * %rsi: dst (16 blocks)
@@ -986,6 +978,7 @@ camellia_cbc_dec_16way:
986 %xmm8, %rsi); 978 %xmm8, %rsi);
987 979
988 ret; 980 ret;
981ENDPROC(camellia_cbc_dec_16way)
989 982
990#define inc_le128(x, minus_one, tmp) \ 983#define inc_le128(x, minus_one, tmp) \
991 vpcmpeqq minus_one, x, tmp; \ 984 vpcmpeqq minus_one, x, tmp; \
@@ -993,11 +986,7 @@ camellia_cbc_dec_16way:
993 vpslldq $8, tmp, tmp; \ 986 vpslldq $8, tmp, tmp; \
994 vpsubq tmp, x, x; 987 vpsubq tmp, x, x;
995 988
996.align 8 989ENTRY(camellia_ctr_16way)
997.global camellia_ctr_16way
998.type camellia_ctr_16way,@function;
999
1000camellia_ctr_16way:
1001 /* input: 990 /* input:
1002 * %rdi: ctx, CTX 991 * %rdi: ctx, CTX
1003 * %rsi: dst (16 blocks) 992 * %rsi: dst (16 blocks)
@@ -1100,3 +1089,4 @@ camellia_ctr_16way:
1100 %xmm8, %rsi); 1089 %xmm8, %rsi);
1101 1090
1102 ret; 1091 ret;
1092ENDPROC(camellia_ctr_16way)
diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S
index 0b3374335fdc..310319c601ed 100644
--- a/arch/x86/crypto/camellia-x86_64-asm_64.S
+++ b/arch/x86/crypto/camellia-x86_64-asm_64.S
@@ -20,6 +20,8 @@
20 * 20 *
21 */ 21 */
22 22
23#include <linux/linkage.h>
24
23.file "camellia-x86_64-asm_64.S" 25.file "camellia-x86_64-asm_64.S"
24.text 26.text
25 27
@@ -188,10 +190,7 @@
188 bswapq RAB0; \ 190 bswapq RAB0; \
189 movq RAB0, 4*2(RIO); 191 movq RAB0, 4*2(RIO);
190 192
191.global __camellia_enc_blk; 193ENTRY(__camellia_enc_blk)
192.type __camellia_enc_blk,@function;
193
194__camellia_enc_blk:
195 /* input: 194 /* input:
196 * %rdi: ctx, CTX 195 * %rdi: ctx, CTX
197 * %rsi: dst 196 * %rsi: dst
@@ -214,33 +213,31 @@ __camellia_enc_blk:
214 movl $24, RT1d; /* max */ 213 movl $24, RT1d; /* max */
215 214
216 cmpb $16, key_length(CTX); 215 cmpb $16, key_length(CTX);
217 je __enc_done; 216 je .L__enc_done;
218 217
219 enc_fls(24); 218 enc_fls(24);
220 enc_rounds(24); 219 enc_rounds(24);
221 movl $32, RT1d; /* max */ 220 movl $32, RT1d; /* max */
222 221
223__enc_done: 222.L__enc_done:
224 testb RXORbl, RXORbl; 223 testb RXORbl, RXORbl;
225 movq RDST, RIO; 224 movq RDST, RIO;
226 225
227 jnz __enc_xor; 226 jnz .L__enc_xor;
228 227
229 enc_outunpack(mov, RT1); 228 enc_outunpack(mov, RT1);
230 229
231 movq RRBP, %rbp; 230 movq RRBP, %rbp;
232 ret; 231 ret;
233 232
234__enc_xor: 233.L__enc_xor:
235 enc_outunpack(xor, RT1); 234 enc_outunpack(xor, RT1);
236 235
237 movq RRBP, %rbp; 236 movq RRBP, %rbp;
238 ret; 237 ret;
238ENDPROC(__camellia_enc_blk)
239 239
240.global camellia_dec_blk; 240ENTRY(camellia_dec_blk)
241.type camellia_dec_blk,@function;
242
243camellia_dec_blk:
244 /* input: 241 /* input:
245 * %rdi: ctx, CTX 242 * %rdi: ctx, CTX
246 * %rsi: dst 243 * %rsi: dst
@@ -258,12 +255,12 @@ camellia_dec_blk:
258 dec_inpack(RT2); 255 dec_inpack(RT2);
259 256
260 cmpb $24, RT2bl; 257 cmpb $24, RT2bl;
261 je __dec_rounds16; 258 je .L__dec_rounds16;
262 259
263 dec_rounds(24); 260 dec_rounds(24);
264 dec_fls(24); 261 dec_fls(24);
265 262
266__dec_rounds16: 263.L__dec_rounds16:
267 dec_rounds(16); 264 dec_rounds(16);
268 dec_fls(16); 265 dec_fls(16);
269 dec_rounds(8); 266 dec_rounds(8);
@@ -276,6 +273,7 @@ __dec_rounds16:
276 273
277 movq RRBP, %rbp; 274 movq RRBP, %rbp;
278 ret; 275 ret;
276ENDPROC(camellia_dec_blk)
279 277
280/********************************************************************** 278/**********************************************************************
281 2-way camellia 279 2-way camellia
@@ -426,10 +424,7 @@ __dec_rounds16:
426 bswapq RAB1; \ 424 bswapq RAB1; \
427 movq RAB1, 12*2(RIO); 425 movq RAB1, 12*2(RIO);
428 426
429.global __camellia_enc_blk_2way; 427ENTRY(__camellia_enc_blk_2way)
430.type __camellia_enc_blk_2way,@function;
431
432__camellia_enc_blk_2way:
433 /* input: 428 /* input:
434 * %rdi: ctx, CTX 429 * %rdi: ctx, CTX
435 * %rsi: dst 430 * %rsi: dst
@@ -453,16 +448,16 @@ __camellia_enc_blk_2way:
453 movl $24, RT2d; /* max */ 448 movl $24, RT2d; /* max */
454 449
455 cmpb $16, key_length(CTX); 450 cmpb $16, key_length(CTX);
456 je __enc2_done; 451 je .L__enc2_done;
457 452
458 enc_fls2(24); 453 enc_fls2(24);
459 enc_rounds2(24); 454 enc_rounds2(24);
460 movl $32, RT2d; /* max */ 455 movl $32, RT2d; /* max */
461 456
462__enc2_done: 457.L__enc2_done:
463 test RXORbl, RXORbl; 458 test RXORbl, RXORbl;
464 movq RDST, RIO; 459 movq RDST, RIO;
465 jnz __enc2_xor; 460 jnz .L__enc2_xor;
466 461
467 enc_outunpack2(mov, RT2); 462 enc_outunpack2(mov, RT2);
468 463
@@ -470,17 +465,15 @@ __enc2_done:
470 popq %rbx; 465 popq %rbx;
471 ret; 466 ret;
472 467
473__enc2_xor: 468.L__enc2_xor:
474 enc_outunpack2(xor, RT2); 469 enc_outunpack2(xor, RT2);
475 470
476 movq RRBP, %rbp; 471 movq RRBP, %rbp;
477 popq %rbx; 472 popq %rbx;
478 ret; 473 ret;
474ENDPROC(__camellia_enc_blk_2way)
479 475
480.global camellia_dec_blk_2way; 476ENTRY(camellia_dec_blk_2way)
481.type camellia_dec_blk_2way,@function;
482
483camellia_dec_blk_2way:
484 /* input: 477 /* input:
485 * %rdi: ctx, CTX 478 * %rdi: ctx, CTX
486 * %rsi: dst 479 * %rsi: dst
@@ -499,12 +492,12 @@ camellia_dec_blk_2way:
499 dec_inpack2(RT2); 492 dec_inpack2(RT2);
500 493
501 cmpb $24, RT2bl; 494 cmpb $24, RT2bl;
502 je __dec2_rounds16; 495 je .L__dec2_rounds16;
503 496
504 dec_rounds2(24); 497 dec_rounds2(24);
505 dec_fls2(24); 498 dec_fls2(24);
506 499
507__dec2_rounds16: 500.L__dec2_rounds16:
508 dec_rounds2(16); 501 dec_rounds2(16);
509 dec_fls2(16); 502 dec_fls2(16);
510 dec_rounds2(8); 503 dec_rounds2(8);
@@ -518,3 +511,4 @@ __dec2_rounds16:
518 movq RRBP, %rbp; 511 movq RRBP, %rbp;
519 movq RXOR, %rbx; 512 movq RXOR, %rbx;
520 ret; 513 ret;
514ENDPROC(camellia_dec_blk_2way)
diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
index 15b00ac7cbd3..c35fd5d6ecd2 100644
--- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
@@ -23,6 +23,8 @@
23 * 23 *
24 */ 24 */
25 25
26#include <linux/linkage.h>
27
26.file "cast5-avx-x86_64-asm_64.S" 28.file "cast5-avx-x86_64-asm_64.S"
27 29
28.extern cast_s1 30.extern cast_s1
@@ -211,8 +213,6 @@
211.text 213.text
212 214
213.align 16 215.align 16
214.type __cast5_enc_blk16,@function;
215
216__cast5_enc_blk16: 216__cast5_enc_blk16:
217 /* input: 217 /* input:
218 * %rdi: ctx, CTX 218 * %rdi: ctx, CTX
@@ -263,14 +263,14 @@ __cast5_enc_blk16:
263 263
264 movzbl rr(CTX), %eax; 264 movzbl rr(CTX), %eax;
265 testl %eax, %eax; 265 testl %eax, %eax;
266 jnz __skip_enc; 266 jnz .L__skip_enc;
267 267
268 round(RL, RR, 12, 1); 268 round(RL, RR, 12, 1);
269 round(RR, RL, 13, 2); 269 round(RR, RL, 13, 2);
270 round(RL, RR, 14, 3); 270 round(RL, RR, 14, 3);
271 round(RR, RL, 15, 1); 271 round(RR, RL, 15, 1);
272 272
273__skip_enc: 273.L__skip_enc:
274 popq %rbx; 274 popq %rbx;
275 popq %rbp; 275 popq %rbp;
276 276
@@ -282,10 +282,9 @@ __skip_enc:
282 outunpack_blocks(RR4, RL4, RTMP, RX, RKM); 282 outunpack_blocks(RR4, RL4, RTMP, RX, RKM);
283 283
284 ret; 284 ret;
285ENDPROC(__cast5_enc_blk16)
285 286
286.align 16 287.align 16
287.type __cast5_dec_blk16,@function;
288
289__cast5_dec_blk16: 288__cast5_dec_blk16:
290 /* input: 289 /* input:
291 * %rdi: ctx, CTX 290 * %rdi: ctx, CTX
@@ -323,14 +322,14 @@ __cast5_dec_blk16:
323 322
324 movzbl rr(CTX), %eax; 323 movzbl rr(CTX), %eax;
325 testl %eax, %eax; 324 testl %eax, %eax;
326 jnz __skip_dec; 325 jnz .L__skip_dec;
327 326
328 round(RL, RR, 15, 1); 327 round(RL, RR, 15, 1);
329 round(RR, RL, 14, 3); 328 round(RR, RL, 14, 3);
330 round(RL, RR, 13, 2); 329 round(RL, RR, 13, 2);
331 round(RR, RL, 12, 1); 330 round(RR, RL, 12, 1);
332 331
333__dec_tail: 332.L__dec_tail:
334 round(RL, RR, 11, 3); 333 round(RL, RR, 11, 3);
335 round(RR, RL, 10, 2); 334 round(RR, RL, 10, 2);
336 round(RL, RR, 9, 1); 335 round(RL, RR, 9, 1);
@@ -355,15 +354,12 @@ __dec_tail:
355 354
356 ret; 355 ret;
357 356
358__skip_dec: 357.L__skip_dec:
359 vpsrldq $4, RKR, RKR; 358 vpsrldq $4, RKR, RKR;
360 jmp __dec_tail; 359 jmp .L__dec_tail;
360ENDPROC(__cast5_dec_blk16)
361 361
362.align 16 362ENTRY(cast5_ecb_enc_16way)
363.global cast5_ecb_enc_16way
364.type cast5_ecb_enc_16way,@function;
365
366cast5_ecb_enc_16way:
367 /* input: 363 /* input:
368 * %rdi: ctx, CTX 364 * %rdi: ctx, CTX
369 * %rsi: dst 365 * %rsi: dst
@@ -393,12 +389,9 @@ cast5_ecb_enc_16way:
393 vmovdqu RL4, (7*4*4)(%r11); 389 vmovdqu RL4, (7*4*4)(%r11);
394 390
395 ret; 391 ret;
392ENDPROC(cast5_ecb_enc_16way)
396 393
397.align 16 394ENTRY(cast5_ecb_dec_16way)
398.global cast5_ecb_dec_16way
399.type cast5_ecb_dec_16way,@function;
400
401cast5_ecb_dec_16way:
402 /* input: 395 /* input:
403 * %rdi: ctx, CTX 396 * %rdi: ctx, CTX
404 * %rsi: dst 397 * %rsi: dst
@@ -428,12 +421,9 @@ cast5_ecb_dec_16way:
428 vmovdqu RL4, (7*4*4)(%r11); 421 vmovdqu RL4, (7*4*4)(%r11);
429 422
430 ret; 423 ret;
424ENDPROC(cast5_ecb_dec_16way)
431 425
432.align 16 426ENTRY(cast5_cbc_dec_16way)
433.global cast5_cbc_dec_16way
434.type cast5_cbc_dec_16way,@function;
435
436cast5_cbc_dec_16way:
437 /* input: 427 /* input:
438 * %rdi: ctx, CTX 428 * %rdi: ctx, CTX
439 * %rsi: dst 429 * %rsi: dst
@@ -480,12 +470,9 @@ cast5_cbc_dec_16way:
480 popq %r12; 470 popq %r12;
481 471
482 ret; 472 ret;
473ENDPROC(cast5_cbc_dec_16way)
483 474
484.align 16 475ENTRY(cast5_ctr_16way)
485.global cast5_ctr_16way
486.type cast5_ctr_16way,@function;
487
488cast5_ctr_16way:
489 /* input: 476 /* input:
490 * %rdi: ctx, CTX 477 * %rdi: ctx, CTX
491 * %rsi: dst 478 * %rsi: dst
@@ -556,3 +543,4 @@ cast5_ctr_16way:
556 popq %r12; 543 popq %r12;
557 544
558 ret; 545 ret;
546ENDPROC(cast5_ctr_16way)
diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
index 2569d0da841f..f93b6105a0ce 100644
--- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
@@ -23,6 +23,7 @@
23 * 23 *
24 */ 24 */
25 25
26#include <linux/linkage.h>
26#include "glue_helper-asm-avx.S" 27#include "glue_helper-asm-avx.S"
27 28
28.file "cast6-avx-x86_64-asm_64.S" 29.file "cast6-avx-x86_64-asm_64.S"
@@ -250,8 +251,6 @@
250.text 251.text
251 252
252.align 8 253.align 8
253.type __cast6_enc_blk8,@function;
254
255__cast6_enc_blk8: 254__cast6_enc_blk8:
256 /* input: 255 /* input:
257 * %rdi: ctx, CTX 256 * %rdi: ctx, CTX
@@ -295,10 +294,9 @@ __cast6_enc_blk8:
295 outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); 294 outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
296 295
297 ret; 296 ret;
297ENDPROC(__cast6_enc_blk8)
298 298
299.align 8 299.align 8
300.type __cast6_dec_blk8,@function;
301
302__cast6_dec_blk8: 300__cast6_dec_blk8:
303 /* input: 301 /* input:
304 * %rdi: ctx, CTX 302 * %rdi: ctx, CTX
@@ -341,12 +339,9 @@ __cast6_dec_blk8:
341 outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); 339 outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
342 340
343 ret; 341 ret;
342ENDPROC(__cast6_dec_blk8)
344 343
345.align 8 344ENTRY(cast6_ecb_enc_8way)
346.global cast6_ecb_enc_8way
347.type cast6_ecb_enc_8way,@function;
348
349cast6_ecb_enc_8way:
350 /* input: 345 /* input:
351 * %rdi: ctx, CTX 346 * %rdi: ctx, CTX
352 * %rsi: dst 347 * %rsi: dst
@@ -362,12 +357,9 @@ cast6_ecb_enc_8way:
362 store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); 357 store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
363 358
364 ret; 359 ret;
360ENDPROC(cast6_ecb_enc_8way)
365 361
366.align 8 362ENTRY(cast6_ecb_dec_8way)
367.global cast6_ecb_dec_8way
368.type cast6_ecb_dec_8way,@function;
369
370cast6_ecb_dec_8way:
371 /* input: 363 /* input:
372 * %rdi: ctx, CTX 364 * %rdi: ctx, CTX
373 * %rsi: dst 365 * %rsi: dst
@@ -383,12 +375,9 @@ cast6_ecb_dec_8way:
383 store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); 375 store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
384 376
385 ret; 377 ret;
378ENDPROC(cast6_ecb_dec_8way)
386 379
387.align 8 380ENTRY(cast6_cbc_dec_8way)
388.global cast6_cbc_dec_8way
389.type cast6_cbc_dec_8way,@function;
390
391cast6_cbc_dec_8way:
392 /* input: 381 /* input:
393 * %rdi: ctx, CTX 382 * %rdi: ctx, CTX
394 * %rsi: dst 383 * %rsi: dst
@@ -409,12 +398,9 @@ cast6_cbc_dec_8way:
409 popq %r12; 398 popq %r12;
410 399
411 ret; 400 ret;
401ENDPROC(cast6_cbc_dec_8way)
412 402
413.align 8 403ENTRY(cast6_ctr_8way)
414.global cast6_ctr_8way
415.type cast6_ctr_8way,@function;
416
417cast6_ctr_8way:
418 /* input: 404 /* input:
419 * %rdi: ctx, CTX 405 * %rdi: ctx, CTX
420 * %rsi: dst 406 * %rsi: dst
@@ -437,3 +423,4 @@ cast6_ctr_8way:
437 popq %r12; 423 popq %r12;
438 424
439 ret; 425 ret;
426ENDPROC(cast6_ctr_8way)
diff --git a/arch/x86/crypto/crc32-pclmul_asm.S b/arch/x86/crypto/crc32-pclmul_asm.S
new file mode 100644
index 000000000000..c8335014a044
--- /dev/null
+++ b/arch/x86/crypto/crc32-pclmul_asm.S
@@ -0,0 +1,246 @@
1/* GPL HEADER START
2 *
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 only,
7 * as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License version 2 for more details (a copy is included
13 * in the LICENSE file that accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License
16 * version 2 along with this program; If not, see http://www.gnu.org/licenses
17 *
18 * Please visit http://www.xyratex.com/contact if you need additional
19 * information or have any questions.
20 *
21 * GPL HEADER END
22 */
23
24/*
25 * Copyright 2012 Xyratex Technology Limited
26 *
27 * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32
28 * calculation.
29 * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE)
30 * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found
31 * at:
32 * http://www.intel.com/products/processor/manuals/
33 * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
34 * Volume 2B: Instruction Set Reference, N-Z
35 *
36 * Authors: Gregory Prestas <Gregory_Prestas@us.xyratex.com>
37 * Alexander Boyko <Alexander_Boyko@xyratex.com>
38 */
39
40#include <linux/linkage.h>
41#include <asm/inst.h>
42
43
44.align 16
45/*
46 * [x4*128+32 mod P(x) << 32)]' << 1 = 0x154442bd4
47 * #define CONSTANT_R1 0x154442bd4LL
48 *
49 * [(x4*128-32 mod P(x) << 32)]' << 1 = 0x1c6e41596
50 * #define CONSTANT_R2 0x1c6e41596LL
51 */
52.Lconstant_R2R1:
53 .octa 0x00000001c6e415960000000154442bd4
54/*
55 * [(x128+32 mod P(x) << 32)]' << 1 = 0x1751997d0
56 * #define CONSTANT_R3 0x1751997d0LL
57 *
58 * [(x128-32 mod P(x) << 32)]' << 1 = 0x0ccaa009e
59 * #define CONSTANT_R4 0x0ccaa009eLL
60 */
61.Lconstant_R4R3:
62 .octa 0x00000000ccaa009e00000001751997d0
63/*
64 * [(x64 mod P(x) << 32)]' << 1 = 0x163cd6124
65 * #define CONSTANT_R5 0x163cd6124LL
66 */
67.Lconstant_R5:
68 .octa 0x00000000000000000000000163cd6124
69.Lconstant_mask32:
70 .octa 0x000000000000000000000000FFFFFFFF
71/*
72 * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL
73 *
74 * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))` = 0x1F7011641LL
75 * #define CONSTANT_RU 0x1F7011641LL
76 */
77.Lconstant_RUpoly:
78 .octa 0x00000001F701164100000001DB710641
79
80#define CONSTANT %xmm0
81
82#ifdef __x86_64__
83#define BUF %rdi
84#define LEN %rsi
85#define CRC %edx
86#else
87#define BUF %eax
88#define LEN %edx
89#define CRC %ecx
90#endif
91
92
93
94.text
95/**
96 * Calculate crc32
97 * BUF - buffer (16 bytes aligned)
98 * LEN - sizeof buffer (16 bytes aligned), LEN should be grater than 63
99 * CRC - initial crc32
100 * return %eax crc32
101 * uint crc32_pclmul_le_16(unsigned char const *buffer,
102 * size_t len, uint crc32)
103 */
104.globl crc32_pclmul_le_16
105.align 4, 0x90
106crc32_pclmul_le_16:/* buffer and buffer size are 16 bytes aligned */
107 movdqa (BUF), %xmm1
108 movdqa 0x10(BUF), %xmm2
109 movdqa 0x20(BUF), %xmm3
110 movdqa 0x30(BUF), %xmm4
111 movd CRC, CONSTANT
112 pxor CONSTANT, %xmm1
113 sub $0x40, LEN
114 add $0x40, BUF
115#ifndef __x86_64__
116 /* This is for position independent code(-fPIC) support for 32bit */
117 call delta
118delta:
119 pop %ecx
120#endif
121 cmp $0x40, LEN
122 jb less_64
123
124#ifdef __x86_64__
125 movdqa .Lconstant_R2R1(%rip), CONSTANT
126#else
127 movdqa .Lconstant_R2R1 - delta(%ecx), CONSTANT
128#endif
129
130loop_64:/* 64 bytes Full cache line folding */
131 prefetchnta 0x40(BUF)
132 movdqa %xmm1, %xmm5
133 movdqa %xmm2, %xmm6
134 movdqa %xmm3, %xmm7
135#ifdef __x86_64__
136 movdqa %xmm4, %xmm8
137#endif
138 PCLMULQDQ 00, CONSTANT, %xmm1
139 PCLMULQDQ 00, CONSTANT, %xmm2
140 PCLMULQDQ 00, CONSTANT, %xmm3
141#ifdef __x86_64__
142 PCLMULQDQ 00, CONSTANT, %xmm4
143#endif
144 PCLMULQDQ 0x11, CONSTANT, %xmm5
145 PCLMULQDQ 0x11, CONSTANT, %xmm6
146 PCLMULQDQ 0x11, CONSTANT, %xmm7
147#ifdef __x86_64__
148 PCLMULQDQ 0x11, CONSTANT, %xmm8
149#endif
150 pxor %xmm5, %xmm1
151 pxor %xmm6, %xmm2
152 pxor %xmm7, %xmm3
153#ifdef __x86_64__
154 pxor %xmm8, %xmm4
155#else
156 /* xmm8 unsupported for x32 */
157 movdqa %xmm4, %xmm5
158 PCLMULQDQ 00, CONSTANT, %xmm4
159 PCLMULQDQ 0x11, CONSTANT, %xmm5
160 pxor %xmm5, %xmm4
161#endif
162
163 pxor (BUF), %xmm1
164 pxor 0x10(BUF), %xmm2
165 pxor 0x20(BUF), %xmm3
166 pxor 0x30(BUF), %xmm4
167
168 sub $0x40, LEN
169 add $0x40, BUF
170 cmp $0x40, LEN
171 jge loop_64
172less_64:/* Folding cache line into 128bit */
173#ifdef __x86_64__
174 movdqa .Lconstant_R4R3(%rip), CONSTANT
175#else
176 movdqa .Lconstant_R4R3 - delta(%ecx), CONSTANT
177#endif
178 prefetchnta (BUF)
179
180 movdqa %xmm1, %xmm5
181 PCLMULQDQ 0x00, CONSTANT, %xmm1
182 PCLMULQDQ 0x11, CONSTANT, %xmm5
183 pxor %xmm5, %xmm1
184 pxor %xmm2, %xmm1
185
186 movdqa %xmm1, %xmm5
187 PCLMULQDQ 0x00, CONSTANT, %xmm1
188 PCLMULQDQ 0x11, CONSTANT, %xmm5
189 pxor %xmm5, %xmm1
190 pxor %xmm3, %xmm1
191
192 movdqa %xmm1, %xmm5
193 PCLMULQDQ 0x00, CONSTANT, %xmm1
194 PCLMULQDQ 0x11, CONSTANT, %xmm5
195 pxor %xmm5, %xmm1
196 pxor %xmm4, %xmm1
197
198 cmp $0x10, LEN
199 jb fold_64
200loop_16:/* Folding rest buffer into 128bit */
201 movdqa %xmm1, %xmm5
202 PCLMULQDQ 0x00, CONSTANT, %xmm1
203 PCLMULQDQ 0x11, CONSTANT, %xmm5
204 pxor %xmm5, %xmm1
205 pxor (BUF), %xmm1
206 sub $0x10, LEN
207 add $0x10, BUF
208 cmp $0x10, LEN
209 jge loop_16
210
211fold_64:
212 /* perform the last 64 bit fold, also adds 32 zeroes
213 * to the input stream */
214 PCLMULQDQ 0x01, %xmm1, CONSTANT /* R4 * xmm1.low */
215 psrldq $0x08, %xmm1
216 pxor CONSTANT, %xmm1
217
218 /* final 32-bit fold */
219 movdqa %xmm1, %xmm2
220#ifdef __x86_64__
221 movdqa .Lconstant_R5(%rip), CONSTANT
222 movdqa .Lconstant_mask32(%rip), %xmm3
223#else
224 movdqa .Lconstant_R5 - delta(%ecx), CONSTANT
225 movdqa .Lconstant_mask32 - delta(%ecx), %xmm3
226#endif
227 psrldq $0x04, %xmm2
228 pand %xmm3, %xmm1
229 PCLMULQDQ 0x00, CONSTANT, %xmm1
230 pxor %xmm2, %xmm1
231
232 /* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
233#ifdef __x86_64__
234 movdqa .Lconstant_RUpoly(%rip), CONSTANT
235#else
236 movdqa .Lconstant_RUpoly - delta(%ecx), CONSTANT
237#endif
238 movdqa %xmm1, %xmm2
239 pand %xmm3, %xmm1
240 PCLMULQDQ 0x10, CONSTANT, %xmm1
241 pand %xmm3, %xmm1
242 PCLMULQDQ 0x00, CONSTANT, %xmm1
243 pxor %xmm2, %xmm1
244 pextrd $0x01, %xmm1, %eax
245
246 ret
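
Background on the folding constants defined above (a summary of the standard PCLMULQDQ CRC folding argument, not text from the patch): in $\mathrm{GF}(2)[x]$, splitting the message polynomial as $M(x) = A(x)\,x^{T} \oplus B(x)$ gives

\[
M(x) \bmod P(x) = \bigl(A(x)\cdot(x^{T} \bmod P(x)) \oplus B(x)\bigr) \bmod P(x),
\]

so with a precomputed $k_T = x^{T} \bmod P(x)$ a chunk lying $T$ bits ahead folds into the running window with a single carry-less multiply $A(x)\cdot k_T$ instead of a full reduction. The main loop folds $T = 4\cdot 128 = 512$ bits per iteration (one 64-byte cache line), multiplying the low and high 64-bit halves of each xmm register by R1 and R2 respectively; the 64-bit difference between their exponents matches the halves' offset within the register. R3/R4 then collapse the four registers into one, and R5, the 32-bit mask and RU perform the final 64-to-32-bit Barrett reduction, as the comments in the file note.
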
diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c
new file mode 100644
index 000000000000..9d014a74ef96
--- /dev/null
+++ b/arch/x86/crypto/crc32-pclmul_glue.c
@@ -0,0 +1,201 @@
1/* GPL HEADER START
2 *
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 only,
7 * as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License version 2 for more details (a copy is included
13 * in the LICENSE file that accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License
16 * version 2 along with this program; If not, see http://www.gnu.org/licenses
17 *
18 * Please visit http://www.xyratex.com/contact if you need additional
19 * information or have any questions.
20 *
21 * GPL HEADER END
22 */
23
24/*
25 * Copyright 2012 Xyratex Technology Limited
26 *
27 * Wrappers for kernel crypto shash api to pclmulqdq crc32 imlementation.
28 */
29#include <linux/init.h>
30#include <linux/module.h>
31#include <linux/string.h>
32#include <linux/kernel.h>
33#include <linux/crc32.h>
34#include <crypto/internal/hash.h>
35
36#include <asm/cpufeature.h>
37#include <asm/cpu_device_id.h>
38#include <asm/i387.h>
39
40#define CHKSUM_BLOCK_SIZE 1
41#define CHKSUM_DIGEST_SIZE 4
42
43#define PCLMUL_MIN_LEN 64L /* minimum size of buffer
44 * for crc32_pclmul_le_16 */
45#define SCALE_F 16L /* size of xmm register */
46#define SCALE_F_MASK (SCALE_F - 1)
47
48u32 crc32_pclmul_le_16(unsigned char const *buffer, size_t len, u32 crc32);
49
50static u32 __attribute__((pure))
51 crc32_pclmul_le(u32 crc, unsigned char const *p, size_t len)
52{
53 unsigned int iquotient;
54 unsigned int iremainder;
55 unsigned int prealign;
56
57 if (len < PCLMUL_MIN_LEN + SCALE_F_MASK || !irq_fpu_usable())
58 return crc32_le(crc, p, len);
59
60 if ((long)p & SCALE_F_MASK) {
61 /* align p to 16 byte */
62 prealign = SCALE_F - ((long)p & SCALE_F_MASK);
63
64 crc = crc32_le(crc, p, prealign);
65 len -= prealign;
66 p = (unsigned char *)(((unsigned long)p + SCALE_F_MASK) &
67 ~SCALE_F_MASK);
68 }
69 iquotient = len & (~SCALE_F_MASK);
70 iremainder = len & SCALE_F_MASK;
71
72 kernel_fpu_begin();
73 crc = crc32_pclmul_le_16(p, iquotient, crc);
74 kernel_fpu_end();
75
76 if (iremainder)
77 crc = crc32_le(crc, p + iquotient, iremainder);
78
79 return crc;
80}
81
82static int crc32_pclmul_cra_init(struct crypto_tfm *tfm)
83{
84 u32 *key = crypto_tfm_ctx(tfm);
85
86 *key = 0;
87
88 return 0;
89}
90
91static int crc32_pclmul_setkey(struct crypto_shash *hash, const u8 *key,
92 unsigned int keylen)
93{
94 u32 *mctx = crypto_shash_ctx(hash);
95
96 if (keylen != sizeof(u32)) {
97 crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
98 return -EINVAL;
99 }
100 *mctx = le32_to_cpup((__le32 *)key);
101 return 0;
102}
103
104static int crc32_pclmul_init(struct shash_desc *desc)
105{
106 u32 *mctx = crypto_shash_ctx(desc->tfm);
107 u32 *crcp = shash_desc_ctx(desc);
108
109 *crcp = *mctx;
110
111 return 0;
112}
113
114static int crc32_pclmul_update(struct shash_desc *desc, const u8 *data,
115 unsigned int len)
116{
117 u32 *crcp = shash_desc_ctx(desc);
118
119 *crcp = crc32_pclmul_le(*crcp, data, len);
120 return 0;
121}
122
123/* No final XOR 0xFFFFFFFF, like crc32_le */
124static int __crc32_pclmul_finup(u32 *crcp, const u8 *data, unsigned int len,
125 u8 *out)
126{
127 *(__le32 *)out = cpu_to_le32(crc32_pclmul_le(*crcp, data, len));
128 return 0;
129}
130
131static int crc32_pclmul_finup(struct shash_desc *desc, const u8 *data,
132 unsigned int len, u8 *out)
133{
134 return __crc32_pclmul_finup(shash_desc_ctx(desc), data, len, out);
135}
136
137static int crc32_pclmul_final(struct shash_desc *desc, u8 *out)
138{
139 u32 *crcp = shash_desc_ctx(desc);
140
141 *(__le32 *)out = cpu_to_le32p(crcp);
142 return 0;
143}
144
145static int crc32_pclmul_digest(struct shash_desc *desc, const u8 *data,
146 unsigned int len, u8 *out)
147{
148 return __crc32_pclmul_finup(crypto_shash_ctx(desc->tfm), data, len,
149 out);
150}
151
152static struct shash_alg alg = {
153 .setkey = crc32_pclmul_setkey,
154 .init = crc32_pclmul_init,
155 .update = crc32_pclmul_update,
156 .final = crc32_pclmul_final,
157 .finup = crc32_pclmul_finup,
158 .digest = crc32_pclmul_digest,
159 .descsize = sizeof(u32),
160 .digestsize = CHKSUM_DIGEST_SIZE,
161 .base = {
162 .cra_name = "crc32",
163 .cra_driver_name = "crc32-pclmul",
164 .cra_priority = 200,
165 .cra_blocksize = CHKSUM_BLOCK_SIZE,
166 .cra_ctxsize = sizeof(u32),
167 .cra_module = THIS_MODULE,
168 .cra_init = crc32_pclmul_cra_init,
169 }
170};
171
172static const struct x86_cpu_id crc32pclmul_cpu_id[] = {
173 X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ),
174 {}
175};
176MODULE_DEVICE_TABLE(x86cpu, crc32pclmul_cpu_id);
177
178
179static int __init crc32_pclmul_mod_init(void)
180{
181
182 if (!x86_match_cpu(crc32pclmul_cpu_id)) {
183 pr_info("PCLMULQDQ-NI instructions are not detected.\n");
184 return -ENODEV;
185 }
186 return crypto_register_shash(&alg);
187}
188
189static void __exit crc32_pclmul_mod_fini(void)
190{
191 crypto_unregister_shash(&alg);
192}
193
194module_init(crc32_pclmul_mod_init);
195module_exit(crc32_pclmul_mod_fini);
196
197MODULE_AUTHOR("Alexander Boyko <alexander_boyko@xyratex.com>");
198MODULE_LICENSE("GPL");
199
200MODULE_ALIAS("crc32");
201MODULE_ALIAS("crc32-pclmul");
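
As a usage illustration (not part of the patch): once registered, the accelerated hash is reachable through the generic shash API under the "crc32" algorithm name. A minimal in-kernel caller might look like the sketch below; the calls are the standard crypto shash API, the function and variable names are made up for the example, and the seed defaults to 0 via cra_init (crypto_shash_setkey() with a 4-byte key would override it).

	#include <crypto/hash.h>
	#include <linux/err.h>
	#include <linux/slab.h>

	/* Sketch: compute a CRC32 through the "crc32" shash, which the
	 * crc32-pclmul driver above registers with cra_priority 200. */
	static int example_crc32(const u8 *data, unsigned int len, __le32 *out)
	{
		struct crypto_shash *tfm;
		struct shash_desc *desc;
		int err;

		tfm = crypto_alloc_shash("crc32", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		desc = kzalloc(sizeof(*desc) + crypto_shash_descsize(tfm),
			       GFP_KERNEL);
		if (!desc) {
			crypto_free_shash(tfm);
			return -ENOMEM;
		}
		desc->tfm = tfm;

		/* final()/finup() store the CRC as a little-endian u32 */
		err = crypto_shash_digest(desc, data, len, (u8 *)out);

		kfree(desc);
		crypto_free_shash(tfm);
		return err;
	}
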
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index 93c6d39237ac..cf1a7ec4cc3a 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -42,6 +42,8 @@
42 * SOFTWARE. 42 * SOFTWARE.
43 */ 43 */
44 44
45#include <linux/linkage.h>
46
45## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction 47## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
46 48
47.macro LABEL prefix n 49.macro LABEL prefix n
@@ -68,8 +70,7 @@
68 70
69# unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init); 71# unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init);
70 72
71.global crc_pcl 73ENTRY(crc_pcl)
72crc_pcl:
73#define bufp %rdi 74#define bufp %rdi
74#define bufp_dw %edi 75#define bufp_dw %edi
75#define bufp_w %di 76#define bufp_w %di
@@ -323,6 +324,9 @@ JMPTBL_ENTRY %i
323.noaltmacro 324.noaltmacro
324 i=i+1 325 i=i+1
325.endr 326.endr
327
328ENDPROC(crc_pcl)
329
326 ################################################################ 330 ################################################################
327 ## PCLMULQDQ tables 331 ## PCLMULQDQ tables
328 ## Table is 128 entries x 2 quad words each 332 ## Table is 128 entries x 2 quad words each
diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S
index 1eb7f90cb7b9..586f41aac361 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_asm.S
+++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S
@@ -94,6 +94,7 @@ __clmul_gf128mul_ble:
94 pxor T2, T1 94 pxor T2, T1
95 pxor T1, DATA 95 pxor T1, DATA
96 ret 96 ret
97ENDPROC(__clmul_gf128mul_ble)
97 98
98/* void clmul_ghash_mul(char *dst, const be128 *shash) */ 99/* void clmul_ghash_mul(char *dst, const be128 *shash) */
99ENTRY(clmul_ghash_mul) 100ENTRY(clmul_ghash_mul)
@@ -105,6 +106,7 @@ ENTRY(clmul_ghash_mul)
105 PSHUFB_XMM BSWAP DATA 106 PSHUFB_XMM BSWAP DATA
106 movups DATA, (%rdi) 107 movups DATA, (%rdi)
107 ret 108 ret
109ENDPROC(clmul_ghash_mul)
108 110
109/* 111/*
110 * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, 112 * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
@@ -131,6 +133,7 @@ ENTRY(clmul_ghash_update)
131 movups DATA, (%rdi) 133 movups DATA, (%rdi)
132.Lupdate_just_ret: 134.Lupdate_just_ret:
133 ret 135 ret
136ENDPROC(clmul_ghash_update)
134 137
135/* 138/*
136 * void clmul_ghash_setkey(be128 *shash, const u8 *key); 139 * void clmul_ghash_setkey(be128 *shash, const u8 *key);
@@ -155,3 +158,4 @@ ENTRY(clmul_ghash_setkey)
155 pxor %xmm1, %xmm0 158 pxor %xmm1, %xmm0
156 movups %xmm0, (%rdi) 159 movups %xmm0, (%rdi)
157 ret 160 ret
161ENDPROC(clmul_ghash_setkey)
diff --git a/arch/x86/crypto/salsa20-i586-asm_32.S b/arch/x86/crypto/salsa20-i586-asm_32.S
index 72eb306680b2..329452b8f794 100644
--- a/arch/x86/crypto/salsa20-i586-asm_32.S
+++ b/arch/x86/crypto/salsa20-i586-asm_32.S
@@ -2,11 +2,12 @@
2# D. J. Bernstein 2# D. J. Bernstein
3# Public domain. 3# Public domain.
4 4
5# enter ECRYPT_encrypt_bytes 5#include <linux/linkage.h>
6
6.text 7.text
7.p2align 5 8
8.globl ECRYPT_encrypt_bytes 9# enter salsa20_encrypt_bytes
9ECRYPT_encrypt_bytes: 10ENTRY(salsa20_encrypt_bytes)
10 mov %esp,%eax 11 mov %esp,%eax
11 and $31,%eax 12 and $31,%eax
12 add $256,%eax 13 add $256,%eax
@@ -933,11 +934,10 @@ ECRYPT_encrypt_bytes:
933 add $64,%esi 934 add $64,%esi
934 # goto bytesatleast1 935 # goto bytesatleast1
935 jmp ._bytesatleast1 936 jmp ._bytesatleast1
936# enter ECRYPT_keysetup 937ENDPROC(salsa20_encrypt_bytes)
937.text 938
938.p2align 5 939# enter salsa20_keysetup
939.globl ECRYPT_keysetup 940ENTRY(salsa20_keysetup)
940ECRYPT_keysetup:
941 mov %esp,%eax 941 mov %esp,%eax
942 and $31,%eax 942 and $31,%eax
943 add $256,%eax 943 add $256,%eax
@@ -1060,11 +1060,10 @@ ECRYPT_keysetup:
1060 # leave 1060 # leave
1061 add %eax,%esp 1061 add %eax,%esp
1062 ret 1062 ret
1063# enter ECRYPT_ivsetup 1063ENDPROC(salsa20_keysetup)
1064.text 1064
1065.p2align 5 1065# enter salsa20_ivsetup
1066.globl ECRYPT_ivsetup 1066ENTRY(salsa20_ivsetup)
1067ECRYPT_ivsetup:
1068 mov %esp,%eax 1067 mov %esp,%eax
1069 and $31,%eax 1068 and $31,%eax
1070 add $256,%eax 1069 add $256,%eax
@@ -1112,3 +1111,4 @@ ECRYPT_ivsetup:
1112 # leave 1111 # leave
1113 add %eax,%esp 1112 add %eax,%esp
1114 ret 1113 ret
1114ENDPROC(salsa20_ivsetup)
diff --git a/arch/x86/crypto/salsa20-x86_64-asm_64.S b/arch/x86/crypto/salsa20-x86_64-asm_64.S
index 6214a9b09706..9279e0b2d60e 100644
--- a/arch/x86/crypto/salsa20-x86_64-asm_64.S
+++ b/arch/x86/crypto/salsa20-x86_64-asm_64.S
@@ -1,8 +1,7 @@
1# enter ECRYPT_encrypt_bytes 1#include <linux/linkage.h>
2.text 2
3.p2align 5 3# enter salsa20_encrypt_bytes
4.globl ECRYPT_encrypt_bytes 4ENTRY(salsa20_encrypt_bytes)
5ECRYPT_encrypt_bytes:
6 mov %rsp,%r11 5 mov %rsp,%r11
7 and $31,%r11 6 and $31,%r11
8 add $256,%r11 7 add $256,%r11
@@ -802,11 +801,10 @@ ECRYPT_encrypt_bytes:
802 # comment:fp stack unchanged by jump 801 # comment:fp stack unchanged by jump
803 # goto bytesatleast1 802 # goto bytesatleast1
804 jmp ._bytesatleast1 803 jmp ._bytesatleast1
805# enter ECRYPT_keysetup 804ENDPROC(salsa20_encrypt_bytes)
806.text 805
807.p2align 5 806# enter salsa20_keysetup
808.globl ECRYPT_keysetup 807ENTRY(salsa20_keysetup)
809ECRYPT_keysetup:
810 mov %rsp,%r11 808 mov %rsp,%r11
811 and $31,%r11 809 and $31,%r11
812 add $256,%r11 810 add $256,%r11
@@ -892,11 +890,10 @@ ECRYPT_keysetup:
892 mov %rdi,%rax 890 mov %rdi,%rax
893 mov %rsi,%rdx 891 mov %rsi,%rdx
894 ret 892 ret
895# enter ECRYPT_ivsetup 893ENDPROC(salsa20_keysetup)
896.text 894
897.p2align 5 895# enter salsa20_ivsetup
898.globl ECRYPT_ivsetup 896ENTRY(salsa20_ivsetup)
899ECRYPT_ivsetup:
900 mov %rsp,%r11 897 mov %rsp,%r11
901 and $31,%r11 898 and $31,%r11
902 add $256,%r11 899 add $256,%r11
@@ -918,3 +915,4 @@ ECRYPT_ivsetup:
918 mov %rdi,%rax 915 mov %rdi,%rax
919 mov %rsi,%rdx 916 mov %rsi,%rdx
920 ret 917 ret
918ENDPROC(salsa20_ivsetup)
diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c
index a3a3c0205c16..5e8e67739bb5 100644
--- a/arch/x86/crypto/salsa20_glue.c
+++ b/arch/x86/crypto/salsa20_glue.c
@@ -26,11 +26,6 @@
26#define SALSA20_MIN_KEY_SIZE 16U 26#define SALSA20_MIN_KEY_SIZE 16U
27#define SALSA20_MAX_KEY_SIZE 32U 27#define SALSA20_MAX_KEY_SIZE 32U
28 28
29// use the ECRYPT_* function names
30#define salsa20_keysetup ECRYPT_keysetup
31#define salsa20_ivsetup ECRYPT_ivsetup
32#define salsa20_encrypt_bytes ECRYPT_encrypt_bytes
33
34struct salsa20_ctx 29struct salsa20_ctx
35{ 30{
36 u32 input[16]; 31 u32 input[16];
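With the ECRYPT_* aliases gone from the glue code, the C side now declares and calls the renamed assembler entry points directly. A hedged sketch of what those declarations look like (argument layout as used by salsa20_glue.c; treat the exact signatures as illustrative):

    /* Illustrative prototypes for the renamed assembler routines. */
    asmlinkage void salsa20_keysetup(struct salsa20_ctx *ctx, const u8 *k,
                                     u32 keysize, u32 ivsize);
    asmlinkage void salsa20_ivsetup(struct salsa20_ctx *ctx, const u8 *iv);
    asmlinkage void salsa20_encrypt_bytes(struct salsa20_ctx *ctx,
                                          const u8 *src, u8 *dst, u32 bytes);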
diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
index 02b0e9fe997c..43c938612b74 100644
--- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
@@ -24,6 +24,7 @@
24 * 24 *
25 */ 25 */
26 26
27#include <linux/linkage.h>
27#include "glue_helper-asm-avx.S" 28#include "glue_helper-asm-avx.S"
28 29
29.file "serpent-avx-x86_64-asm_64.S" 30.file "serpent-avx-x86_64-asm_64.S"
@@ -566,8 +567,6 @@
566 transpose_4x4(x0, x1, x2, x3, t0, t1, t2) 567 transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
567 568
568.align 8 569.align 8
569.type __serpent_enc_blk8_avx,@function;
570
571__serpent_enc_blk8_avx: 570__serpent_enc_blk8_avx:
572 /* input: 571 /* input:
573 * %rdi: ctx, CTX 572 * %rdi: ctx, CTX
@@ -619,10 +618,9 @@ __serpent_enc_blk8_avx:
619 write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); 618 write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
620 619
621 ret; 620 ret;
621ENDPROC(__serpent_enc_blk8_avx)
622 622
623.align 8 623.align 8
624.type __serpent_dec_blk8_avx,@function;
625
626__serpent_dec_blk8_avx: 624__serpent_dec_blk8_avx:
627 /* input: 625 /* input:
628 * %rdi: ctx, CTX 626 * %rdi: ctx, CTX
@@ -674,12 +672,9 @@ __serpent_dec_blk8_avx:
674 write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2); 672 write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2);
675 673
676 ret; 674 ret;
675ENDPROC(__serpent_dec_blk8_avx)
677 676
678.align 8 677ENTRY(serpent_ecb_enc_8way_avx)
679.global serpent_ecb_enc_8way_avx
680.type serpent_ecb_enc_8way_avx,@function;
681
682serpent_ecb_enc_8way_avx:
683 /* input: 678 /* input:
684 * %rdi: ctx, CTX 679 * %rdi: ctx, CTX
685 * %rsi: dst 680 * %rsi: dst
@@ -693,12 +688,9 @@ serpent_ecb_enc_8way_avx:
693 store_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); 688 store_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
694 689
695 ret; 690 ret;
691ENDPROC(serpent_ecb_enc_8way_avx)
696 692
697.align 8 693ENTRY(serpent_ecb_dec_8way_avx)
698.global serpent_ecb_dec_8way_avx
699.type serpent_ecb_dec_8way_avx,@function;
700
701serpent_ecb_dec_8way_avx:
702 /* input: 694 /* input:
703 * %rdi: ctx, CTX 695 * %rdi: ctx, CTX
704 * %rsi: dst 696 * %rsi: dst
@@ -712,12 +704,9 @@ serpent_ecb_dec_8way_avx:
712 store_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); 704 store_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
713 705
714 ret; 706 ret;
707ENDPROC(serpent_ecb_dec_8way_avx)
715 708
716.align 8 709ENTRY(serpent_cbc_dec_8way_avx)
717.global serpent_cbc_dec_8way_avx
718.type serpent_cbc_dec_8way_avx,@function;
719
720serpent_cbc_dec_8way_avx:
721 /* input: 710 /* input:
722 * %rdi: ctx, CTX 711 * %rdi: ctx, CTX
723 * %rsi: dst 712 * %rsi: dst
@@ -731,12 +720,9 @@ serpent_cbc_dec_8way_avx:
731 store_cbc_8way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); 720 store_cbc_8way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
732 721
733 ret; 722 ret;
723ENDPROC(serpent_cbc_dec_8way_avx)
734 724
735.align 8 725ENTRY(serpent_ctr_8way_avx)
736.global serpent_ctr_8way_avx
737.type serpent_ctr_8way_avx,@function;
738
739serpent_ctr_8way_avx:
740 /* input: 726 /* input:
741 * %rdi: ctx, CTX 727 * %rdi: ctx, CTX
742 * %rsi: dst 728 * %rsi: dst
@@ -752,3 +738,4 @@ serpent_ctr_8way_avx:
752 store_ctr_8way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); 738 store_ctr_8way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
753 739
754 ret; 740 ret;
741ENDPROC(serpent_ctr_8way_avx)
diff --git a/arch/x86/crypto/serpent-sse2-i586-asm_32.S b/arch/x86/crypto/serpent-sse2-i586-asm_32.S
index c00053d42f99..d348f1553a79 100644
--- a/arch/x86/crypto/serpent-sse2-i586-asm_32.S
+++ b/arch/x86/crypto/serpent-sse2-i586-asm_32.S
@@ -24,6 +24,8 @@
24 * 24 *
25 */ 25 */
26 26
27#include <linux/linkage.h>
28
27.file "serpent-sse2-i586-asm_32.S" 29.file "serpent-sse2-i586-asm_32.S"
28.text 30.text
29 31
@@ -510,11 +512,7 @@
510 pxor t0, x3; \ 512 pxor t0, x3; \
511 movdqu x3, (3*4*4)(out); 513 movdqu x3, (3*4*4)(out);
512 514
513.align 8 515ENTRY(__serpent_enc_blk_4way)
514.global __serpent_enc_blk_4way
515.type __serpent_enc_blk_4way,@function;
516
517__serpent_enc_blk_4way:
518 /* input: 516 /* input:
519 * arg_ctx(%esp): ctx, CTX 517 * arg_ctx(%esp): ctx, CTX
520 * arg_dst(%esp): dst 518 * arg_dst(%esp): dst
@@ -566,22 +564,19 @@ __serpent_enc_blk_4way:
566 movl arg_dst(%esp), %eax; 564 movl arg_dst(%esp), %eax;
567 565
568 cmpb $0, arg_xor(%esp); 566 cmpb $0, arg_xor(%esp);
569 jnz __enc_xor4; 567 jnz .L__enc_xor4;
570 568
571 write_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); 569 write_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);
572 570
573 ret; 571 ret;
574 572
575__enc_xor4: 573.L__enc_xor4:
576 xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); 574 xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);
577 575
578 ret; 576 ret;
577ENDPROC(__serpent_enc_blk_4way)
579 578
580.align 8 579ENTRY(serpent_dec_blk_4way)
581.global serpent_dec_blk_4way
582.type serpent_dec_blk_4way,@function;
583
584serpent_dec_blk_4way:
585 /* input: 580 /* input:
586 * arg_ctx(%esp): ctx, CTX 581 * arg_ctx(%esp): ctx, CTX
587 * arg_dst(%esp): dst 582 * arg_dst(%esp): dst
@@ -633,3 +628,4 @@ serpent_dec_blk_4way:
633 write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA); 628 write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA);
634 629
635 ret; 630 ret;
631ENDPROC(serpent_dec_blk_4way)
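Besides the ENTRY/ENDPROC conversion, the internal jump targets pick up a .L prefix (__enc_xor4 becomes .L__enc_xor4). GNU as treats .L-prefixed names as assembler-local labels that never reach the object file's symbol table, so branch targets inside a routine stop showing up as stray symbols and profilers keep attributing those bytes to the enclosing function. A minimal illustration (generic sketch, not taken from this patch):

    /* hypothetical_blk() has one internal branch target; the .L prefix
     * keeps .Lhypothetical_xor out of the symbol table. */
    ENTRY(hypothetical_blk)
            testb %cl, %cl;
            jnz .Lhypothetical_xor;
            ret;
    .Lhypothetical_xor:
            ret;
    ENDPROC(hypothetical_blk)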
diff --git a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
index 3ee1ff04d3e9..acc066c7c6b2 100644
--- a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
+++ b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
@@ -24,6 +24,8 @@
24 * 24 *
25 */ 25 */
26 26
27#include <linux/linkage.h>
28
27.file "serpent-sse2-x86_64-asm_64.S" 29.file "serpent-sse2-x86_64-asm_64.S"
28.text 30.text
29 31
@@ -632,11 +634,7 @@
632 pxor t0, x3; \ 634 pxor t0, x3; \
633 movdqu x3, (3*4*4)(out); 635 movdqu x3, (3*4*4)(out);
634 636
635.align 8 637ENTRY(__serpent_enc_blk_8way)
636.global __serpent_enc_blk_8way
637.type __serpent_enc_blk_8way,@function;
638
639__serpent_enc_blk_8way:
640 /* input: 638 /* input:
641 * %rdi: ctx, CTX 639 * %rdi: ctx, CTX
642 * %rsi: dst 640 * %rsi: dst
@@ -687,24 +685,21 @@ __serpent_enc_blk_8way:
687 leaq (4*4*4)(%rsi), %rax; 685 leaq (4*4*4)(%rsi), %rax;
688 686
689 testb %cl, %cl; 687 testb %cl, %cl;
690 jnz __enc_xor8; 688 jnz .L__enc_xor8;
691 689
692 write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); 690 write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
693 write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); 691 write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
694 692
695 ret; 693 ret;
696 694
697__enc_xor8: 695.L__enc_xor8:
698 xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); 696 xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
699 xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); 697 xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
700 698
701 ret; 699 ret;
700ENDPROC(__serpent_enc_blk_8way)
702 701
703.align 8 702ENTRY(serpent_dec_blk_8way)
704.global serpent_dec_blk_8way
705.type serpent_dec_blk_8way,@function;
706
707serpent_dec_blk_8way:
708 /* input: 703 /* input:
709 * %rdi: ctx, CTX 704 * %rdi: ctx, CTX
710 * %rsi: dst 705 * %rsi: dst
@@ -756,3 +751,4 @@ serpent_dec_blk_8way:
756 write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2); 751 write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2);
757 752
758 ret; 753 ret;
754ENDPROC(serpent_dec_blk_8way)
diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S
index 49d6987a73d9..a4109506a5e8 100644
--- a/arch/x86/crypto/sha1_ssse3_asm.S
+++ b/arch/x86/crypto/sha1_ssse3_asm.S
@@ -28,6 +28,8 @@
28 * (at your option) any later version. 28 * (at your option) any later version.
29 */ 29 */
30 30
31#include <linux/linkage.h>
32
31#define CTX %rdi // arg1 33#define CTX %rdi // arg1
32#define BUF %rsi // arg2 34#define BUF %rsi // arg2
33#define CNT %rdx // arg3 35#define CNT %rdx // arg3
@@ -69,10 +71,8 @@
69 * param: function's name 71 * param: function's name
70 */ 72 */
71.macro SHA1_VECTOR_ASM name 73.macro SHA1_VECTOR_ASM name
72 .global \name 74 ENTRY(\name)
73 .type \name, @function 75
74 .align 32
75\name:
76 push %rbx 76 push %rbx
77 push %rbp 77 push %rbp
78 push %r12 78 push %r12
@@ -106,7 +106,7 @@
106 pop %rbx 106 pop %rbx
107 ret 107 ret
108 108
109 .size \name, .-\name 109 ENDPROC(\name)
110.endm 110.endm
111 111
112/* 112/*
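Here the conversion happens inside a macro, so every instantiation of SHA1_VECTOR_ASM now emits a properly annotated global function. Further down the same file (not shown in this hunk) the macro is expanded once per implementation, roughly:

    # Illustrative instantiation; the AVX variant is generated the same
    # way under its own build-time guard.
    SHA1_VECTOR_ASM sha1_transform_ssse3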
diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
index ebac16bfa830..8d3e113b2c95 100644
--- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
@@ -23,6 +23,7 @@
23 * 23 *
24 */ 24 */
25 25
26#include <linux/linkage.h>
26#include "glue_helper-asm-avx.S" 27#include "glue_helper-asm-avx.S"
27 28
28.file "twofish-avx-x86_64-asm_64.S" 29.file "twofish-avx-x86_64-asm_64.S"
@@ -243,8 +244,6 @@
243 vpxor x3, wkey, x3; 244 vpxor x3, wkey, x3;
244 245
245.align 8 246.align 8
246.type __twofish_enc_blk8,@function;
247
248__twofish_enc_blk8: 247__twofish_enc_blk8:
249 /* input: 248 /* input:
250 * %rdi: ctx, CTX 249 * %rdi: ctx, CTX
@@ -284,10 +283,9 @@ __twofish_enc_blk8:
284 outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2); 283 outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
285 284
286 ret; 285 ret;
286ENDPROC(__twofish_enc_blk8)
287 287
288.align 8 288.align 8
289.type __twofish_dec_blk8,@function;
290
291__twofish_dec_blk8: 289__twofish_dec_blk8:
292 /* input: 290 /* input:
293 * %rdi: ctx, CTX 291 * %rdi: ctx, CTX
@@ -325,12 +323,9 @@ __twofish_dec_blk8:
325 outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2); 323 outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
326 324
327 ret; 325 ret;
326ENDPROC(__twofish_dec_blk8)
328 327
329.align 8 328ENTRY(twofish_ecb_enc_8way)
330.global twofish_ecb_enc_8way
331.type twofish_ecb_enc_8way,@function;
332
333twofish_ecb_enc_8way:
334 /* input: 329 /* input:
335 * %rdi: ctx, CTX 330 * %rdi: ctx, CTX
336 * %rsi: dst 331 * %rsi: dst
@@ -346,12 +341,9 @@ twofish_ecb_enc_8way:
346 store_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2); 341 store_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
347 342
348 ret; 343 ret;
344ENDPROC(twofish_ecb_enc_8way)
349 345
350.align 8 346ENTRY(twofish_ecb_dec_8way)
351.global twofish_ecb_dec_8way
352.type twofish_ecb_dec_8way,@function;
353
354twofish_ecb_dec_8way:
355 /* input: 347 /* input:
356 * %rdi: ctx, CTX 348 * %rdi: ctx, CTX
357 * %rsi: dst 349 * %rsi: dst
@@ -367,12 +359,9 @@ twofish_ecb_dec_8way:
367 store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); 359 store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
368 360
369 ret; 361 ret;
362ENDPROC(twofish_ecb_dec_8way)
370 363
371.align 8 364ENTRY(twofish_cbc_dec_8way)
372.global twofish_cbc_dec_8way
373.type twofish_cbc_dec_8way,@function;
374
375twofish_cbc_dec_8way:
376 /* input: 365 /* input:
377 * %rdi: ctx, CTX 366 * %rdi: ctx, CTX
378 * %rsi: dst 367 * %rsi: dst
@@ -393,12 +382,9 @@ twofish_cbc_dec_8way:
393 popq %r12; 382 popq %r12;
394 383
395 ret; 384 ret;
385ENDPROC(twofish_cbc_dec_8way)
396 386
397.align 8 387ENTRY(twofish_ctr_8way)
398.global twofish_ctr_8way
399.type twofish_ctr_8way,@function;
400
401twofish_ctr_8way:
402 /* input: 388 /* input:
403 * %rdi: ctx, CTX 389 * %rdi: ctx, CTX
404 * %rsi: dst 390 * %rsi: dst
@@ -421,3 +407,4 @@ twofish_ctr_8way:
421 popq %r12; 407 popq %r12;
422 408
423 ret; 409 ret;
410ENDPROC(twofish_ctr_8way)
diff --git a/arch/x86/crypto/twofish-i586-asm_32.S b/arch/x86/crypto/twofish-i586-asm_32.S
index 658af4bb35c9..694ea4587ba7 100644
--- a/arch/x86/crypto/twofish-i586-asm_32.S
+++ b/arch/x86/crypto/twofish-i586-asm_32.S
@@ -20,6 +20,7 @@
20.file "twofish-i586-asm.S" 20.file "twofish-i586-asm.S"
21.text 21.text
22 22
23#include <linux/linkage.h>
23#include <asm/asm-offsets.h> 24#include <asm/asm-offsets.h>
24 25
25/* return address at 0 */ 26/* return address at 0 */
@@ -219,11 +220,7 @@
219 xor %esi, d ## D;\ 220 xor %esi, d ## D;\
220 ror $1, d ## D; 221 ror $1, d ## D;
221 222
222.align 4 223ENTRY(twofish_enc_blk)
223.global twofish_enc_blk
224.global twofish_dec_blk
225
226twofish_enc_blk:
227 push %ebp /* save registers according to calling convention*/ 224 push %ebp /* save registers according to calling convention*/
228 push %ebx 225 push %ebx
229 push %esi 226 push %esi
@@ -277,8 +274,9 @@ twofish_enc_blk:
277 pop %ebp 274 pop %ebp
278 mov $1, %eax 275 mov $1, %eax
279 ret 276 ret
277ENDPROC(twofish_enc_blk)
280 278
281twofish_dec_blk: 279ENTRY(twofish_dec_blk)
282 push %ebp /* save registers according to calling convention*/ 280 push %ebp /* save registers according to calling convention*/
283 push %ebx 281 push %ebx
284 push %esi 282 push %esi
@@ -333,3 +331,4 @@ twofish_dec_blk:
333 pop %ebp 331 pop %ebp
334 mov $1, %eax 332 mov $1, %eax
335 ret 333 ret
334ENDPROC(twofish_dec_blk)
diff --git a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
index 5b012a2c5119..1c3b7ceb36d2 100644
--- a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
+++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
@@ -20,6 +20,8 @@
20 * 20 *
21 */ 21 */
22 22
23#include <linux/linkage.h>
24
23.file "twofish-x86_64-asm-3way.S" 25.file "twofish-x86_64-asm-3way.S"
24.text 26.text
25 27
@@ -214,11 +216,7 @@
214 rorq $32, RAB2; \ 216 rorq $32, RAB2; \
215 outunpack3(mov, RIO, 2, RAB, 2); 217 outunpack3(mov, RIO, 2, RAB, 2);
216 218
217.align 8 219ENTRY(__twofish_enc_blk_3way)
218.global __twofish_enc_blk_3way
219.type __twofish_enc_blk_3way,@function;
220
221__twofish_enc_blk_3way:
222 /* input: 220 /* input:
223 * %rdi: ctx, CTX 221 * %rdi: ctx, CTX
224 * %rsi: dst 222 * %rsi: dst
@@ -250,7 +248,7 @@ __twofish_enc_blk_3way:
250 popq %rbp; /* bool xor */ 248 popq %rbp; /* bool xor */
251 249
252 testb %bpl, %bpl; 250 testb %bpl, %bpl;
253 jnz __enc_xor3; 251 jnz .L__enc_xor3;
254 252
255 outunpack_enc3(mov); 253 outunpack_enc3(mov);
256 254
@@ -262,7 +260,7 @@ __twofish_enc_blk_3way:
262 popq %r15; 260 popq %r15;
263 ret; 261 ret;
264 262
265__enc_xor3: 263.L__enc_xor3:
266 outunpack_enc3(xor); 264 outunpack_enc3(xor);
267 265
268 popq %rbx; 266 popq %rbx;
@@ -272,11 +270,9 @@ __enc_xor3:
272 popq %r14; 270 popq %r14;
273 popq %r15; 271 popq %r15;
274 ret; 272 ret;
273ENDPROC(__twofish_enc_blk_3way)
275 274
276.global twofish_dec_blk_3way 275ENTRY(twofish_dec_blk_3way)
277.type twofish_dec_blk_3way,@function;
278
279twofish_dec_blk_3way:
280 /* input: 276 /* input:
281 * %rdi: ctx, CTX 277 * %rdi: ctx, CTX
282 * %rsi: dst 278 * %rsi: dst
@@ -313,4 +309,4 @@ twofish_dec_blk_3way:
313 popq %r14; 309 popq %r14;
314 popq %r15; 310 popq %r15;
315 ret; 311 ret;
316 312ENDPROC(twofish_dec_blk_3way)
diff --git a/arch/x86/crypto/twofish-x86_64-asm_64.S b/arch/x86/crypto/twofish-x86_64-asm_64.S
index 7bcf3fcc3668..a039d21986a2 100644
--- a/arch/x86/crypto/twofish-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-x86_64-asm_64.S
@@ -20,6 +20,7 @@
20.file "twofish-x86_64-asm.S" 20.file "twofish-x86_64-asm.S"
21.text 21.text
22 22
23#include <linux/linkage.h>
23#include <asm/asm-offsets.h> 24#include <asm/asm-offsets.h>
24 25
25#define a_offset 0 26#define a_offset 0
@@ -214,11 +215,7 @@
214 xor %r8d, d ## D;\ 215 xor %r8d, d ## D;\
215 ror $1, d ## D; 216 ror $1, d ## D;
216 217
217.align 8 218ENTRY(twofish_enc_blk)
218.global twofish_enc_blk
219.global twofish_dec_blk
220
221twofish_enc_blk:
222 pushq R1 219 pushq R1
223 220
224 /* %rdi contains the ctx address */ 221 /* %rdi contains the ctx address */
@@ -269,8 +266,9 @@ twofish_enc_blk:
269 popq R1 266 popq R1
270 movq $1,%rax 267 movq $1,%rax
271 ret 268 ret
269ENDPROC(twofish_enc_blk)
272 270
273twofish_dec_blk: 271ENTRY(twofish_dec_blk)
274 pushq R1 272 pushq R1
275 273
276 /* %rdi contains the ctx address */ 274 /* %rdi contains the ctx address */
@@ -320,3 +318,4 @@ twofish_dec_blk:
320 popq R1 318 popq R1
321 movq $1,%rax 319 movq $1,%rax
322 ret 320 ret
321ENDPROC(twofish_dec_blk)
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index a703af19c281..03abf9b70011 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -271,7 +271,7 @@ static int load_aout_binary(struct linux_binprm *bprm)
271 if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC && 271 if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC &&
272 N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) || 272 N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) ||
273 N_TRSIZE(ex) || N_DRSIZE(ex) || 273 N_TRSIZE(ex) || N_DRSIZE(ex) ||
274 i_size_read(bprm->file->f_path.dentry->d_inode) < 274 i_size_read(file_inode(bprm->file)) <
275 ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { 275 ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
276 return -ENOEXEC; 276 return -ENOEXEC;
277 } 277 }
@@ -425,12 +425,10 @@ beyond_if:
425 425
426static int load_aout_library(struct file *file) 426static int load_aout_library(struct file *file)
427{ 427{
428 struct inode *inode;
429 unsigned long bss, start_addr, len, error; 428 unsigned long bss, start_addr, len, error;
430 int retval; 429 int retval;
431 struct exec ex; 430 struct exec ex;
432 431
433 inode = file->f_path.dentry->d_inode;
434 432
435 retval = -ENOEXEC; 433 retval = -ENOEXEC;
436 error = kernel_read(file, 0, (char *) &ex, sizeof(ex)); 434 error = kernel_read(file, 0, (char *) &ex, sizeof(ex));
@@ -440,7 +438,7 @@ static int load_aout_library(struct file *file)
440 /* We come in here for the regular a.out style of shared libraries */ 438 /* We come in here for the regular a.out style of shared libraries */
441 if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || N_TRSIZE(ex) || 439 if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || N_TRSIZE(ex) ||
442 N_DRSIZE(ex) || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) || 440 N_DRSIZE(ex) || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) ||
443 i_size_read(inode) < 441 i_size_read(file_inode(file)) <
444 ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { 442 ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
445 goto out; 443 goto out;
446 } 444 }
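The ia32_aout.c hunks are part of a tree-wide switch to the file_inode() accessor, which also lets load_aout_library() drop its local inode variable. The helper is tiny; at the time it amounted to roughly the following (paraphrase, not part of this diff):

    /* Approximate definition of the accessor being adopted here;
     * later kernels cache the inode pointer in struct file itself. */
    static inline struct inode *file_inode(const struct file *f)
    {
            return f->f_path.dentry->d_inode;
    }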
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 28677c55113f..60c89f30c727 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -102,7 +102,14 @@ extern void efi_call_phys_epilog(void);
102extern void efi_unmap_memmap(void); 102extern void efi_unmap_memmap(void);
103extern void efi_memory_uc(u64 addr, unsigned long size); 103extern void efi_memory_uc(u64 addr, unsigned long size);
104 104
105#ifndef CONFIG_EFI 105#ifdef CONFIG_EFI
106
107static inline bool efi_is_native(void)
108{
109 return IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT);
110}
111
112#else
106/* 113/*
107 * IF EFI is not configured, have the EFI calls return -ENOSYS. 114 * IF EFI is not configured, have the EFI calls return -ENOSYS.
108 */ 115 */
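efi_is_native() simply asks whether the firmware's word size matches the kernel's. A throwaway check of the predicate (hypothetical code, only to make the four cases explicit):

    /* Hypothetical check: native only when kernel and EFI widths agree.
     *   64-bit kernel + 64-bit EFI -> true
     *   32-bit kernel + 32-bit EFI -> true
     *   any mixed combination      -> false
     */
    static inline bool efi_is_native_demo(bool kernel_is_64bit, bool efi_is_64bit)
    {
            return kernel_is_64bit == efi_is_64bit;
    }

The setup.c hunk later in this diff uses the real helper to bail out of EFI runtime setup on mixed 32/64-bit configurations.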
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 86cb51e1ca96..0525a8bdf65d 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -72,4 +72,28 @@ int ftrace_int3_handler(struct pt_regs *regs);
72#endif /* __ASSEMBLY__ */ 72#endif /* __ASSEMBLY__ */
73#endif /* CONFIG_FUNCTION_TRACER */ 73#endif /* CONFIG_FUNCTION_TRACER */
74 74
75
76#if !defined(__ASSEMBLY__) && !defined(COMPILE_OFFSETS)
77
78#if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_IA32_EMULATION)
79#include <asm/compat.h>
80
81/*
82 * Because ia32 syscalls do not map to x86_64 syscall numbers
 83 * this screws up the trace output when tracing an ia32 task.
84 * Instead of reporting bogus syscalls, just do not trace them.
85 *
 86 * If the user really wants these, then they should use the
87 * raw syscall tracepoints with filtering.
88 */
89#define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS 1
90static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs)
91{
92 if (is_compat_task())
93 return true;
94 return false;
95}
96#endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_IA32_EMULATION */
97#endif /* !__ASSEMBLY__ && !COMPILE_OFFSETS */
98
75#endif /* _ASM_X86_FTRACE_H */ 99#endif /* _ASM_X86_FTRACE_H */
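The comment explains the policy (do not emit bogus syscall trace events for ia32 tasks); the helper itself only wraps a compat check. An if/return-true/return-false sequence is equivalent to returning the predicate directly, so a tighter form would be (sketch, not what the patch adds):

    /* Equivalent, more compact form of the helper above. */
    static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs)
    {
            return is_compat_task();
    }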
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index dc87b65e9c3a..635a74d22409 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -33,10 +33,10 @@
33 33
34#define KVM_MAX_VCPUS 254 34#define KVM_MAX_VCPUS 254
35#define KVM_SOFT_MAX_VCPUS 160 35#define KVM_SOFT_MAX_VCPUS 160
36#define KVM_MEMORY_SLOTS 32 36#define KVM_USER_MEM_SLOTS 125
37/* memory slots that does not exposed to userspace */ 37/* memory slots that are not exposed to userspace */
38#define KVM_PRIVATE_MEM_SLOTS 4 38#define KVM_PRIVATE_MEM_SLOTS 3
39#define KVM_MEM_SLOTS_NUM (KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) 39#define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)
40 40
41#define KVM_MMIO_SIZE 16 41#define KVM_MMIO_SIZE 16
42 42
@@ -219,11 +219,6 @@ struct kvm_mmu_page {
219 u64 *spt; 219 u64 *spt;
220 /* hold the gfn of each spte inside spt */ 220 /* hold the gfn of each spte inside spt */
221 gfn_t *gfns; 221 gfn_t *gfns;
222 /*
223 * One bit set per slot which has memory
224 * in this shadow page.
225 */
226 DECLARE_BITMAP(slot_bitmap, KVM_MEM_SLOTS_NUM);
227 bool unsync; 222 bool unsync;
228 int root_count; /* Currently serving as active root */ 223 int root_count; /* Currently serving as active root */
229 unsigned int unsync_children; 224 unsigned int unsync_children;
@@ -502,6 +497,13 @@ struct kvm_vcpu_arch {
502 u64 msr_val; 497 u64 msr_val;
503 struct gfn_to_hva_cache data; 498 struct gfn_to_hva_cache data;
504 } pv_eoi; 499 } pv_eoi;
500
501 /*
 502 * Indicates whether the access faulted on its page table in the guest;
 503 * set when fixing a page fault and used to detect unhandleable
 504 * instructions.
505 */
506 bool write_fault_to_shadow_pgtable;
505}; 507};
506 508
507struct kvm_lpage_info { 509struct kvm_lpage_info {
@@ -697,6 +699,11 @@ struct kvm_x86_ops {
697 void (*enable_nmi_window)(struct kvm_vcpu *vcpu); 699 void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
698 void (*enable_irq_window)(struct kvm_vcpu *vcpu); 700 void (*enable_irq_window)(struct kvm_vcpu *vcpu);
699 void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); 701 void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
702 int (*vm_has_apicv)(struct kvm *kvm);
703 void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
704 void (*hwapic_isr_update)(struct kvm *kvm, int isr);
705 void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
706 void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
700 int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); 707 int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
701 int (*get_tdp_level)(void); 708 int (*get_tdp_level)(void);
702 u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); 709 u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
@@ -991,6 +998,7 @@ int kvm_age_hva(struct kvm *kvm, unsigned long hva);
991int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); 998int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
992void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); 999void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
993int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); 1000int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
1001int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
994int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); 1002int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
995int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); 1003int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
996int kvm_cpu_get_interrupt(struct kvm_vcpu *v); 1004int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
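The memory-slot renumbering is easiest to sanity-check with arithmetic: the old split was 32 user + 4 private = 36 slots, the new one is 125 user + 3 private = 128, i.e. a much larger but exactly power-of-two total. A throwaway compile-time check of that arithmetic (hypothetical, not in the patch):

    /* Hypothetical standalone check of the new slot split. */
    #define DEMO_USER_MEM_SLOTS     125
    #define DEMO_PRIVATE_MEM_SLOTS    3
    _Static_assert(DEMO_USER_MEM_SLOTS + DEMO_PRIVATE_MEM_SLOTS == 128,
                   "total slot count should stay a power of two");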
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 65231e173baf..695399f2d5eb 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -27,7 +27,7 @@ static inline bool kvm_check_and_clear_guest_paused(void)
27 * 27 *
28 * Up to four arguments may be passed in rbx, rcx, rdx, and rsi respectively. 28 * Up to four arguments may be passed in rbx, rcx, rdx, and rsi respectively.
29 * The hypercall number should be placed in rax and the return value will be 29 * The hypercall number should be placed in rax and the return value will be
30 * placed in rax. No other registers will be clobbered unless explicited 30 * placed in rax. No other registers will be clobbered unless explicitly
31 * noted by the particular hypercall. 31 * noted by the particular hypercall.
32 */ 32 */
33 33
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index c28fd02f4bf7..d9e9e6c7ed32 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -14,6 +14,9 @@
14struct pci_sysdata { 14struct pci_sysdata {
15 int domain; /* PCI domain */ 15 int domain; /* PCI domain */
16 int node; /* NUMA node */ 16 int node; /* NUMA node */
17#ifdef CONFIG_ACPI
18 void *acpi; /* ACPI-specific data */
19#endif
17#ifdef CONFIG_X86_64 20#ifdef CONFIG_X86_64
18 void *iommu; /* IOMMU private data */ 21 void *iommu; /* IOMMU private data */
19#endif 22#endif
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index 747e5a38b590..fa1195dae425 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -54,7 +54,6 @@ void pcibios_set_cache_line_size(void);
54/* pci-pc.c */ 54/* pci-pc.c */
55 55
56extern int pcibios_last_bus; 56extern int pcibios_last_bus;
57extern struct pci_bus *pci_root_bus;
58extern struct pci_ops pci_root_ops; 57extern struct pci_ops pci_root_ops;
59 58
60void pcibios_scan_specific_bus(int busn); 59void pcibios_scan_specific_bus(int busn);
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 2d946e63ee82..2cd056e3ada3 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -20,7 +20,6 @@
20struct task_struct; 20struct task_struct;
21struct exec_domain; 21struct exec_domain;
22#include <asm/processor.h> 22#include <asm/processor.h>
23#include <asm/ftrace.h>
24#include <linux/atomic.h> 23#include <linux/atomic.h>
25 24
26struct thread_info { 25struct thread_info {
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 235b49fa554b..b6fbf860e398 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -57,9 +57,12 @@
57#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 57#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
58#define SECONDARY_EXEC_ENABLE_EPT 0x00000002 58#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
59#define SECONDARY_EXEC_RDTSCP 0x00000008 59#define SECONDARY_EXEC_RDTSCP 0x00000008
60#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010
60#define SECONDARY_EXEC_ENABLE_VPID 0x00000020 61#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
61#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 62#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
62#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 63#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
64#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100
65#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200
63#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 66#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
64#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 67#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
65 68
@@ -97,6 +100,7 @@ enum vmcs_field {
97 GUEST_GS_SELECTOR = 0x0000080a, 100 GUEST_GS_SELECTOR = 0x0000080a,
98 GUEST_LDTR_SELECTOR = 0x0000080c, 101 GUEST_LDTR_SELECTOR = 0x0000080c,
99 GUEST_TR_SELECTOR = 0x0000080e, 102 GUEST_TR_SELECTOR = 0x0000080e,
103 GUEST_INTR_STATUS = 0x00000810,
100 HOST_ES_SELECTOR = 0x00000c00, 104 HOST_ES_SELECTOR = 0x00000c00,
101 HOST_CS_SELECTOR = 0x00000c02, 105 HOST_CS_SELECTOR = 0x00000c02,
102 HOST_SS_SELECTOR = 0x00000c04, 106 HOST_SS_SELECTOR = 0x00000c04,
@@ -124,6 +128,14 @@ enum vmcs_field {
124 APIC_ACCESS_ADDR_HIGH = 0x00002015, 128 APIC_ACCESS_ADDR_HIGH = 0x00002015,
125 EPT_POINTER = 0x0000201a, 129 EPT_POINTER = 0x0000201a,
126 EPT_POINTER_HIGH = 0x0000201b, 130 EPT_POINTER_HIGH = 0x0000201b,
131 EOI_EXIT_BITMAP0 = 0x0000201c,
132 EOI_EXIT_BITMAP0_HIGH = 0x0000201d,
133 EOI_EXIT_BITMAP1 = 0x0000201e,
134 EOI_EXIT_BITMAP1_HIGH = 0x0000201f,
135 EOI_EXIT_BITMAP2 = 0x00002020,
136 EOI_EXIT_BITMAP2_HIGH = 0x00002021,
137 EOI_EXIT_BITMAP3 = 0x00002022,
138 EOI_EXIT_BITMAP3_HIGH = 0x00002023,
127 GUEST_PHYSICAL_ADDRESS = 0x00002400, 139 GUEST_PHYSICAL_ADDRESS = 0x00002400,
128 GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, 140 GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
129 VMCS_LINK_POINTER = 0x00002800, 141 VMCS_LINK_POINTER = 0x00002800,
@@ -346,9 +358,9 @@ enum vmcs_field {
346 358
347#define AR_RESERVD_MASK 0xfffe0f00 359#define AR_RESERVD_MASK 0xfffe0f00
348 360
349#define TSS_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 0) 361#define TSS_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 0)
350#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 1) 362#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 1)
351#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 2) 363#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 2)
352 364
353#define VMX_NR_VPIDS (1 << 16) 365#define VMX_NR_VPIDS (1 << 16)
354#define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 366#define VMX_VPID_EXTENT_SINGLE_CONTEXT 1
diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h
index cc146d51449e..ca842f2769ef 100644
--- a/arch/x86/include/asm/xen/events.h
+++ b/arch/x86/include/asm/xen/events.h
@@ -16,4 +16,7 @@ static inline int xen_irqs_disabled(struct pt_regs *regs)
16 return raw_irqs_disabled_flags(regs->flags); 16 return raw_irqs_disabled_flags(regs->flags);
17} 17}
18 18
19/* No need for a barrier -- XCHG is a barrier on x86. */
20#define xchg_xen_ulong(ptr, val) xchg((ptr), (val))
21
19#endif /* _ASM_X86_XEN_EVENTS_H */ 22#endif /* _ASM_X86_XEN_EVENTS_H */
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 472b9b783019..6aef9fbc09b7 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -212,4 +212,6 @@ unsigned long arbitrary_virt_to_mfn(void *vaddr);
212void make_lowmem_page_readonly(void *vaddr); 212void make_lowmem_page_readonly(void *vaddr);
213void make_lowmem_page_readwrite(void *vaddr); 213void make_lowmem_page_readwrite(void *vaddr);
214 214
 215#define xen_remap(cookie, size) ioremap((cookie), (size))
216
215#endif /* _ASM_X86_XEN_PAGE_H */ 217#endif /* _ASM_X86_XEN_PAGE_H */
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 979d03bce135..2871fccfee68 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -62,10 +62,12 @@
62#define EXIT_REASON_MCE_DURING_VMENTRY 41 62#define EXIT_REASON_MCE_DURING_VMENTRY 41
63#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 63#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
64#define EXIT_REASON_APIC_ACCESS 44 64#define EXIT_REASON_APIC_ACCESS 44
65#define EXIT_REASON_EOI_INDUCED 45
65#define EXIT_REASON_EPT_VIOLATION 48 66#define EXIT_REASON_EPT_VIOLATION 48
66#define EXIT_REASON_EPT_MISCONFIG 49 67#define EXIT_REASON_EPT_MISCONFIG 49
67#define EXIT_REASON_WBINVD 54 68#define EXIT_REASON_WBINVD 54
68#define EXIT_REASON_XSETBV 55 69#define EXIT_REASON_XSETBV 55
70#define EXIT_REASON_APIC_WRITE 56
69#define EXIT_REASON_INVPCID 58 71#define EXIT_REASON_INVPCID 58
70 72
71#define VMX_EXIT_REASONS \ 73#define VMX_EXIT_REASONS \
@@ -103,7 +105,12 @@
103 { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ 105 { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \
104 { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ 106 { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \
105 { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ 107 { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \
106 { EXIT_REASON_WBINVD, "WBINVD" } 108 { EXIT_REASON_WBINVD, "WBINVD" }, \
109 { EXIT_REASON_APIC_WRITE, "APIC_WRITE" }, \
110 { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \
111 { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \
112 { EXIT_REASON_INVD, "INVD" }, \
113 { EXIT_REASON_INVPCID, "INVPCID" }
107 114
108 115
109#endif /* _UAPIVMX_H */ 116#endif /* _UAPIVMX_H */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index a5b4dce1b7ac..904611bf0e5a 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -131,7 +131,7 @@ static int __init parse_lapic(char *arg)
131{ 131{
132 if (config_enabled(CONFIG_X86_32) && !arg) 132 if (config_enabled(CONFIG_X86_32) && !arg)
133 force_enable_local_apic = 1; 133 force_enable_local_apic = 1;
134 else if (!strncmp(arg, "notscdeadline", 13)) 134 else if (arg && !strncmp(arg, "notscdeadline", 13))
135 setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); 135 setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
136 return 0; 136 return 0;
137} 137}
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index edd77e7508b3..fa96eb0d02fb 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -219,8 +219,7 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
219 */ 219 */
220 WARN_ONCE(1, "WARNING: This combination of AMD" 220 WARN_ONCE(1, "WARNING: This combination of AMD"
221 " processors is not suitable for SMP.\n"); 221 " processors is not suitable for SMP.\n");
222 if (!test_taint(TAINT_UNSAFE_SMP)) 222 add_taint(TAINT_UNSAFE_SMP, LOCKDEP_NOW_UNRELIABLE);
223 add_taint(TAINT_UNSAFE_SMP);
224 223
225valid_k7: 224valid_k7:
226 ; 225 ;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index fc7608a89d93..7bc126346ace 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1082,7 +1082,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1082 /* 1082 /*
1083 * Set taint even when machine check was not enabled. 1083 * Set taint even when machine check was not enabled.
1084 */ 1084 */
1085 add_taint(TAINT_MACHINE_CHECK); 1085 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
1086 1086
1087 severity = mce_severity(&m, cfg->tolerant, NULL); 1087 severity = mce_severity(&m, cfg->tolerant, NULL);
1088 1088
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index 2d5454cd2c4f..1c044b1ccc59 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -33,7 +33,7 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code)
33 smp_processor_id()); 33 smp_processor_id());
34 } 34 }
35 35
36 add_taint(TAINT_MACHINE_CHECK); 36 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
37} 37}
38 38
39/* Set up machine check reporting for processors with Intel style MCE: */ 39/* Set up machine check reporting for processors with Intel style MCE: */
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index 2d7998fb628c..e9a701aecaa1 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -15,7 +15,7 @@
15static void winchip_machine_check(struct pt_regs *regs, long error_code) 15static void winchip_machine_check(struct pt_regs *regs, long error_code)
16{ 16{
17 printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); 17 printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
18 add_taint(TAINT_MACHINE_CHECK); 18 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
19} 19}
20 20
21/* Set up machine check reporting on the Winchip C6 series */ 21/* Set up machine check reporting on the Winchip C6 series */
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index e9fe907cd249..fa72a39e5d46 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -542,7 +542,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
542 542
543 if (tmp != mask_lo) { 543 if (tmp != mask_lo) {
544 printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n"); 544 printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n");
545 add_taint(TAINT_FIRMWARE_WORKAROUND); 545 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
546 mask_lo = tmp; 546 mask_lo = tmp;
547 } 547 }
548 } 548 }
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 4914e94ad6e8..529c8931fc02 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -107,6 +107,27 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =
107 EVENT_CONSTRAINT_END 107 EVENT_CONSTRAINT_END
108}; 108};
109 109
110static struct event_constraint intel_ivb_event_constraints[] __read_mostly =
111{
112 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
113 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
114 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
115 INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), /* L1D_PEND_MISS.PENDING */
 116 INTEL_UEVENT_CONSTRAINT(0x0279, 0xf), /* IDQ.EMPTY */
117 INTEL_UEVENT_CONSTRAINT(0x019c, 0xf), /* IDQ_UOPS_NOT_DELIVERED.CORE */
118 INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
119 INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */
120 INTEL_UEVENT_CONSTRAINT(0x06a3, 0xf), /* CYCLE_ACTIVITY.STALLS_LDM_PENDING */
121 INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
122 INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
123 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
124 INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
125 INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
126 INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
127 INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
128 EVENT_CONSTRAINT_END
129};
130
110static struct extra_reg intel_westmere_extra_regs[] __read_mostly = 131static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
111{ 132{
112 INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), 133 INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
@@ -2095,7 +2116,7 @@ __init int intel_pmu_init(void)
2095 2116
2096 intel_pmu_lbr_init_snb(); 2117 intel_pmu_lbr_init_snb();
2097 2118
2098 x86_pmu.event_constraints = intel_snb_event_constraints; 2119 x86_pmu.event_constraints = intel_ivb_event_constraints;
2099 x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints; 2120 x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
2100 x86_pmu.pebs_aliases = intel_pebs_aliases_snb; 2121 x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
2101 x86_pmu.extra_regs = intel_snb_extra_regs; 2122 x86_pmu.extra_regs = intel_snb_extra_regs;
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 60c78917190c..1e4dbcfe6d31 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -85,7 +85,7 @@ static ssize_t cpuid_read(struct file *file, char __user *buf,
85{ 85{
86 char __user *tmp = buf; 86 char __user *tmp = buf;
87 struct cpuid_regs cmd; 87 struct cpuid_regs cmd;
88 int cpu = iminor(file->f_path.dentry->d_inode); 88 int cpu = iminor(file_inode(file));
89 u64 pos = *ppos; 89 u64 pos = *ppos;
90 ssize_t bytes = 0; 90 ssize_t bytes = 0;
91 int err = 0; 91 int err = 0;
@@ -116,7 +116,7 @@ static int cpuid_open(struct inode *inode, struct file *file)
116 unsigned int cpu; 116 unsigned int cpu;
117 struct cpuinfo_x86 *c; 117 struct cpuinfo_x86 *c;
118 118
119 cpu = iminor(file->f_path.dentry->d_inode); 119 cpu = iminor(file_inode(file));
120 if (cpu >= nr_cpu_ids || !cpu_online(cpu)) 120 if (cpu >= nr_cpu_ids || !cpu_online(cpu))
121 return -ENXIO; /* No such CPU */ 121 return -ENXIO; /* No such CPU */
122 122
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index ae42418bc50f..c8797d55b245 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -232,7 +232,7 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
232 232
233 bust_spinlocks(0); 233 bust_spinlocks(0);
234 die_owner = -1; 234 die_owner = -1;
235 add_taint(TAINT_DIE); 235 add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
236 die_nest_count--; 236 die_nest_count--;
237 if (!die_nest_count) 237 if (!die_nest_count)
238 /* Nest count reaches zero, release the lock. */ 238 /* Nest count reaches zero, release the lock. */
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c
index 48d9d4ea1020..992f442ca155 100644
--- a/arch/x86/kernel/head.c
+++ b/arch/x86/kernel/head.c
@@ -5,8 +5,6 @@
5#include <asm/setup.h> 5#include <asm/setup.h>
6#include <asm/bios_ebda.h> 6#include <asm/bios_ebda.h>
7 7
8#define BIOS_LOWMEM_KILOBYTES 0x413
9
10/* 8/*
11 * The BIOS places the EBDA/XBDA at the top of conventional 9 * The BIOS places the EBDA/XBDA at the top of conventional
12 * memory, and usually decreases the reported amount of 10 * memory, and usually decreases the reported amount of
@@ -16,17 +14,30 @@
16 * chipset: reserve a page before VGA to prevent PCI prefetch 14 * chipset: reserve a page before VGA to prevent PCI prefetch
17 * into it (errata #56). Usually the page is reserved anyways, 15 * into it (errata #56). Usually the page is reserved anyways,
18 * unless you have no PS/2 mouse plugged in. 16 * unless you have no PS/2 mouse plugged in.
17 *
 18 * This function is deliberately very conservative. Losing
19 * memory in the bottom megabyte is rarely a problem, as long
20 * as we have enough memory to install the trampoline. Using
21 * memory that is in use by the BIOS or by some DMA device
22 * the BIOS didn't shut down *is* a big problem.
19 */ 23 */
24
25#define BIOS_LOWMEM_KILOBYTES 0x413
26#define LOWMEM_CAP 0x9f000U /* Absolute maximum */
27#define INSANE_CUTOFF 0x20000U /* Less than this = insane */
28
20void __init reserve_ebda_region(void) 29void __init reserve_ebda_region(void)
21{ 30{
22 unsigned int lowmem, ebda_addr; 31 unsigned int lowmem, ebda_addr;
23 32
24 /* To determine the position of the EBDA and the */ 33 /*
25 /* end of conventional memory, we need to look at */ 34 * To determine the position of the EBDA and the
26 /* the BIOS data area. In a paravirtual environment */ 35 * end of conventional memory, we need to look at
27 /* that area is absent. We'll just have to assume */ 36 * the BIOS data area. In a paravirtual environment
28 /* that the paravirt case can handle memory setup */ 37 * that area is absent. We'll just have to assume
29 /* correctly, without our help. */ 38 * that the paravirt case can handle memory setup
39 * correctly, without our help.
40 */
30 if (paravirt_enabled()) 41 if (paravirt_enabled())
31 return; 42 return;
32 43
@@ -37,19 +48,23 @@ void __init reserve_ebda_region(void)
37 /* start of EBDA area */ 48 /* start of EBDA area */
38 ebda_addr = get_bios_ebda(); 49 ebda_addr = get_bios_ebda();
39 50
40 /* Fixup: bios puts an EBDA in the top 64K segment */ 51 /*
41 /* of conventional memory, but does not adjust lowmem. */ 52 * Note: some old Dells seem to need 4k EBDA without
42 if ((lowmem - ebda_addr) <= 0x10000) 53 * reporting so, so just consider the memory above 0x9f000
43 lowmem = ebda_addr; 54 * to be off limits (bugzilla 2990).
55 */
56
57 /* If the EBDA address is below 128K, assume it is bogus */
58 if (ebda_addr < INSANE_CUTOFF)
59 ebda_addr = LOWMEM_CAP;
44 60
45 /* Fixup: bios does not report an EBDA at all. */ 61 /* If lowmem is less than 128K, assume it is bogus */
46 /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */ 62 if (lowmem < INSANE_CUTOFF)
47 if ((ebda_addr == 0) && (lowmem >= 0x9f000)) 63 lowmem = LOWMEM_CAP;
48 lowmem = 0x9f000;
49 64
50 /* Paranoia: should never happen, but... */ 65 /* Use the lower of the lowmem and EBDA markers as the cutoff */
51 if ((lowmem == 0) || (lowmem >= 0x100000)) 66 lowmem = min(lowmem, ebda_addr);
52 lowmem = 0x9f000; 67 lowmem = min(lowmem, LOWMEM_CAP); /* Absolute cap */
53 68
54 /* reserve all memory between lowmem and the 1MB mark */ 69 /* reserve all memory between lowmem and the 1MB mark */
55 memblock_reserve(lowmem, 0x100000 - lowmem); 70 memblock_reserve(lowmem, 0x100000 - lowmem);
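The rewritten reserve_ebda_region() boils down to: distrust any EBDA pointer or lowmem figure below 128K, take the lower of the two survivors, and never trust anything at or above 0x9f000. A standalone walk-through with made-up inputs (hypothetical user-space code mirroring only the clamping):

    #include <stdio.h>

    #define LOWMEM_CAP    0x9f000u   /* absolute maximum        */
    #define INSANE_CUTOFF 0x20000u   /* less than this = insane */
    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    /* Mirrors the new clamping logic, nothing else. */
    static unsigned int ebda_cutoff(unsigned int lowmem, unsigned int ebda_addr)
    {
            if (ebda_addr < INSANE_CUTOFF)
                    ebda_addr = LOWMEM_CAP;     /* bogus EBDA pointer   */
            if (lowmem < INSANE_CUTOFF)
                    lowmem = LOWMEM_CAP;        /* bogus lowmem figure  */
            lowmem = MIN(lowmem, ebda_addr);
            return MIN(lowmem, LOWMEM_CAP);     /* absolute cap         */
    }

    int main(void)
    {
            /* 639K reported, EBDA pointer just above the cap: clamps to 0x9f000 */
            printf("%#x\n", ebda_cutoff(639 * 1024, 0x9fc00));
            /* missing EBDA pointer (0): falls back to the reported lowmem */
            printf("%#x\n", ebda_cutoff(0x9f000, 0x0));
            return 0;
    }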
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index b7de3b25adb5..6859e9626442 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -48,7 +48,7 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map)
48 .globl startup_64 48 .globl startup_64
49startup_64: 49startup_64:
50 /* 50 /*
51 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1, 51 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
52 * and someone has loaded an identity mapped page table 52 * and someone has loaded an identity mapped page table
53 * for us. These identity mapped page tables map all of the 53 * for us. These identity mapped page tables map all of the
54 * kernel pages and possibly all of memory. 54 * kernel pages and possibly all of memory.
@@ -159,7 +159,7 @@ startup_64:
159 jmp 1f 159 jmp 1f
160ENTRY(secondary_startup_64) 160ENTRY(secondary_startup_64)
161 /* 161 /*
162 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1, 162 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
163 * and someone has loaded a mapped page table. 163 * and someone has loaded a mapped page table.
164 * 164 *
165 * %rsi holds a physical pointer to real_mode_data. 165 * %rsi holds a physical pointer to real_mode_data.
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index e124554598ee..3f06e6149981 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -652,7 +652,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
652{ 652{
653 struct kretprobe_instance *ri = NULL; 653 struct kretprobe_instance *ri = NULL;
654 struct hlist_head *head, empty_rp; 654 struct hlist_head *head, empty_rp;
655 struct hlist_node *node, *tmp; 655 struct hlist_node *tmp;
656 unsigned long flags, orig_ret_address = 0; 656 unsigned long flags, orig_ret_address = 0;
657 unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; 657 unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
658 kprobe_opcode_t *correct_ret_addr = NULL; 658 kprobe_opcode_t *correct_ret_addr = NULL;
@@ -682,7 +682,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
682 * will be the real return address, and all the rest will 682 * will be the real return address, and all the rest will
683 * point to kretprobe_trampoline. 683 * point to kretprobe_trampoline.
684 */ 684 */
685 hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { 685 hlist_for_each_entry_safe(ri, tmp, head, hlist) {
686 if (ri->task != current) 686 if (ri->task != current)
687 /* another task is sharing our hash bucket */ 687 /* another task is sharing our hash bucket */
688 continue; 688 continue;
@@ -701,7 +701,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
701 kretprobe_assert(ri, orig_ret_address, trampoline_address); 701 kretprobe_assert(ri, orig_ret_address, trampoline_address);
702 702
703 correct_ret_addr = ri->ret_addr; 703 correct_ret_addr = ri->ret_addr;
704 hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { 704 hlist_for_each_entry_safe(ri, tmp, head, hlist) {
705 if (ri->task != current) 705 if (ri->task != current)
706 /* another task is sharing our hash bucket */ 706 /* another task is sharing our hash bucket */
707 continue; 707 continue;
@@ -728,7 +728,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
728 728
729 kretprobe_hash_unlock(current, &flags); 729 kretprobe_hash_unlock(current, &flags);
730 730
731 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { 731 hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
732 hlist_del(&ri->hlist); 732 hlist_del(&ri->hlist);
733 kfree(ri); 733 kfree(ri);
734 } 734 }
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 9f966dc0b9e4..0732f0089a3d 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -218,6 +218,9 @@ static void kvm_shutdown(void)
218void __init kvmclock_init(void) 218void __init kvmclock_init(void)
219{ 219{
220 unsigned long mem; 220 unsigned long mem;
221 int size;
222
223 size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
221 224
222 if (!kvm_para_available()) 225 if (!kvm_para_available())
223 return; 226 return;
@@ -231,16 +234,14 @@ void __init kvmclock_init(void)
231 printk(KERN_INFO "kvm-clock: Using msrs %x and %x", 234 printk(KERN_INFO "kvm-clock: Using msrs %x and %x",
232 msr_kvm_system_time, msr_kvm_wall_clock); 235 msr_kvm_system_time, msr_kvm_wall_clock);
233 236
234 mem = memblock_alloc(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS, 237 mem = memblock_alloc(size, PAGE_SIZE);
235 PAGE_SIZE);
236 if (!mem) 238 if (!mem)
237 return; 239 return;
238 hv_clock = __va(mem); 240 hv_clock = __va(mem);
239 241
240 if (kvm_register_clock("boot clock")) { 242 if (kvm_register_clock("boot clock")) {
241 hv_clock = NULL; 243 hv_clock = NULL;
242 memblock_free(mem, 244 memblock_free(mem, size);
243 sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
244 return; 245 return;
245 } 246 }
246 pv_time_ops.sched_clock = kvm_clock_read; 247 pv_time_ops.sched_clock = kvm_clock_read;
@@ -275,7 +276,7 @@ int __init kvm_setup_vsyscall_timeinfo(void)
275 struct pvclock_vcpu_time_info *vcpu_time; 276 struct pvclock_vcpu_time_info *vcpu_time;
276 unsigned int size; 277 unsigned int size;
277 278
278 size = sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS; 279 size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
279 280
280 preempt_disable(); 281 preempt_disable();
281 cpu = smp_processor_id(); 282 cpu = smp_processor_id();
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index f84f5c57de35..60308053fdb2 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -509,3 +509,4 @@ void local_touch_nmi(void)
509{ 509{
510 __this_cpu_write(last_nmi_rip, 0); 510 __this_cpu_write(last_nmi_rip, 0);
511} 511}
512EXPORT_SYMBOL_GPL(local_touch_nmi);
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 85c39590c1a4..2cb9470ea85b 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -185,7 +185,7 @@ int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
185 185
186 for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) { 186 for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) {
187 __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx, 187 __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx,
188 __pa_symbol(i) + (idx*PAGE_SIZE), 188 __pa(i) + (idx*PAGE_SIZE),
189 PAGE_KERNEL_VVAR); 189 PAGE_KERNEL_VVAR);
190 } 190 }
191 191
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 9c857f05cef0..e89acdf6b77b 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1196,8 +1196,7 @@ void __init setup_arch(char **cmdline_p)
 1196 * mismatched firmware/kernel architectures since there is no 1196 * mismatched firmware/kernel architectures since there is no
1197 * support for runtime services. 1197 * support for runtime services.
1198 */ 1198 */
1199 if (efi_enabled(EFI_BOOT) && 1199 if (efi_enabled(EFI_BOOT) && !efi_is_native()) {
1200 IS_ENABLED(CONFIG_X86_64) != efi_enabled(EFI_64BIT)) {
1201 pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n"); 1200 pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n");
1202 efi_unmap_memmap(); 1201 efi_unmap_memmap();
1203 } 1202 }
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a27e76371108..a335cc6cde72 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -24,6 +24,7 @@
24#include "kvm_cache_regs.h" 24#include "kvm_cache_regs.h"
25#include <linux/module.h> 25#include <linux/module.h>
26#include <asm/kvm_emulate.h> 26#include <asm/kvm_emulate.h>
27#include <linux/stringify.h>
27 28
28#include "x86.h" 29#include "x86.h"
29#include "tss.h" 30#include "tss.h"
@@ -43,7 +44,7 @@
43#define OpCL 9ull /* CL register (for shifts) */ 44#define OpCL 9ull /* CL register (for shifts) */
44#define OpImmByte 10ull /* 8-bit sign extended immediate */ 45#define OpImmByte 10ull /* 8-bit sign extended immediate */
45#define OpOne 11ull /* Implied 1 */ 46#define OpOne 11ull /* Implied 1 */
46#define OpImm 12ull /* Sign extended immediate */ 47#define OpImm 12ull /* Sign extended up to 32-bit immediate */
47#define OpMem16 13ull /* Memory operand (16-bit). */ 48#define OpMem16 13ull /* Memory operand (16-bit). */
48#define OpMem32 14ull /* Memory operand (32-bit). */ 49#define OpMem32 14ull /* Memory operand (32-bit). */
49#define OpImmU 15ull /* Immediate operand, zero extended */ 50#define OpImmU 15ull /* Immediate operand, zero extended */
@@ -58,6 +59,7 @@
58#define OpFS 24ull /* FS */ 59#define OpFS 24ull /* FS */
59#define OpGS 25ull /* GS */ 60#define OpGS 25ull /* GS */
60#define OpMem8 26ull /* 8-bit zero extended memory operand */ 61#define OpMem8 26ull /* 8-bit zero extended memory operand */
62#define OpImm64 27ull /* Sign extended 16/32/64-bit immediate */
61 63
62#define OpBits 5 /* Width of operand field */ 64#define OpBits 5 /* Width of operand field */
63#define OpMask ((1ull << OpBits) - 1) 65#define OpMask ((1ull << OpBits) - 1)
@@ -101,6 +103,7 @@
101#define SrcMemFAddr (OpMemFAddr << SrcShift) 103#define SrcMemFAddr (OpMemFAddr << SrcShift)
102#define SrcAcc (OpAcc << SrcShift) 104#define SrcAcc (OpAcc << SrcShift)
103#define SrcImmU16 (OpImmU16 << SrcShift) 105#define SrcImmU16 (OpImmU16 << SrcShift)
106#define SrcImm64 (OpImm64 << SrcShift)
104#define SrcDX (OpDX << SrcShift) 107#define SrcDX (OpDX << SrcShift)
105#define SrcMem8 (OpMem8 << SrcShift) 108#define SrcMem8 (OpMem8 << SrcShift)
106#define SrcMask (OpMask << SrcShift) 109#define SrcMask (OpMask << SrcShift)
@@ -113,6 +116,7 @@
113#define GroupDual (2<<15) /* Alternate decoding of mod == 3 */ 116#define GroupDual (2<<15) /* Alternate decoding of mod == 3 */
114#define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */ 117#define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */
115#define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */ 118#define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */
119#define Escape (5<<15) /* Escape to coprocessor instruction */
116#define Sse (1<<18) /* SSE Vector instruction */ 120#define Sse (1<<18) /* SSE Vector instruction */
117/* Generic ModRM decode. */ 121/* Generic ModRM decode. */
118#define ModRM (1<<19) 122#define ModRM (1<<19)
@@ -146,6 +150,8 @@
146#define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */ 150#define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */
147#define Unaligned ((u64)1 << 42) /* Explicitly unaligned (e.g. MOVDQU) */ 151#define Unaligned ((u64)1 << 42) /* Explicitly unaligned (e.g. MOVDQU) */
148#define Avx ((u64)1 << 43) /* Advanced Vector Extensions */ 152#define Avx ((u64)1 << 43) /* Advanced Vector Extensions */
153#define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */
154#define NoWrite ((u64)1 << 45) /* No writeback */
149 155
150#define X2(x...) x, x 156#define X2(x...) x, x
151#define X3(x...) X2(x), x 157#define X3(x...) X2(x), x
@@ -156,6 +162,27 @@
156#define X8(x...) X4(x), X4(x) 162#define X8(x...) X4(x), X4(x)
157#define X16(x...) X8(x), X8(x) 163#define X16(x...) X8(x), X8(x)
158 164
165#define NR_FASTOP (ilog2(sizeof(ulong)) + 1)
166#define FASTOP_SIZE 8
167
168/*
169 * fastop functions have a special calling convention:
170 *
171 * dst: [rdx]:rax (in/out)
172 * src: rbx (in/out)
173 * src2: rcx (in)
174 * flags: rflags (in/out)
175 *
176 * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
177 * different operand sizes can be reached by calculation, rather than a jump
178 * table (which would be bigger than the code).
179 *
180 * fastop functions are declared as taking a never-defined fastop parameter,
181 * so they can't be called from C directly.
182 */
183
184struct fastop;
185
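A reading aid, not part of the patch: FASTOP2(add), defined below, lays out four 8-byte-aligned stubs starting at em_add ("addb %bl,%al; ret", "addw %bx,%ax; ret", "addl %ebx,%eax; ret" and, on 64-bit, "addq %rbx,%rax; ret"), so the stub for an N-byte operand sits at em_add + log2(N) * FASTOP_SIZE. A minimal sketch of that offset calculation (the helper name is illustrative; the kernel code itself uses __ffs()):

#define FASTOP_SIZE 8

/* offset of the size-specific stub: 0/8/16/24 for 1/2/4/8-byte operands */
static unsigned int fastop_offset(unsigned int op_bytes)
{
	unsigned int shift = 0;

	while ((1u << shift) < op_bytes)
		shift++;			/* log2: 1,2,4,8 -> 0,1,2,3 */
	return shift * FASTOP_SIZE;
}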
159struct opcode { 186struct opcode {
160 u64 flags : 56; 187 u64 flags : 56;
161 u64 intercept : 8; 188 u64 intercept : 8;
@@ -164,6 +191,8 @@ struct opcode {
164 const struct opcode *group; 191 const struct opcode *group;
165 const struct group_dual *gdual; 192 const struct group_dual *gdual;
166 const struct gprefix *gprefix; 193 const struct gprefix *gprefix;
194 const struct escape *esc;
195 void (*fastop)(struct fastop *fake);
167 } u; 196 } u;
168 int (*check_perm)(struct x86_emulate_ctxt *ctxt); 197 int (*check_perm)(struct x86_emulate_ctxt *ctxt);
169}; 198};
@@ -180,6 +209,11 @@ struct gprefix {
180 struct opcode pfx_f3; 209 struct opcode pfx_f3;
181}; 210};
182 211
212struct escape {
213 struct opcode op[8];
214 struct opcode high[64];
215};
216
183/* EFLAGS bit definitions. */ 217/* EFLAGS bit definitions. */
184#define EFLG_ID (1<<21) 218#define EFLG_ID (1<<21)
185#define EFLG_VIP (1<<20) 219#define EFLG_VIP (1<<20)
@@ -407,6 +441,97 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
407 } \ 441 } \
408 } while (0) 442 } while (0)
409 443
444static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
445
446#define FOP_ALIGN ".align " __stringify(FASTOP_SIZE) " \n\t"
447#define FOP_RET "ret \n\t"
448
449#define FOP_START(op) \
450 extern void em_##op(struct fastop *fake); \
451 asm(".pushsection .text, \"ax\" \n\t" \
452 ".global em_" #op " \n\t" \
453 FOP_ALIGN \
454 "em_" #op ": \n\t"
455
456#define FOP_END \
457 ".popsection")
458
459#define FOPNOP() FOP_ALIGN FOP_RET
460
461#define FOP1E(op, dst) \
462 FOP_ALIGN #op " %" #dst " \n\t" FOP_RET
463
464#define FASTOP1(op) \
465 FOP_START(op) \
466 FOP1E(op##b, al) \
467 FOP1E(op##w, ax) \
468 FOP1E(op##l, eax) \
469 ON64(FOP1E(op##q, rax)) \
470 FOP_END
471
472#define FOP2E(op, dst, src) \
473 FOP_ALIGN #op " %" #src ", %" #dst " \n\t" FOP_RET
474
475#define FASTOP2(op) \
476 FOP_START(op) \
477 FOP2E(op##b, al, bl) \
478 FOP2E(op##w, ax, bx) \
479 FOP2E(op##l, eax, ebx) \
480 ON64(FOP2E(op##q, rax, rbx)) \
481 FOP_END
482
483/* 2 operand, word only */
484#define FASTOP2W(op) \
485 FOP_START(op) \
486 FOPNOP() \
487 FOP2E(op##w, ax, bx) \
488 FOP2E(op##l, eax, ebx) \
489 ON64(FOP2E(op##q, rax, rbx)) \
490 FOP_END
491
492/* 2 operand, src is CL */
493#define FASTOP2CL(op) \
494 FOP_START(op) \
495 FOP2E(op##b, al, cl) \
496 FOP2E(op##w, ax, cl) \
497 FOP2E(op##l, eax, cl) \
498 ON64(FOP2E(op##q, rax, cl)) \
499 FOP_END
500
501#define FOP3E(op, dst, src, src2) \
502 FOP_ALIGN #op " %" #src2 ", %" #src ", %" #dst " \n\t" FOP_RET
503
504/* 3-operand, word-only, src2=cl */
505#define FASTOP3WCL(op) \
506 FOP_START(op) \
507 FOPNOP() \
508 FOP3E(op##w, ax, bx, cl) \
509 FOP3E(op##l, eax, ebx, cl) \
510 ON64(FOP3E(op##q, rax, rbx, cl)) \
511 FOP_END
512
513/* Special case for SETcc - 1 instruction per cc */
514#define FOP_SETCC(op) ".align 4; " #op " %al; ret \n\t"
515
516FOP_START(setcc)
517FOP_SETCC(seto)
518FOP_SETCC(setno)
519FOP_SETCC(setc)
520FOP_SETCC(setnc)
521FOP_SETCC(setz)
522FOP_SETCC(setnz)
523FOP_SETCC(setbe)
524FOP_SETCC(setnbe)
525FOP_SETCC(sets)
526FOP_SETCC(setns)
527FOP_SETCC(setp)
528FOP_SETCC(setnp)
529FOP_SETCC(setl)
530FOP_SETCC(setnl)
531FOP_SETCC(setle)
532FOP_SETCC(setnle)
533FOP_END;
534
410#define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex) \ 535#define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex) \
411 do { \ 536 do { \
412 unsigned long _tmp; \ 537 unsigned long _tmp; \
@@ -663,7 +788,7 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
663 ulong la; 788 ulong la;
664 u32 lim; 789 u32 lim;
665 u16 sel; 790 u16 sel;
666 unsigned cpl, rpl; 791 unsigned cpl;
667 792
668 la = seg_base(ctxt, addr.seg) + addr.ea; 793 la = seg_base(ctxt, addr.seg) + addr.ea;
669 switch (ctxt->mode) { 794 switch (ctxt->mode) {
@@ -697,11 +822,6 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
697 goto bad; 822 goto bad;
698 } 823 }
699 cpl = ctxt->ops->cpl(ctxt); 824 cpl = ctxt->ops->cpl(ctxt);
700 if (ctxt->mode == X86EMUL_MODE_REAL)
701 rpl = 0;
702 else
703 rpl = sel & 3;
704 cpl = max(cpl, rpl);
705 if (!(desc.type & 8)) { 825 if (!(desc.type & 8)) {
706 /* data segment */ 826 /* data segment */
707 if (cpl > desc.dpl) 827 if (cpl > desc.dpl)
@@ -852,39 +972,50 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt,
852 return rc; 972 return rc;
853} 973}
854 974
855static int test_cc(unsigned int condition, unsigned int flags) 975FASTOP2(add);
856{ 976FASTOP2(or);
857 int rc = 0; 977FASTOP2(adc);
858 978FASTOP2(sbb);
859 switch ((condition & 15) >> 1) { 979FASTOP2(and);
860 case 0: /* o */ 980FASTOP2(sub);
861 rc |= (flags & EFLG_OF); 981FASTOP2(xor);
862 break; 982FASTOP2(cmp);
863 case 1: /* b/c/nae */ 983FASTOP2(test);
864 rc |= (flags & EFLG_CF); 984
865 break; 985FASTOP3WCL(shld);
866 case 2: /* z/e */ 986FASTOP3WCL(shrd);
867 rc |= (flags & EFLG_ZF); 987
868 break; 988FASTOP2W(imul);
869 case 3: /* be/na */ 989
870 rc |= (flags & (EFLG_CF|EFLG_ZF)); 990FASTOP1(not);
871 break; 991FASTOP1(neg);
872 case 4: /* s */ 992FASTOP1(inc);
873 rc |= (flags & EFLG_SF); 993FASTOP1(dec);
874 break; 994
875 case 5: /* p/pe */ 995FASTOP2CL(rol);
876 rc |= (flags & EFLG_PF); 996FASTOP2CL(ror);
877 break; 997FASTOP2CL(rcl);
878 case 7: /* le/ng */ 998FASTOP2CL(rcr);
879 rc |= (flags & EFLG_ZF); 999FASTOP2CL(shl);
880 /* fall through */ 1000FASTOP2CL(shr);
881 case 6: /* l/nge */ 1001FASTOP2CL(sar);
882 rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF)); 1002
883 break; 1003FASTOP2W(bsf);
884 } 1004FASTOP2W(bsr);
885 1005FASTOP2W(bt);
886 /* Odd condition identifiers (lsb == 1) have inverted sense. */ 1006FASTOP2W(bts);
887 return (!!rc ^ (condition & 1)); 1007FASTOP2W(btr);
1008FASTOP2W(btc);
1009
1010static u8 test_cc(unsigned int condition, unsigned long flags)
1011{
1012 u8 rc;
1013 void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);
1014
1015 flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
1016 asm("push %[flags]; popf; call *%[fastop]"
1017 : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags));
1018 return rc;
888} 1019}
889 1020
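A reading aid, not part of the patch: each FOP_SETCC stub above is ".align 4; set<cc> %al; ret", so stub i lives at em_setcc + 4*i and the new test_cc() simply calls the stub selected by (condition & 0xf) with the guest flags loaded into EFLAGS. For reference, a plain-C restatement of the switch-based test_cc() that the patch removes (names and flag macros here are illustrative; the bit positions are the standard x86 EFLAGS ones):

#include <stdbool.h>

#define F_CF (1u << 0)
#define F_PF (1u << 2)
#define F_ZF (1u << 6)
#define F_SF (1u << 7)
#define F_OF (1u << 11)

static bool test_cc_c(unsigned int cond, unsigned long flags)
{
	bool r = false;

	switch ((cond & 0xf) >> 1) {
	case 0: r = flags & F_OF; break;			/* o / no   */
	case 1: r = flags & F_CF; break;			/* b / nb   */
	case 2: r = flags & F_ZF; break;			/* z / nz   */
	case 3: r = flags & (F_CF | F_ZF); break;		/* be / nbe */
	case 4: r = flags & F_SF; break;			/* s / ns   */
	case 5: r = flags & F_PF; break;			/* p / np   */
	case 6: r = !(flags & F_SF) != !(flags & F_OF); break;	/* l / nl   */
	case 7: r = (flags & F_ZF) ||
		    (!(flags & F_SF) != !(flags & F_OF)); break;/* le / nle */
	}
	return r != (bool)(cond & 1);	/* odd condition codes are negations */
}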
890static void fetch_register_operand(struct operand *op) 1021static void fetch_register_operand(struct operand *op)
@@ -994,6 +1125,53 @@ static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
994 ctxt->ops->put_fpu(ctxt); 1125 ctxt->ops->put_fpu(ctxt);
995} 1126}
996 1127
1128static int em_fninit(struct x86_emulate_ctxt *ctxt)
1129{
1130 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1131 return emulate_nm(ctxt);
1132
1133 ctxt->ops->get_fpu(ctxt);
1134 asm volatile("fninit");
1135 ctxt->ops->put_fpu(ctxt);
1136 return X86EMUL_CONTINUE;
1137}
1138
1139static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
1140{
1141 u16 fcw;
1142
1143 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1144 return emulate_nm(ctxt);
1145
1146 ctxt->ops->get_fpu(ctxt);
1147 asm volatile("fnstcw %0": "+m"(fcw));
1148 ctxt->ops->put_fpu(ctxt);
1149
1150 /* force 2 byte destination */
1151 ctxt->dst.bytes = 2;
1152 ctxt->dst.val = fcw;
1153
1154 return X86EMUL_CONTINUE;
1155}
1156
1157static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
1158{
1159 u16 fsw;
1160
1161 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1162 return emulate_nm(ctxt);
1163
1164 ctxt->ops->get_fpu(ctxt);
1165 asm volatile("fnstsw %0": "+m"(fsw));
1166 ctxt->ops->put_fpu(ctxt);
1167
1168 /* force 2 byte destination */
1169 ctxt->dst.bytes = 2;
1170 ctxt->dst.val = fsw;
1171
1172 return X86EMUL_CONTINUE;
1173}
1174
997static void decode_register_operand(struct x86_emulate_ctxt *ctxt, 1175static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
998 struct operand *op) 1176 struct operand *op)
999{ 1177{
@@ -1534,6 +1712,9 @@ static int writeback(struct x86_emulate_ctxt *ctxt)
1534{ 1712{
1535 int rc; 1713 int rc;
1536 1714
1715 if (ctxt->d & NoWrite)
1716 return X86EMUL_CONTINUE;
1717
1537 switch (ctxt->dst.type) { 1718 switch (ctxt->dst.type) {
1538 case OP_REG: 1719 case OP_REG:
1539 write_register_operand(&ctxt->dst); 1720 write_register_operand(&ctxt->dst);
@@ -1918,47 +2099,6 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
1918 return X86EMUL_CONTINUE; 2099 return X86EMUL_CONTINUE;
1919} 2100}
1920 2101
1921static int em_grp2(struct x86_emulate_ctxt *ctxt)
1922{
1923 switch (ctxt->modrm_reg) {
1924 case 0: /* rol */
1925 emulate_2op_SrcB(ctxt, "rol");
1926 break;
1927 case 1: /* ror */
1928 emulate_2op_SrcB(ctxt, "ror");
1929 break;
1930 case 2: /* rcl */
1931 emulate_2op_SrcB(ctxt, "rcl");
1932 break;
1933 case 3: /* rcr */
1934 emulate_2op_SrcB(ctxt, "rcr");
1935 break;
1936 case 4: /* sal/shl */
1937 case 6: /* sal/shl */
1938 emulate_2op_SrcB(ctxt, "sal");
1939 break;
1940 case 5: /* shr */
1941 emulate_2op_SrcB(ctxt, "shr");
1942 break;
1943 case 7: /* sar */
1944 emulate_2op_SrcB(ctxt, "sar");
1945 break;
1946 }
1947 return X86EMUL_CONTINUE;
1948}
1949
1950static int em_not(struct x86_emulate_ctxt *ctxt)
1951{
1952 ctxt->dst.val = ~ctxt->dst.val;
1953 return X86EMUL_CONTINUE;
1954}
1955
1956static int em_neg(struct x86_emulate_ctxt *ctxt)
1957{
1958 emulate_1op(ctxt, "neg");
1959 return X86EMUL_CONTINUE;
1960}
1961
1962static int em_mul_ex(struct x86_emulate_ctxt *ctxt) 2102static int em_mul_ex(struct x86_emulate_ctxt *ctxt)
1963{ 2103{
1964 u8 ex = 0; 2104 u8 ex = 0;
@@ -2000,12 +2140,6 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt)
2000 int rc = X86EMUL_CONTINUE; 2140 int rc = X86EMUL_CONTINUE;
2001 2141
2002 switch (ctxt->modrm_reg) { 2142 switch (ctxt->modrm_reg) {
2003 case 0: /* inc */
2004 emulate_1op(ctxt, "inc");
2005 break;
2006 case 1: /* dec */
2007 emulate_1op(ctxt, "dec");
2008 break;
2009 case 2: /* call near abs */ { 2143 case 2: /* call near abs */ {
2010 long int old_eip; 2144 long int old_eip;
2011 old_eip = ctxt->_eip; 2145 old_eip = ctxt->_eip;
@@ -2075,7 +2209,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
2075 /* Save real source value, then compare EAX against destination. */ 2209 /* Save real source value, then compare EAX against destination. */
2076 ctxt->src.orig_val = ctxt->src.val; 2210 ctxt->src.orig_val = ctxt->src.val;
2077 ctxt->src.val = reg_read(ctxt, VCPU_REGS_RAX); 2211 ctxt->src.val = reg_read(ctxt, VCPU_REGS_RAX);
2078 emulate_2op_SrcV(ctxt, "cmp"); 2212 fastop(ctxt, em_cmp);
2079 2213
2080 if (ctxt->eflags & EFLG_ZF) { 2214 if (ctxt->eflags & EFLG_ZF) {
2081 /* Success: write back to memory. */ 2215 /* Success: write back to memory. */
@@ -2843,7 +2977,7 @@ static int em_das(struct x86_emulate_ctxt *ctxt)
2843 ctxt->src.type = OP_IMM; 2977 ctxt->src.type = OP_IMM;
2844 ctxt->src.val = 0; 2978 ctxt->src.val = 0;
2845 ctxt->src.bytes = 1; 2979 ctxt->src.bytes = 1;
2846 emulate_2op_SrcV(ctxt, "or"); 2980 fastop(ctxt, em_or);
2847 ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF); 2981 ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);
2848 if (cf) 2982 if (cf)
2849 ctxt->eflags |= X86_EFLAGS_CF; 2983 ctxt->eflags |= X86_EFLAGS_CF;
@@ -2852,6 +2986,24 @@ static int em_das(struct x86_emulate_ctxt *ctxt)
2852 return X86EMUL_CONTINUE; 2986 return X86EMUL_CONTINUE;
2853} 2987}
2854 2988
2989static int em_aad(struct x86_emulate_ctxt *ctxt)
2990{
2991 u8 al = ctxt->dst.val & 0xff;
2992 u8 ah = (ctxt->dst.val >> 8) & 0xff;
2993
2994 al = (al + (ah * ctxt->src.val)) & 0xff;
2995
2996 ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al;
2997
2998 /* Set PF, ZF, SF */
2999 ctxt->src.type = OP_IMM;
3000 ctxt->src.val = 0;
3001 ctxt->src.bytes = 1;
3002 fastop(ctxt, em_or);
3003
3004 return X86EMUL_CONTINUE;
3005}
3006
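A reading aid, not part of the patch: em_aad() above computes AL = (AL + AH * imm8) & 0xff and clears AH, i.e. with the default base 10 it converts unpacked BCD in AH:AL to binary in AL (AH=3, AL=7 -> AL=37); the trailing em_or with 0 exists only to set PF/ZF/SF from the result. A one-line sketch of the arithmetic (the helper name is illustrative):

/* AAD core: new AL from AH:AL and the immediate base (AH is then cleared) */
static unsigned char aad_al(unsigned char al, unsigned char ah,
			    unsigned char base)
{
	return (unsigned char)(al + ah * base);
}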
2855static int em_call(struct x86_emulate_ctxt *ctxt) 3007static int em_call(struct x86_emulate_ctxt *ctxt)
2856{ 3008{
2857 long rel = ctxt->src.val; 3009 long rel = ctxt->src.val;
@@ -2900,64 +3052,6 @@ static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
2900 return X86EMUL_CONTINUE; 3052 return X86EMUL_CONTINUE;
2901} 3053}
2902 3054
2903static int em_add(struct x86_emulate_ctxt *ctxt)
2904{
2905 emulate_2op_SrcV(ctxt, "add");
2906 return X86EMUL_CONTINUE;
2907}
2908
2909static int em_or(struct x86_emulate_ctxt *ctxt)
2910{
2911 emulate_2op_SrcV(ctxt, "or");
2912 return X86EMUL_CONTINUE;
2913}
2914
2915static int em_adc(struct x86_emulate_ctxt *ctxt)
2916{
2917 emulate_2op_SrcV(ctxt, "adc");
2918 return X86EMUL_CONTINUE;
2919}
2920
2921static int em_sbb(struct x86_emulate_ctxt *ctxt)
2922{
2923 emulate_2op_SrcV(ctxt, "sbb");
2924 return X86EMUL_CONTINUE;
2925}
2926
2927static int em_and(struct x86_emulate_ctxt *ctxt)
2928{
2929 emulate_2op_SrcV(ctxt, "and");
2930 return X86EMUL_CONTINUE;
2931}
2932
2933static int em_sub(struct x86_emulate_ctxt *ctxt)
2934{
2935 emulate_2op_SrcV(ctxt, "sub");
2936 return X86EMUL_CONTINUE;
2937}
2938
2939static int em_xor(struct x86_emulate_ctxt *ctxt)
2940{
2941 emulate_2op_SrcV(ctxt, "xor");
2942 return X86EMUL_CONTINUE;
2943}
2944
2945static int em_cmp(struct x86_emulate_ctxt *ctxt)
2946{
2947 emulate_2op_SrcV(ctxt, "cmp");
2948 /* Disable writeback. */
2949 ctxt->dst.type = OP_NONE;
2950 return X86EMUL_CONTINUE;
2951}
2952
2953static int em_test(struct x86_emulate_ctxt *ctxt)
2954{
2955 emulate_2op_SrcV(ctxt, "test");
2956 /* Disable writeback. */
2957 ctxt->dst.type = OP_NONE;
2958 return X86EMUL_CONTINUE;
2959}
2960
2961static int em_xchg(struct x86_emulate_ctxt *ctxt) 3055static int em_xchg(struct x86_emulate_ctxt *ctxt)
2962{ 3056{
2963 /* Write back the register source. */ 3057 /* Write back the register source. */
@@ -2970,16 +3064,10 @@ static int em_xchg(struct x86_emulate_ctxt *ctxt)
2970 return X86EMUL_CONTINUE; 3064 return X86EMUL_CONTINUE;
2971} 3065}
2972 3066
2973static int em_imul(struct x86_emulate_ctxt *ctxt)
2974{
2975 emulate_2op_SrcV_nobyte(ctxt, "imul");
2976 return X86EMUL_CONTINUE;
2977}
2978
2979static int em_imul_3op(struct x86_emulate_ctxt *ctxt) 3067static int em_imul_3op(struct x86_emulate_ctxt *ctxt)
2980{ 3068{
2981 ctxt->dst.val = ctxt->src2.val; 3069 ctxt->dst.val = ctxt->src2.val;
2982 return em_imul(ctxt); 3070 return fastop(ctxt, em_imul);
2983} 3071}
2984 3072
2985static int em_cwd(struct x86_emulate_ctxt *ctxt) 3073static int em_cwd(struct x86_emulate_ctxt *ctxt)
@@ -3300,47 +3388,6 @@ static int em_sti(struct x86_emulate_ctxt *ctxt)
3300 return X86EMUL_CONTINUE; 3388 return X86EMUL_CONTINUE;
3301} 3389}
3302 3390
3303static int em_bt(struct x86_emulate_ctxt *ctxt)
3304{
3305 /* Disable writeback. */
3306 ctxt->dst.type = OP_NONE;
3307 /* only subword offset */
3308 ctxt->src.val &= (ctxt->dst.bytes << 3) - 1;
3309
3310 emulate_2op_SrcV_nobyte(ctxt, "bt");
3311 return X86EMUL_CONTINUE;
3312}
3313
3314static int em_bts(struct x86_emulate_ctxt *ctxt)
3315{
3316 emulate_2op_SrcV_nobyte(ctxt, "bts");
3317 return X86EMUL_CONTINUE;
3318}
3319
3320static int em_btr(struct x86_emulate_ctxt *ctxt)
3321{
3322 emulate_2op_SrcV_nobyte(ctxt, "btr");
3323 return X86EMUL_CONTINUE;
3324}
3325
3326static int em_btc(struct x86_emulate_ctxt *ctxt)
3327{
3328 emulate_2op_SrcV_nobyte(ctxt, "btc");
3329 return X86EMUL_CONTINUE;
3330}
3331
3332static int em_bsf(struct x86_emulate_ctxt *ctxt)
3333{
3334 emulate_2op_SrcV_nobyte(ctxt, "bsf");
3335 return X86EMUL_CONTINUE;
3336}
3337
3338static int em_bsr(struct x86_emulate_ctxt *ctxt)
3339{
3340 emulate_2op_SrcV_nobyte(ctxt, "bsr");
3341 return X86EMUL_CONTINUE;
3342}
3343
3344static int em_cpuid(struct x86_emulate_ctxt *ctxt) 3391static int em_cpuid(struct x86_emulate_ctxt *ctxt)
3345{ 3392{
3346 u32 eax, ebx, ecx, edx; 3393 u32 eax, ebx, ecx, edx;
@@ -3572,7 +3619,9 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
3572#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } 3619#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
3573#define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) } 3620#define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
3574#define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) } 3621#define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
3622#define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
3575#define I(_f, _e) { .flags = (_f), .u.execute = (_e) } 3623#define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
3624#define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
3576#define II(_f, _e, _i) \ 3625#define II(_f, _e, _i) \
3577 { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i } 3626 { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i }
3578#define IIP(_f, _e, _i, _p) \ 3627#define IIP(_f, _e, _i, _p) \
@@ -3583,12 +3632,13 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
3583#define D2bv(_f) D((_f) | ByteOp), D(_f) 3632#define D2bv(_f) D((_f) | ByteOp), D(_f)
3584#define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p) 3633#define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p)
3585#define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e) 3634#define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e)
3635#define F2bv(_f, _e) F((_f) | ByteOp, _e), F(_f, _e)
3586#define I2bvIP(_f, _e, _i, _p) \ 3636#define I2bvIP(_f, _e, _i, _p) \
3587 IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p) 3637 IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p)
3588 3638
3589#define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e), \ 3639#define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e), \
3590 I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ 3640 F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \
3591 I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) 3641 F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
3592 3642
3593static const struct opcode group7_rm1[] = { 3643static const struct opcode group7_rm1[] = {
3594 DI(SrcNone | Priv, monitor), 3644 DI(SrcNone | Priv, monitor),
@@ -3614,25 +3664,36 @@ static const struct opcode group7_rm7[] = {
3614}; 3664};
3615 3665
3616static const struct opcode group1[] = { 3666static const struct opcode group1[] = {
3617 I(Lock, em_add), 3667 F(Lock, em_add),
3618 I(Lock | PageTable, em_or), 3668 F(Lock | PageTable, em_or),
3619 I(Lock, em_adc), 3669 F(Lock, em_adc),
3620 I(Lock, em_sbb), 3670 F(Lock, em_sbb),
3621 I(Lock | PageTable, em_and), 3671 F(Lock | PageTable, em_and),
3622 I(Lock, em_sub), 3672 F(Lock, em_sub),
3623 I(Lock, em_xor), 3673 F(Lock, em_xor),
3624 I(0, em_cmp), 3674 F(NoWrite, em_cmp),
3625}; 3675};
3626 3676
3627static const struct opcode group1A[] = { 3677static const struct opcode group1A[] = {
3628 I(DstMem | SrcNone | Mov | Stack, em_pop), N, N, N, N, N, N, N, 3678 I(DstMem | SrcNone | Mov | Stack, em_pop), N, N, N, N, N, N, N,
3629}; 3679};
3630 3680
3681static const struct opcode group2[] = {
3682 F(DstMem | ModRM, em_rol),
3683 F(DstMem | ModRM, em_ror),
3684 F(DstMem | ModRM, em_rcl),
3685 F(DstMem | ModRM, em_rcr),
3686 F(DstMem | ModRM, em_shl),
3687 F(DstMem | ModRM, em_shr),
3688 F(DstMem | ModRM, em_shl),
3689 F(DstMem | ModRM, em_sar),
3690};
3691
3631static const struct opcode group3[] = { 3692static const struct opcode group3[] = {
3632 I(DstMem | SrcImm, em_test), 3693 F(DstMem | SrcImm | NoWrite, em_test),
3633 I(DstMem | SrcImm, em_test), 3694 F(DstMem | SrcImm | NoWrite, em_test),
3634 I(DstMem | SrcNone | Lock, em_not), 3695 F(DstMem | SrcNone | Lock, em_not),
3635 I(DstMem | SrcNone | Lock, em_neg), 3696 F(DstMem | SrcNone | Lock, em_neg),
3636 I(SrcMem, em_mul_ex), 3697 I(SrcMem, em_mul_ex),
3637 I(SrcMem, em_imul_ex), 3698 I(SrcMem, em_imul_ex),
3638 I(SrcMem, em_div_ex), 3699 I(SrcMem, em_div_ex),
@@ -3640,14 +3701,14 @@ static const struct opcode group3[] = {
3640}; 3701};
3641 3702
3642static const struct opcode group4[] = { 3703static const struct opcode group4[] = {
3643 I(ByteOp | DstMem | SrcNone | Lock, em_grp45), 3704 F(ByteOp | DstMem | SrcNone | Lock, em_inc),
3644 I(ByteOp | DstMem | SrcNone | Lock, em_grp45), 3705 F(ByteOp | DstMem | SrcNone | Lock, em_dec),
3645 N, N, N, N, N, N, 3706 N, N, N, N, N, N,
3646}; 3707};
3647 3708
3648static const struct opcode group5[] = { 3709static const struct opcode group5[] = {
3649 I(DstMem | SrcNone | Lock, em_grp45), 3710 F(DstMem | SrcNone | Lock, em_inc),
3650 I(DstMem | SrcNone | Lock, em_grp45), 3711 F(DstMem | SrcNone | Lock, em_dec),
3651 I(SrcMem | Stack, em_grp45), 3712 I(SrcMem | Stack, em_grp45),
3652 I(SrcMemFAddr | ImplicitOps | Stack, em_call_far), 3713 I(SrcMemFAddr | ImplicitOps | Stack, em_call_far),
3653 I(SrcMem | Stack, em_grp45), 3714 I(SrcMem | Stack, em_grp45),
@@ -3682,10 +3743,10 @@ static const struct group_dual group7 = { {
3682 3743
3683static const struct opcode group8[] = { 3744static const struct opcode group8[] = {
3684 N, N, N, N, 3745 N, N, N, N,
3685 I(DstMem | SrcImmByte, em_bt), 3746 F(DstMem | SrcImmByte | NoWrite, em_bt),
3686 I(DstMem | SrcImmByte | Lock | PageTable, em_bts), 3747 F(DstMem | SrcImmByte | Lock | PageTable, em_bts),
3687 I(DstMem | SrcImmByte | Lock, em_btr), 3748 F(DstMem | SrcImmByte | Lock, em_btr),
3688 I(DstMem | SrcImmByte | Lock | PageTable, em_btc), 3749 F(DstMem | SrcImmByte | Lock | PageTable, em_btc),
3689}; 3750};
3690 3751
3691static const struct group_dual group9 = { { 3752static const struct group_dual group9 = { {
@@ -3707,33 +3768,96 @@ static const struct gprefix pfx_vmovntpx = {
3707 I(0, em_mov), N, N, N, 3768 I(0, em_mov), N, N, N,
3708}; 3769};
3709 3770
3771static const struct escape escape_d9 = { {
3772 N, N, N, N, N, N, N, I(DstMem, em_fnstcw),
3773}, {
3774 /* 0xC0 - 0xC7 */
3775 N, N, N, N, N, N, N, N,
3776 /* 0xC8 - 0xCF */
3777 N, N, N, N, N, N, N, N,
 3778 /* 0xD0 - 0xD7 */
3779 N, N, N, N, N, N, N, N,
3780 /* 0xD8 - 0xDF */
3781 N, N, N, N, N, N, N, N,
3782 /* 0xE0 - 0xE7 */
3783 N, N, N, N, N, N, N, N,
3784 /* 0xE8 - 0xEF */
3785 N, N, N, N, N, N, N, N,
3786 /* 0xF0 - 0xF7 */
3787 N, N, N, N, N, N, N, N,
3788 /* 0xF8 - 0xFF */
3789 N, N, N, N, N, N, N, N,
3790} };
3791
3792static const struct escape escape_db = { {
3793 N, N, N, N, N, N, N, N,
3794}, {
3795 /* 0xC0 - 0xC7 */
3796 N, N, N, N, N, N, N, N,
3797 /* 0xC8 - 0xCF */
3798 N, N, N, N, N, N, N, N,
 3799 /* 0xD0 - 0xD7 */
3800 N, N, N, N, N, N, N, N,
3801 /* 0xD8 - 0xDF */
3802 N, N, N, N, N, N, N, N,
3803 /* 0xE0 - 0xE7 */
3804 N, N, N, I(ImplicitOps, em_fninit), N, N, N, N,
3805 /* 0xE8 - 0xEF */
3806 N, N, N, N, N, N, N, N,
3807 /* 0xF0 - 0xF7 */
3808 N, N, N, N, N, N, N, N,
3809 /* 0xF8 - 0xFF */
3810 N, N, N, N, N, N, N, N,
3811} };
3812
3813static const struct escape escape_dd = { {
3814 N, N, N, N, N, N, N, I(DstMem, em_fnstsw),
3815}, {
3816 /* 0xC0 - 0xC7 */
3817 N, N, N, N, N, N, N, N,
3818 /* 0xC8 - 0xCF */
3819 N, N, N, N, N, N, N, N,
 3820 /* 0xD0 - 0xD7 */
3821 N, N, N, N, N, N, N, N,
3822 /* 0xD8 - 0xDF */
3823 N, N, N, N, N, N, N, N,
3824 /* 0xE0 - 0xE7 */
3825 N, N, N, N, N, N, N, N,
3826 /* 0xE8 - 0xEF */
3827 N, N, N, N, N, N, N, N,
3828 /* 0xF0 - 0xF7 */
3829 N, N, N, N, N, N, N, N,
3830 /* 0xF8 - 0xFF */
3831 N, N, N, N, N, N, N, N,
3832} };
3833
3710static const struct opcode opcode_table[256] = { 3834static const struct opcode opcode_table[256] = {
3711 /* 0x00 - 0x07 */ 3835 /* 0x00 - 0x07 */
3712 I6ALU(Lock, em_add), 3836 F6ALU(Lock, em_add),
3713 I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg), 3837 I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),
3714 I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg), 3838 I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg),
3715 /* 0x08 - 0x0F */ 3839 /* 0x08 - 0x0F */
3716 I6ALU(Lock | PageTable, em_or), 3840 F6ALU(Lock | PageTable, em_or),
3717 I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg), 3841 I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg),
3718 N, 3842 N,
3719 /* 0x10 - 0x17 */ 3843 /* 0x10 - 0x17 */
3720 I6ALU(Lock, em_adc), 3844 F6ALU(Lock, em_adc),
3721 I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg), 3845 I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg),
3722 I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg), 3846 I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg),
3723 /* 0x18 - 0x1F */ 3847 /* 0x18 - 0x1F */
3724 I6ALU(Lock, em_sbb), 3848 F6ALU(Lock, em_sbb),
3725 I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg), 3849 I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg),
3726 I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg), 3850 I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg),
3727 /* 0x20 - 0x27 */ 3851 /* 0x20 - 0x27 */
3728 I6ALU(Lock | PageTable, em_and), N, N, 3852 F6ALU(Lock | PageTable, em_and), N, N,
3729 /* 0x28 - 0x2F */ 3853 /* 0x28 - 0x2F */
3730 I6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das), 3854 F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
3731 /* 0x30 - 0x37 */ 3855 /* 0x30 - 0x37 */
3732 I6ALU(Lock, em_xor), N, N, 3856 F6ALU(Lock, em_xor), N, N,
3733 /* 0x38 - 0x3F */ 3857 /* 0x38 - 0x3F */
3734 I6ALU(0, em_cmp), N, N, 3858 F6ALU(NoWrite, em_cmp), N, N,
3735 /* 0x40 - 0x4F */ 3859 /* 0x40 - 0x4F */
3736 X16(D(DstReg)), 3860 X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)),
3737 /* 0x50 - 0x57 */ 3861 /* 0x50 - 0x57 */
3738 X8(I(SrcReg | Stack, em_push)), 3862 X8(I(SrcReg | Stack, em_push)),
3739 /* 0x58 - 0x5F */ 3863 /* 0x58 - 0x5F */
@@ -3757,7 +3881,7 @@ static const struct opcode opcode_table[256] = {
3757 G(DstMem | SrcImm, group1), 3881 G(DstMem | SrcImm, group1),
3758 G(ByteOp | DstMem | SrcImm | No64, group1), 3882 G(ByteOp | DstMem | SrcImm | No64, group1),
3759 G(DstMem | SrcImmByte, group1), 3883 G(DstMem | SrcImmByte, group1),
3760 I2bv(DstMem | SrcReg | ModRM, em_test), 3884 F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test),
3761 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg), 3885 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg),
3762 /* 0x88 - 0x8F */ 3886 /* 0x88 - 0x8F */
3763 I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov), 3887 I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov),
@@ -3777,18 +3901,18 @@ static const struct opcode opcode_table[256] = {
3777 I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), 3901 I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
3778 I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov), 3902 I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
3779 I2bv(SrcSI | DstDI | Mov | String, em_mov), 3903 I2bv(SrcSI | DstDI | Mov | String, em_mov),
3780 I2bv(SrcSI | DstDI | String, em_cmp), 3904 F2bv(SrcSI | DstDI | String | NoWrite, em_cmp),
3781 /* 0xA8 - 0xAF */ 3905 /* 0xA8 - 0xAF */
3782 I2bv(DstAcc | SrcImm, em_test), 3906 F2bv(DstAcc | SrcImm | NoWrite, em_test),
3783 I2bv(SrcAcc | DstDI | Mov | String, em_mov), 3907 I2bv(SrcAcc | DstDI | Mov | String, em_mov),
3784 I2bv(SrcSI | DstAcc | Mov | String, em_mov), 3908 I2bv(SrcSI | DstAcc | Mov | String, em_mov),
3785 I2bv(SrcAcc | DstDI | String, em_cmp), 3909 F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp),
3786 /* 0xB0 - 0xB7 */ 3910 /* 0xB0 - 0xB7 */
3787 X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)), 3911 X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
3788 /* 0xB8 - 0xBF */ 3912 /* 0xB8 - 0xBF */
3789 X8(I(DstReg | SrcImm | Mov, em_mov)), 3913 X8(I(DstReg | SrcImm64 | Mov, em_mov)),
3790 /* 0xC0 - 0xC7 */ 3914 /* 0xC0 - 0xC7 */
3791 D2bv(DstMem | SrcImmByte | ModRM), 3915 G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
3792 I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm), 3916 I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm),
3793 I(ImplicitOps | Stack, em_ret), 3917 I(ImplicitOps | Stack, em_ret),
3794 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg), 3918 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
@@ -3800,10 +3924,11 @@ static const struct opcode opcode_table[256] = {
3800 D(ImplicitOps), DI(SrcImmByte, intn), 3924 D(ImplicitOps), DI(SrcImmByte, intn),
3801 D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret), 3925 D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret),
3802 /* 0xD0 - 0xD7 */ 3926 /* 0xD0 - 0xD7 */
3803 D2bv(DstMem | SrcOne | ModRM), D2bv(DstMem | ModRM), 3927 G(Src2One | ByteOp, group2), G(Src2One, group2),
3804 N, N, N, N, 3928 G(Src2CL | ByteOp, group2), G(Src2CL, group2),
3929 N, I(DstAcc | SrcImmByte | No64, em_aad), N, N,
3805 /* 0xD8 - 0xDF */ 3930 /* 0xD8 - 0xDF */
3806 N, N, N, N, N, N, N, N, 3931 N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
3807 /* 0xE0 - 0xE7 */ 3932 /* 0xE0 - 0xE7 */
3808 X3(I(SrcImmByte, em_loop)), 3933 X3(I(SrcImmByte, em_loop)),
3809 I(SrcImmByte, em_jcxz), 3934 I(SrcImmByte, em_jcxz),
@@ -3870,28 +3995,29 @@ static const struct opcode twobyte_table[256] = {
3870 X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), 3995 X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
3871 /* 0xA0 - 0xA7 */ 3996 /* 0xA0 - 0xA7 */
3872 I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg), 3997 I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg),
3873 II(ImplicitOps, em_cpuid, cpuid), I(DstMem | SrcReg | ModRM | BitOp, em_bt), 3998 II(ImplicitOps, em_cpuid, cpuid),
3874 D(DstMem | SrcReg | Src2ImmByte | ModRM), 3999 F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt),
3875 D(DstMem | SrcReg | Src2CL | ModRM), N, N, 4000 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld),
4001 F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
3876 /* 0xA8 - 0xAF */ 4002 /* 0xA8 - 0xAF */
3877 I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg), 4003 I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
3878 DI(ImplicitOps, rsm), 4004 DI(ImplicitOps, rsm),
3879 I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), 4005 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
3880 D(DstMem | SrcReg | Src2ImmByte | ModRM), 4006 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
3881 D(DstMem | SrcReg | Src2CL | ModRM), 4007 F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
3882 D(ModRM), I(DstReg | SrcMem | ModRM, em_imul), 4008 D(ModRM), F(DstReg | SrcMem | ModRM, em_imul),
3883 /* 0xB0 - 0xB7 */ 4009 /* 0xB0 - 0xB7 */
3884 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg), 4010 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg),
3885 I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg), 4011 I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
3886 I(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr), 4012 F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
3887 I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg), 4013 I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
3888 I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg), 4014 I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
3889 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), 4015 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
3890 /* 0xB8 - 0xBF */ 4016 /* 0xB8 - 0xBF */
3891 N, N, 4017 N, N,
3892 G(BitOp, group8), 4018 G(BitOp, group8),
3893 I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), 4019 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
3894 I(DstReg | SrcMem | ModRM, em_bsf), I(DstReg | SrcMem | ModRM, em_bsr), 4020 F(DstReg | SrcMem | ModRM, em_bsf), F(DstReg | SrcMem | ModRM, em_bsr),
3895 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), 4021 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
3896 /* 0xC0 - 0xC7 */ 4022 /* 0xC0 - 0xC7 */
3897 D2bv(DstMem | SrcReg | ModRM | Lock), 4023 D2bv(DstMem | SrcReg | ModRM | Lock),
@@ -3950,6 +4076,9 @@ static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
3950 case 4: 4076 case 4:
3951 op->val = insn_fetch(s32, ctxt); 4077 op->val = insn_fetch(s32, ctxt);
3952 break; 4078 break;
4079 case 8:
4080 op->val = insn_fetch(s64, ctxt);
4081 break;
3953 } 4082 }
3954 if (!sign_extension) { 4083 if (!sign_extension) {
3955 switch (op->bytes) { 4084 switch (op->bytes) {
@@ -4028,6 +4157,9 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
4028 case OpImm: 4157 case OpImm:
4029 rc = decode_imm(ctxt, op, imm_size(ctxt), true); 4158 rc = decode_imm(ctxt, op, imm_size(ctxt), true);
4030 break; 4159 break;
4160 case OpImm64:
4161 rc = decode_imm(ctxt, op, ctxt->op_bytes, true);
4162 break;
4031 case OpMem8: 4163 case OpMem8:
4032 ctxt->memop.bytes = 1; 4164 ctxt->memop.bytes = 1;
4033 goto mem_common; 4165 goto mem_common;
@@ -4222,6 +4354,12 @@ done_prefixes:
4222 case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break; 4354 case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;
4223 } 4355 }
4224 break; 4356 break;
4357 case Escape:
4358 if (ctxt->modrm > 0xbf)
4359 opcode = opcode.u.esc->high[ctxt->modrm - 0xc0];
4360 else
4361 opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
4362 break;
4225 default: 4363 default:
4226 return EMULATION_FAILED; 4364 return EMULATION_FAILED;
4227 } 4365 }
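A reading aid, not part of the patch: the Escape case above indexes the x87 tables as follows: register forms (ModRM >= 0xc0) select esc->high[modrm - 0xc0], while memory forms use the reg field to select esc->op[0..7]; for example FNSTCW m16 (D9 /7 with a memory operand) lands on escape_d9.op[7], i.e. em_fnstcw. A minimal sketch of the index computation (helper name illustrative):

#include <stdbool.h>

static unsigned int escape_index(unsigned char modrm, bool *reg_form)
{
	*reg_form = modrm > 0xbf;		/* mod == 3: register operand */
	return *reg_form ? modrm - 0xc0u	/* index into esc->high[64]   */
			 : (modrm >> 3) & 7;	/* reg field -> esc->op[8]    */
}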
@@ -4354,6 +4492,16 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
4354 read_mmx_reg(ctxt, &op->mm_val, op->addr.mm); 4492 read_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
4355} 4493}
4356 4494
4495static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
4496{
4497 ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
4498 fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
4499 asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
4500 : "+a"(ctxt->dst.val), "+b"(ctxt->src.val), [flags]"+D"(flags)
4501 : "c"(ctxt->src2.val), [fastop]"S"(fop));
4502 ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
4503 return X86EMUL_CONTINUE;
4504}
4357 4505
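A reading aid, not part of the patch: for a 32-bit ADD the dispatcher above resolves fop to em_add + 2 * FASTOP_SIZE (the "addl %ebx, %eax; ret" stub); dst.val travels in eax, src.val in ebx, src2.val in ecx, and the guest flags are loaded with push/popf before the call and read back with pushf/pop afterwards. Only the arithmetic bits are folded back into ctxt->eflags, mirroring the final assignment in fastop() (the helper name below is illustrative):

/* keep everything outside eflags_mask from the guest's previous value */
static unsigned long merge_eflags(unsigned long guest_eflags,
				  unsigned long stub_eflags,
				  unsigned long eflags_mask)
{
	return (guest_eflags & ~eflags_mask) | (stub_eflags & eflags_mask);
}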
4358int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) 4506int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
4359{ 4507{
@@ -4483,6 +4631,13 @@ special_insn:
4483 } 4631 }
4484 4632
4485 if (ctxt->execute) { 4633 if (ctxt->execute) {
4634 if (ctxt->d & Fastop) {
4635 void (*fop)(struct fastop *) = (void *)ctxt->execute;
4636 rc = fastop(ctxt, fop);
4637 if (rc != X86EMUL_CONTINUE)
4638 goto done;
4639 goto writeback;
4640 }
4486 rc = ctxt->execute(ctxt); 4641 rc = ctxt->execute(ctxt);
4487 if (rc != X86EMUL_CONTINUE) 4642 if (rc != X86EMUL_CONTINUE)
4488 goto done; 4643 goto done;
@@ -4493,12 +4648,6 @@ special_insn:
4493 goto twobyte_insn; 4648 goto twobyte_insn;
4494 4649
4495 switch (ctxt->b) { 4650 switch (ctxt->b) {
4496 case 0x40 ... 0x47: /* inc r16/r32 */
4497 emulate_1op(ctxt, "inc");
4498 break;
4499 case 0x48 ... 0x4f: /* dec r16/r32 */
4500 emulate_1op(ctxt, "dec");
4501 break;
4502 case 0x63: /* movsxd */ 4651 case 0x63: /* movsxd */
4503 if (ctxt->mode != X86EMUL_MODE_PROT64) 4652 if (ctxt->mode != X86EMUL_MODE_PROT64)
4504 goto cannot_emulate; 4653 goto cannot_emulate;
@@ -4523,9 +4672,6 @@ special_insn:
4523 case 8: ctxt->dst.val = (s32)ctxt->dst.val; break; 4672 case 8: ctxt->dst.val = (s32)ctxt->dst.val; break;
4524 } 4673 }
4525 break; 4674 break;
4526 case 0xc0 ... 0xc1:
4527 rc = em_grp2(ctxt);
4528 break;
4529 case 0xcc: /* int3 */ 4675 case 0xcc: /* int3 */
4530 rc = emulate_int(ctxt, 3); 4676 rc = emulate_int(ctxt, 3);
4531 break; 4677 break;
@@ -4536,13 +4682,6 @@ special_insn:
4536 if (ctxt->eflags & EFLG_OF) 4682 if (ctxt->eflags & EFLG_OF)
4537 rc = emulate_int(ctxt, 4); 4683 rc = emulate_int(ctxt, 4);
4538 break; 4684 break;
4539 case 0xd0 ... 0xd1: /* Grp2 */
4540 rc = em_grp2(ctxt);
4541 break;
4542 case 0xd2 ... 0xd3: /* Grp2 */
4543 ctxt->src.val = reg_read(ctxt, VCPU_REGS_RCX);
4544 rc = em_grp2(ctxt);
4545 break;
4546 case 0xe9: /* jmp rel */ 4685 case 0xe9: /* jmp rel */
4547 case 0xeb: /* jmp rel short */ 4686 case 0xeb: /* jmp rel short */
4548 jmp_rel(ctxt, ctxt->src.val); 4687 jmp_rel(ctxt, ctxt->src.val);
@@ -4661,14 +4800,6 @@ twobyte_insn:
4661 case 0x90 ... 0x9f: /* setcc r/m8 */ 4800 case 0x90 ... 0x9f: /* setcc r/m8 */
4662 ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags); 4801 ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
4663 break; 4802 break;
4664 case 0xa4: /* shld imm8, r, r/m */
4665 case 0xa5: /* shld cl, r, r/m */
4666 emulate_2op_cl(ctxt, "shld");
4667 break;
4668 case 0xac: /* shrd imm8, r, r/m */
4669 case 0xad: /* shrd cl, r, r/m */
4670 emulate_2op_cl(ctxt, "shrd");
4671 break;
4672 case 0xae: /* clflush */ 4803 case 0xae: /* clflush */
4673 break; 4804 break;
4674 case 0xb6 ... 0xb7: /* movzx */ 4805 case 0xb6 ... 0xb7: /* movzx */
@@ -4682,7 +4813,7 @@ twobyte_insn:
4682 (s16) ctxt->src.val; 4813 (s16) ctxt->src.val;
4683 break; 4814 break;
4684 case 0xc0 ... 0xc1: /* xadd */ 4815 case 0xc0 ... 0xc1: /* xadd */
4685 emulate_2op_SrcV(ctxt, "add"); 4816 fastop(ctxt, em_add);
4686 /* Write back the register source. */ 4817 /* Write back the register source. */
4687 ctxt->src.val = ctxt->dst.orig_val; 4818 ctxt->src.val = ctxt->dst.orig_val;
4688 write_register_operand(&ctxt->src); 4819 write_register_operand(&ctxt->src);
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 11300d2fa714..c1d30b2fc9bb 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -122,7 +122,6 @@ static s64 __kpit_elapsed(struct kvm *kvm)
122 */ 122 */
123 remaining = hrtimer_get_remaining(&ps->timer); 123 remaining = hrtimer_get_remaining(&ps->timer);
124 elapsed = ps->period - ktime_to_ns(remaining); 124 elapsed = ps->period - ktime_to_ns(remaining);
125 elapsed = mod_64(elapsed, ps->period);
126 125
127 return elapsed; 126 return elapsed;
128} 127}
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 848206df0967..cc31f7c06d3d 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -241,6 +241,8 @@ int kvm_pic_read_irq(struct kvm *kvm)
241 int irq, irq2, intno; 241 int irq, irq2, intno;
242 struct kvm_pic *s = pic_irqchip(kvm); 242 struct kvm_pic *s = pic_irqchip(kvm);
243 243
244 s->output = 0;
245
244 pic_lock(s); 246 pic_lock(s);
245 irq = pic_get_irq(&s->pics[0]); 247 irq = pic_get_irq(&s->pics[0]);
246 if (irq >= 0) { 248 if (irq >= 0) {
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 7e06ba1618bd..484bc874688b 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -38,49 +38,81 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
38EXPORT_SYMBOL(kvm_cpu_has_pending_timer); 38EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
39 39
40/* 40/*
 41 * check if there is a pending interrupt from a
 42 * non-APIC source, without intack.
43 */
44static int kvm_cpu_has_extint(struct kvm_vcpu *v)
45{
46 if (kvm_apic_accept_pic_intr(v))
47 return pic_irqchip(v->kvm)->output; /* PIC */
48 else
49 return 0;
50}
51
52/*
 53 * check if there is an injectable interrupt:
 54 * when virtual interrupt delivery is enabled,
 55 * interrupts from the APIC are handled by hardware,
 56 * so we don't need to check them here.
57 */
58int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
59{
60 if (!irqchip_in_kernel(v->kvm))
61 return v->arch.interrupt.pending;
62
63 if (kvm_cpu_has_extint(v))
64 return 1;
65
66 if (kvm_apic_vid_enabled(v->kvm))
67 return 0;
68
69 return kvm_apic_has_interrupt(v) != -1; /* LAPIC */
70}
71
72/*
41 * check if there is pending interrupt without 73 * check if there is pending interrupt without
42 * intack. 74 * intack.
43 */ 75 */
44int kvm_cpu_has_interrupt(struct kvm_vcpu *v) 76int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
45{ 77{
46 struct kvm_pic *s;
47
48 if (!irqchip_in_kernel(v->kvm)) 78 if (!irqchip_in_kernel(v->kvm))
49 return v->arch.interrupt.pending; 79 return v->arch.interrupt.pending;
50 80
51 if (kvm_apic_has_interrupt(v) == -1) { /* LAPIC */ 81 if (kvm_cpu_has_extint(v))
52 if (kvm_apic_accept_pic_intr(v)) { 82 return 1;
53 s = pic_irqchip(v->kvm); /* PIC */ 83
54 return s->output; 84 return kvm_apic_has_interrupt(v) != -1; /* LAPIC */
55 } else
56 return 0;
57 }
58 return 1;
59} 85}
60EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt); 86EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
61 87
62/* 88/*
 89 * Read the pending interrupt vector (from a non-APIC
 90 * source) and intack.
91 */
92static int kvm_cpu_get_extint(struct kvm_vcpu *v)
93{
94 if (kvm_cpu_has_extint(v))
95 return kvm_pic_read_irq(v->kvm); /* PIC */
96 return -1;
97}
98
99/*
63 * Read pending interrupt vector and intack. 100 * Read pending interrupt vector and intack.
64 */ 101 */
65int kvm_cpu_get_interrupt(struct kvm_vcpu *v) 102int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
66{ 103{
67 struct kvm_pic *s;
68 int vector; 104 int vector;
69 105
70 if (!irqchip_in_kernel(v->kvm)) 106 if (!irqchip_in_kernel(v->kvm))
71 return v->arch.interrupt.nr; 107 return v->arch.interrupt.nr;
72 108
73 vector = kvm_get_apic_interrupt(v); /* APIC */ 109 vector = kvm_cpu_get_extint(v);
74 if (vector == -1) { 110
75 if (kvm_apic_accept_pic_intr(v)) { 111 if (kvm_apic_vid_enabled(v->kvm) || vector != -1)
76 s = pic_irqchip(v->kvm); 112 return vector; /* PIC */
77 s->output = 0; /* PIC */ 113
78 vector = kvm_pic_read_irq(v->kvm); 114 return kvm_get_apic_interrupt(v); /* APIC */
79 }
80 }
81 return vector;
82} 115}
83EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
84 116
85void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) 117void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
86{ 118{
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 9392f527f107..02b51dd4e4ad 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -140,31 +140,56 @@ static inline int apic_enabled(struct kvm_lapic *apic)
140 (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \ 140 (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
141 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER) 141 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
142 142
143static inline int apic_x2apic_mode(struct kvm_lapic *apic)
144{
145 return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
146}
147
148static inline int kvm_apic_id(struct kvm_lapic *apic) 143static inline int kvm_apic_id(struct kvm_lapic *apic)
149{ 144{
150 return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; 145 return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
151} 146}
152 147
153static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr) 148void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
149 struct kvm_lapic_irq *irq,
150 u64 *eoi_exit_bitmap)
154{ 151{
155 u16 cid; 152 struct kvm_lapic **dst;
156 ldr >>= 32 - map->ldr_bits; 153 struct kvm_apic_map *map;
157 cid = (ldr >> map->cid_shift) & map->cid_mask; 154 unsigned long bitmap = 1;
155 int i;
158 156
159 BUG_ON(cid >= ARRAY_SIZE(map->logical_map)); 157 rcu_read_lock();
158 map = rcu_dereference(vcpu->kvm->arch.apic_map);
160 159
161 return cid; 160 if (unlikely(!map)) {
162} 161 __set_bit(irq->vector, (unsigned long *)eoi_exit_bitmap);
162 goto out;
163 }
163 164
164static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr) 165 if (irq->dest_mode == 0) { /* physical mode */
165{ 166 if (irq->delivery_mode == APIC_DM_LOWEST ||
166 ldr >>= (32 - map->ldr_bits); 167 irq->dest_id == 0xff) {
167 return ldr & map->lid_mask; 168 __set_bit(irq->vector,
169 (unsigned long *)eoi_exit_bitmap);
170 goto out;
171 }
172 dst = &map->phys_map[irq->dest_id & 0xff];
173 } else {
174 u32 mda = irq->dest_id << (32 - map->ldr_bits);
175
176 dst = map->logical_map[apic_cluster_id(map, mda)];
177
178 bitmap = apic_logical_id(map, mda);
179 }
180
181 for_each_set_bit(i, &bitmap, 16) {
182 if (!dst[i])
183 continue;
184 if (dst[i]->vcpu == vcpu) {
185 __set_bit(irq->vector,
186 (unsigned long *)eoi_exit_bitmap);
187 break;
188 }
189 }
190
191out:
192 rcu_read_unlock();
168} 193}
169 194
170static void recalculate_apic_map(struct kvm *kvm) 195static void recalculate_apic_map(struct kvm *kvm)
@@ -230,6 +255,8 @@ out:
230 255
231 if (old) 256 if (old)
232 kfree_rcu(old, rcu); 257 kfree_rcu(old, rcu);
258
259 kvm_ioapic_make_eoibitmap_request(kvm);
233} 260}
234 261
235static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id) 262static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
@@ -345,6 +372,10 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic)
345{ 372{
346 int result; 373 int result;
347 374
375 /*
 376 * Note that irr_pending is just a hint. It will always be
377 * true with virtual interrupt delivery enabled.
378 */
348 if (!apic->irr_pending) 379 if (!apic->irr_pending)
349 return -1; 380 return -1;
350 381
@@ -461,6 +492,8 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
461static inline int apic_find_highest_isr(struct kvm_lapic *apic) 492static inline int apic_find_highest_isr(struct kvm_lapic *apic)
462{ 493{
463 int result; 494 int result;
495
496 /* Note that isr_count is always 1 with vid enabled */
464 if (!apic->isr_count) 497 if (!apic->isr_count)
465 return -1; 498 return -1;
466 if (likely(apic->highest_isr_cache != -1)) 499 if (likely(apic->highest_isr_cache != -1))
@@ -740,6 +773,19 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
740 return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; 773 return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
741} 774}
742 775
776static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
777{
778 if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
779 kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
780 int trigger_mode;
781 if (apic_test_vector(vector, apic->regs + APIC_TMR))
782 trigger_mode = IOAPIC_LEVEL_TRIG;
783 else
784 trigger_mode = IOAPIC_EDGE_TRIG;
785 kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
786 }
787}
788
743static int apic_set_eoi(struct kvm_lapic *apic) 789static int apic_set_eoi(struct kvm_lapic *apic)
744{ 790{
745 int vector = apic_find_highest_isr(apic); 791 int vector = apic_find_highest_isr(apic);
@@ -756,19 +802,26 @@ static int apic_set_eoi(struct kvm_lapic *apic)
756 apic_clear_isr(vector, apic); 802 apic_clear_isr(vector, apic);
757 apic_update_ppr(apic); 803 apic_update_ppr(apic);
758 804
759 if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) && 805 kvm_ioapic_send_eoi(apic, vector);
760 kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
761 int trigger_mode;
762 if (apic_test_vector(vector, apic->regs + APIC_TMR))
763 trigger_mode = IOAPIC_LEVEL_TRIG;
764 else
765 trigger_mode = IOAPIC_EDGE_TRIG;
766 kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
767 }
768 kvm_make_request(KVM_REQ_EVENT, apic->vcpu); 806 kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
769 return vector; 807 return vector;
770} 808}
771 809
810/*
 811 * This interface assumes a trap-like exit, which has already finished
 812 * the desired side effects, including the vISR and vPPR updates.
813 */
814void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
815{
816 struct kvm_lapic *apic = vcpu->arch.apic;
817
818 trace_kvm_eoi(apic, vector);
819
820 kvm_ioapic_send_eoi(apic, vector);
821 kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
822}
823EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
824
772static void apic_send_ipi(struct kvm_lapic *apic) 825static void apic_send_ipi(struct kvm_lapic *apic)
773{ 826{
774 u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR); 827 u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR);
@@ -1212,6 +1265,21 @@ void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
1212} 1265}
1213EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); 1266EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
1214 1267
 1268/* emulate APIC access in a trap-like manner */
1269void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
1270{
1271 u32 val = 0;
1272
1273 /* hw has done the conditional check and inst decode */
1274 offset &= 0xff0;
1275
1276 apic_reg_read(vcpu->arch.apic, offset, 4, &val);
1277
1278 /* TODO: optimize to just emulate side effect w/o one more write */
1279 apic_reg_write(vcpu->arch.apic, offset, val);
1280}
1281EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
1282
1215void kvm_free_lapic(struct kvm_vcpu *vcpu) 1283void kvm_free_lapic(struct kvm_vcpu *vcpu)
1216{ 1284{
1217 struct kvm_lapic *apic = vcpu->arch.apic; 1285 struct kvm_lapic *apic = vcpu->arch.apic;
@@ -1288,6 +1356,7 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
1288 1356
1289void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) 1357void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
1290{ 1358{
1359 u64 old_value = vcpu->arch.apic_base;
1291 struct kvm_lapic *apic = vcpu->arch.apic; 1360 struct kvm_lapic *apic = vcpu->arch.apic;
1292 1361
1293 if (!apic) { 1362 if (!apic) {
@@ -1309,11 +1378,16 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
1309 value &= ~MSR_IA32_APICBASE_BSP; 1378 value &= ~MSR_IA32_APICBASE_BSP;
1310 1379
1311 vcpu->arch.apic_base = value; 1380 vcpu->arch.apic_base = value;
1312 if (apic_x2apic_mode(apic)) { 1381 if ((old_value ^ value) & X2APIC_ENABLE) {
1313 u32 id = kvm_apic_id(apic); 1382 if (value & X2APIC_ENABLE) {
1314 u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); 1383 u32 id = kvm_apic_id(apic);
1315 kvm_apic_set_ldr(apic, ldr); 1384 u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
1385 kvm_apic_set_ldr(apic, ldr);
1386 kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true);
1387 } else
1388 kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false);
1316 } 1389 }
1390
1317 apic->base_address = apic->vcpu->arch.apic_base & 1391 apic->base_address = apic->vcpu->arch.apic_base &
1318 MSR_IA32_APICBASE_BASE; 1392 MSR_IA32_APICBASE_BASE;
1319 1393
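A reading aid, not part of the patch: the LDR derivation in the hunk above follows the x2APIC logical-ID format, with the cluster (APIC ID >> 4) in the upper 16 bits and the member bit (1 << (id & 0xf)) in the lower 16. As a sketch (helper name illustrative):

static unsigned int x2apic_ldr_from_id(unsigned int id)
{
	return ((id >> 4) << 16) | (1u << (id & 0xf));
}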
@@ -1359,8 +1433,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
1359 apic_set_reg(apic, APIC_ISR + 0x10 * i, 0); 1433 apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
1360 apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); 1434 apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
1361 } 1435 }
1362 apic->irr_pending = false; 1436 apic->irr_pending = kvm_apic_vid_enabled(vcpu->kvm);
1363 apic->isr_count = 0; 1437 apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm);
1364 apic->highest_isr_cache = -1; 1438 apic->highest_isr_cache = -1;
1365 update_divide_count(apic); 1439 update_divide_count(apic);
1366 atomic_set(&apic->lapic_timer.pending, 0); 1440 atomic_set(&apic->lapic_timer.pending, 0);
@@ -1575,8 +1649,10 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
1575 update_divide_count(apic); 1649 update_divide_count(apic);
1576 start_apic_timer(apic); 1650 start_apic_timer(apic);
1577 apic->irr_pending = true; 1651 apic->irr_pending = true;
1578 apic->isr_count = count_vectors(apic->regs + APIC_ISR); 1652 apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm) ?
1653 1 : count_vectors(apic->regs + APIC_ISR);
1579 apic->highest_isr_cache = -1; 1654 apic->highest_isr_cache = -1;
1655 kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic));
1580 kvm_make_request(KVM_REQ_EVENT, vcpu); 1656 kvm_make_request(KVM_REQ_EVENT, vcpu);
1581} 1657}
1582 1658
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index e5ebf9f3571f..1676d34ddb4e 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -64,6 +64,9 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
64u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); 64u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);
65void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data); 65void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data);
66 66
67void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset);
68void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector);
69
67void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); 70void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
68void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu); 71void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
69void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu); 72void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu);
@@ -124,4 +127,35 @@ static inline int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
124 return kvm_apic_present(vcpu) && kvm_apic_sw_enabled(vcpu->arch.apic); 127 return kvm_apic_present(vcpu) && kvm_apic_sw_enabled(vcpu->arch.apic);
125} 128}
126 129
130static inline int apic_x2apic_mode(struct kvm_lapic *apic)
131{
132 return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
133}
134
135static inline bool kvm_apic_vid_enabled(struct kvm *kvm)
136{
137 return kvm_x86_ops->vm_has_apicv(kvm);
138}
139
140static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr)
141{
142 u16 cid;
143 ldr >>= 32 - map->ldr_bits;
144 cid = (ldr >> map->cid_shift) & map->cid_mask;
145
146 BUG_ON(cid >= ARRAY_SIZE(map->logical_map));
147
148 return cid;
149}
150
151static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr)
152{
153 ldr >>= (32 - map->ldr_bits);
154 return ldr & map->lid_mask;
155}
156
157void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
158 struct kvm_lapic_irq *irq,
159 u64 *eoi_bitmap);
160
127#endif 161#endif
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 01d7c2ad05f5..956ca358108a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -448,7 +448,8 @@ static bool __check_direct_spte_mmio_pf(u64 spte)
448 448
449static bool spte_is_locklessly_modifiable(u64 spte) 449static bool spte_is_locklessly_modifiable(u64 spte)
450{ 450{
451 return !(~spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE)); 451 return (spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE)) ==
452 (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE);
452} 453}
453 454
454static bool spte_has_volatile_bits(u64 spte) 455static bool spte_has_volatile_bits(u64 spte)
@@ -831,8 +832,7 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
831 if (host_level == PT_PAGE_TABLE_LEVEL) 832 if (host_level == PT_PAGE_TABLE_LEVEL)
832 return host_level; 833 return host_level;
833 834
834 max_level = kvm_x86_ops->get_lpage_level() < host_level ? 835 max_level = min(kvm_x86_ops->get_lpage_level(), host_level);
835 kvm_x86_ops->get_lpage_level() : host_level;
836 836
837 for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level) 837 for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level)
838 if (has_wrprotected_page(vcpu->kvm, large_gfn, level)) 838 if (has_wrprotected_page(vcpu->kvm, large_gfn, level))
@@ -1142,7 +1142,7 @@ spte_write_protect(struct kvm *kvm, u64 *sptep, bool *flush, bool pt_protect)
1142} 1142}
1143 1143
1144static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, 1144static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
1145 int level, bool pt_protect) 1145 bool pt_protect)
1146{ 1146{
1147 u64 *sptep; 1147 u64 *sptep;
1148 struct rmap_iterator iter; 1148 struct rmap_iterator iter;
@@ -1180,7 +1180,7 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
1180 while (mask) { 1180 while (mask) {
1181 rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), 1181 rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
1182 PT_PAGE_TABLE_LEVEL, slot); 1182 PT_PAGE_TABLE_LEVEL, slot);
1183 __rmap_write_protect(kvm, rmapp, PT_PAGE_TABLE_LEVEL, false); 1183 __rmap_write_protect(kvm, rmapp, false);
1184 1184
1185 /* clear the first set bit */ 1185 /* clear the first set bit */
1186 mask &= mask - 1; 1186 mask &= mask - 1;
@@ -1199,7 +1199,7 @@ static bool rmap_write_protect(struct kvm *kvm, u64 gfn)
1199 for (i = PT_PAGE_TABLE_LEVEL; 1199 for (i = PT_PAGE_TABLE_LEVEL;
1200 i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { 1200 i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
1201 rmapp = __gfn_to_rmap(gfn, i, slot); 1201 rmapp = __gfn_to_rmap(gfn, i, slot);
1202 write_protected |= __rmap_write_protect(kvm, rmapp, i, true); 1202 write_protected |= __rmap_write_protect(kvm, rmapp, true);
1203 } 1203 }
1204 1204
1205 return write_protected; 1205 return write_protected;
@@ -1460,28 +1460,14 @@ static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, int nr)
1460 percpu_counter_add(&kvm_total_used_mmu_pages, nr); 1460 percpu_counter_add(&kvm_total_used_mmu_pages, nr);
1461} 1461}
1462 1462
1463/* 1463static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
1464 * Remove the sp from shadow page cache, after call it,
1465 * we can not find this sp from the cache, and the shadow
1466 * page table is still valid.
1467 * It should be under the protection of mmu lock.
1468 */
1469static void kvm_mmu_isolate_page(struct kvm_mmu_page *sp)
1470{ 1464{
1471 ASSERT(is_empty_shadow_page(sp->spt)); 1465 ASSERT(is_empty_shadow_page(sp->spt));
1472 hlist_del(&sp->hash_link); 1466 hlist_del(&sp->hash_link);
1473 if (!sp->role.direct)
1474 free_page((unsigned long)sp->gfns);
1475}
1476
1477/*
1478 * Free the shadow page table and the sp, we can do it
1479 * out of the protection of mmu lock.
1480 */
1481static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
1482{
1483 list_del(&sp->link); 1467 list_del(&sp->link);
1484 free_page((unsigned long)sp->spt); 1468 free_page((unsigned long)sp->spt);
1469 if (!sp->role.direct)
1470 free_page((unsigned long)sp->gfns);
1485 kmem_cache_free(mmu_page_header_cache, sp); 1471 kmem_cache_free(mmu_page_header_cache, sp);
1486} 1472}
1487 1473
@@ -1522,7 +1508,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
1522 sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); 1508 sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
1523 set_page_private(virt_to_page(sp->spt), (unsigned long)sp); 1509 set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
1524 list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); 1510 list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
1525 bitmap_zero(sp->slot_bitmap, KVM_MEM_SLOTS_NUM);
1526 sp->parent_ptes = 0; 1511 sp->parent_ptes = 0;
1527 mmu_page_add_parent_pte(vcpu, sp, parent_pte); 1512 mmu_page_add_parent_pte(vcpu, sp, parent_pte);
1528 kvm_mod_used_mmu_pages(vcpu->kvm, +1); 1513 kvm_mod_used_mmu_pages(vcpu->kvm, +1);
@@ -1659,13 +1644,13 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
1659static void kvm_mmu_commit_zap_page(struct kvm *kvm, 1644static void kvm_mmu_commit_zap_page(struct kvm *kvm,
1660 struct list_head *invalid_list); 1645 struct list_head *invalid_list);
1661 1646
1662#define for_each_gfn_sp(kvm, sp, gfn, pos) \ 1647#define for_each_gfn_sp(kvm, sp, gfn) \
1663 hlist_for_each_entry(sp, pos, \ 1648 hlist_for_each_entry(sp, \
1664 &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ 1649 &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \
1665 if ((sp)->gfn != (gfn)) {} else 1650 if ((sp)->gfn != (gfn)) {} else
1666 1651
1667#define for_each_gfn_indirect_valid_sp(kvm, sp, gfn, pos) \ 1652#define for_each_gfn_indirect_valid_sp(kvm, sp, gfn) \
1668 hlist_for_each_entry(sp, pos, \ 1653 hlist_for_each_entry(sp, \
1669 &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ 1654 &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \
1670 if ((sp)->gfn != (gfn) || (sp)->role.direct || \ 1655 if ((sp)->gfn != (gfn) || (sp)->role.direct || \
1671 (sp)->role.invalid) {} else 1656 (sp)->role.invalid) {} else
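The updated for_each_gfn_sp()/for_each_gfn_indirect_valid_sp() macros drop the hlist_node cursor that hlist_for_each_entry() no longer takes, while keeping the "if (cond) {} else" filter idiom that lets a for-style macro skip non-matching entries yet still accept an ordinary statement body. A self-contained sketch of the same idiom over a plain array (types and names are made up for illustration):

#include <stdio.h>

struct page { unsigned long gfn; int valid; };

/*
 * Visit every element, but run the loop body only for valid entries with
 * the wanted gfn, mirroring the "if (...) {} else" filter used above.
 */
#define for_each_matching_page(p, pages, n, wanted_gfn)		\
	for ((p) = (pages); (p) < (pages) + (n); (p)++)		\
		if ((p)->gfn != (wanted_gfn) || !(p)->valid) {} else

int main(void)
{
	struct page pages[] = { {1, 1}, {2, 0}, {2, 1}, {3, 1} };
	struct page *p;

	for_each_matching_page(p, pages, 4, 2)
		printf("matching page at index %ld\n", (long)(p - pages));
	return 0;
}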
@@ -1721,11 +1706,10 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
1721static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) 1706static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn)
1722{ 1707{
1723 struct kvm_mmu_page *s; 1708 struct kvm_mmu_page *s;
1724 struct hlist_node *node;
1725 LIST_HEAD(invalid_list); 1709 LIST_HEAD(invalid_list);
1726 bool flush = false; 1710 bool flush = false;
1727 1711
1728 for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { 1712 for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) {
1729 if (!s->unsync) 1713 if (!s->unsync)
1730 continue; 1714 continue;
1731 1715
@@ -1863,7 +1847,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
1863 union kvm_mmu_page_role role; 1847 union kvm_mmu_page_role role;
1864 unsigned quadrant; 1848 unsigned quadrant;
1865 struct kvm_mmu_page *sp; 1849 struct kvm_mmu_page *sp;
1866 struct hlist_node *node;
1867 bool need_sync = false; 1850 bool need_sync = false;
1868 1851
1869 role = vcpu->arch.mmu.base_role; 1852 role = vcpu->arch.mmu.base_role;
@@ -1878,7 +1861,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
1878 quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; 1861 quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
1879 role.quadrant = quadrant; 1862 role.quadrant = quadrant;
1880 } 1863 }
1881 for_each_gfn_sp(vcpu->kvm, sp, gfn, node) { 1864 for_each_gfn_sp(vcpu->kvm, sp, gfn) {
1882 if (!need_sync && sp->unsync) 1865 if (!need_sync && sp->unsync)
1883 need_sync = true; 1866 need_sync = true;
1884 1867
@@ -1973,9 +1956,9 @@ static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp)
1973{ 1956{
1974 u64 spte; 1957 u64 spte;
1975 1958
1976 spte = __pa(sp->spt) 1959 spte = __pa(sp->spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK |
1977 | PT_PRESENT_MASK | PT_ACCESSED_MASK 1960 shadow_user_mask | shadow_x_mask | shadow_accessed_mask;
1978 | PT_WRITABLE_MASK | PT_USER_MASK; 1961
1979 mmu_spte_set(sptep, spte); 1962 mmu_spte_set(sptep, spte);
1980} 1963}
1981 1964
@@ -2126,7 +2109,6 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
2126 do { 2109 do {
2127 sp = list_first_entry(invalid_list, struct kvm_mmu_page, link); 2110 sp = list_first_entry(invalid_list, struct kvm_mmu_page, link);
2128 WARN_ON(!sp->role.invalid || sp->root_count); 2111 WARN_ON(!sp->role.invalid || sp->root_count);
2129 kvm_mmu_isolate_page(sp);
2130 kvm_mmu_free_page(sp); 2112 kvm_mmu_free_page(sp);
2131 } while (!list_empty(invalid_list)); 2113 } while (!list_empty(invalid_list));
2132} 2114}
@@ -2144,6 +2126,8 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
2144 * change the value 2126 * change the value
2145 */ 2127 */
2146 2128
2129 spin_lock(&kvm->mmu_lock);
2130
2147 if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) { 2131 if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) {
2148 while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages && 2132 while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages &&
2149 !list_empty(&kvm->arch.active_mmu_pages)) { 2133 !list_empty(&kvm->arch.active_mmu_pages)) {
@@ -2158,19 +2142,20 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
2158 } 2142 }
2159 2143
2160 kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages; 2144 kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages;
2145
2146 spin_unlock(&kvm->mmu_lock);
2161} 2147}
2162 2148
2163int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) 2149int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
2164{ 2150{
2165 struct kvm_mmu_page *sp; 2151 struct kvm_mmu_page *sp;
2166 struct hlist_node *node;
2167 LIST_HEAD(invalid_list); 2152 LIST_HEAD(invalid_list);
2168 int r; 2153 int r;
2169 2154
2170 pgprintk("%s: looking for gfn %llx\n", __func__, gfn); 2155 pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
2171 r = 0; 2156 r = 0;
2172 spin_lock(&kvm->mmu_lock); 2157 spin_lock(&kvm->mmu_lock);
2173 for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) { 2158 for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
2174 pgprintk("%s: gfn %llx role %x\n", __func__, gfn, 2159 pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
2175 sp->role.word); 2160 sp->role.word);
2176 r = 1; 2161 r = 1;
@@ -2183,14 +2168,6 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
2183} 2168}
2184EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page); 2169EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page);
2185 2170
2186static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
2187{
2188 int slot = memslot_id(kvm, gfn);
2189 struct kvm_mmu_page *sp = page_header(__pa(pte));
2190
2191 __set_bit(slot, sp->slot_bitmap);
2192}
2193
2194/* 2171/*
2195 * The function is based on mtrr_type_lookup() in 2172 * The function is based on mtrr_type_lookup() in
2196 * arch/x86/kernel/cpu/mtrr/generic.c 2173 * arch/x86/kernel/cpu/mtrr/generic.c
@@ -2308,9 +2285,8 @@ static void __kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
2308static void kvm_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) 2285static void kvm_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn)
2309{ 2286{
2310 struct kvm_mmu_page *s; 2287 struct kvm_mmu_page *s;
2311 struct hlist_node *node;
2312 2288
2313 for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { 2289 for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) {
2314 if (s->unsync) 2290 if (s->unsync)
2315 continue; 2291 continue;
2316 WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); 2292 WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL);
@@ -2322,19 +2298,17 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
2322 bool can_unsync) 2298 bool can_unsync)
2323{ 2299{
2324 struct kvm_mmu_page *s; 2300 struct kvm_mmu_page *s;
2325 struct hlist_node *node;
2326 bool need_unsync = false; 2301 bool need_unsync = false;
2327 2302
2328 for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { 2303 for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) {
2329 if (!can_unsync) 2304 if (!can_unsync)
2330 return 1; 2305 return 1;
2331 2306
2332 if (s->role.level != PT_PAGE_TABLE_LEVEL) 2307 if (s->role.level != PT_PAGE_TABLE_LEVEL)
2333 return 1; 2308 return 1;
2334 2309
2335 if (!need_unsync && !s->unsync) { 2310 if (!s->unsync)
2336 need_unsync = true; 2311 need_unsync = true;
2337 }
2338 } 2312 }
2339 if (need_unsync) 2313 if (need_unsync)
2340 kvm_unsync_pages(vcpu, gfn); 2314 kvm_unsync_pages(vcpu, gfn);
@@ -2342,8 +2316,7 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
2342} 2316}
2343 2317
2344static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, 2318static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
2345 unsigned pte_access, int user_fault, 2319 unsigned pte_access, int level,
2346 int write_fault, int level,
2347 gfn_t gfn, pfn_t pfn, bool speculative, 2320 gfn_t gfn, pfn_t pfn, bool speculative,
2348 bool can_unsync, bool host_writable) 2321 bool can_unsync, bool host_writable)
2349{ 2322{
@@ -2378,20 +2351,13 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
2378 2351
2379 spte |= (u64)pfn << PAGE_SHIFT; 2352 spte |= (u64)pfn << PAGE_SHIFT;
2380 2353
2381 if ((pte_access & ACC_WRITE_MASK) 2354 if (pte_access & ACC_WRITE_MASK) {
2382 || (!vcpu->arch.mmu.direct_map && write_fault
2383 && !is_write_protection(vcpu) && !user_fault)) {
2384 2355
2385 /* 2356 /*
2386 * There are two cases: 2357 * Other vcpu creates new sp in the window between
2387 * - the one is other vcpu creates new sp in the window 2358 * mapping_level() and acquiring mmu-lock. We can
2388 * between mapping_level() and acquiring mmu-lock. 2359 * allow the guest to retry the access; the mapping can
2389 * - the another case is the new sp is created by itself 2360 * be fixed when the guest refaults.
2390 * (page-fault path) when guest uses the target gfn as
2391 * its page table.
2392 * Both of these cases can be fixed by allowing guest to
2393 * retry the access, it will refault, then we can establish
2394 * the mapping by using small page.
2395 */ 2361 */
2396 if (level > PT_PAGE_TABLE_LEVEL && 2362 if (level > PT_PAGE_TABLE_LEVEL &&
2397 has_wrprotected_page(vcpu->kvm, gfn, level)) 2363 has_wrprotected_page(vcpu->kvm, gfn, level))
@@ -2399,19 +2365,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
2399 2365
2400 spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE; 2366 spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE;
2401 2367
2402 if (!vcpu->arch.mmu.direct_map
2403 && !(pte_access & ACC_WRITE_MASK)) {
2404 spte &= ~PT_USER_MASK;
2405 /*
2406 * If we converted a user page to a kernel page,
2407 * so that the kernel can write to it when cr0.wp=0,
2408 * then we should prevent the kernel from executing it
2409 * if SMEP is enabled.
2410 */
2411 if (kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))
2412 spte |= PT64_NX_MASK;
2413 }
2414
2415 /* 2368 /*
2416 * Optimization: for pte sync, if spte was writable the hash 2369 * Optimization: for pte sync, if spte was writable the hash
2417 * lookup is unnecessary (and expensive). Write protection 2370 * lookup is unnecessary (and expensive). Write protection
@@ -2441,19 +2394,15 @@ done:
2441} 2394}
2442 2395
2443static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, 2396static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
2444 unsigned pt_access, unsigned pte_access, 2397 unsigned pte_access, int write_fault, int *emulate,
2445 int user_fault, int write_fault, 2398 int level, gfn_t gfn, pfn_t pfn, bool speculative,
2446 int *emulate, int level, gfn_t gfn,
2447 pfn_t pfn, bool speculative,
2448 bool host_writable) 2399 bool host_writable)
2449{ 2400{
2450 int was_rmapped = 0; 2401 int was_rmapped = 0;
2451 int rmap_count; 2402 int rmap_count;
2452 2403
2453 pgprintk("%s: spte %llx access %x write_fault %d" 2404 pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__,
2454 " user_fault %d gfn %llx\n", 2405 *sptep, write_fault, gfn);
2455 __func__, *sptep, pt_access,
2456 write_fault, user_fault, gfn);
2457 2406
2458 if (is_rmap_spte(*sptep)) { 2407 if (is_rmap_spte(*sptep)) {
2459 /* 2408 /*
@@ -2477,9 +2426,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
2477 was_rmapped = 1; 2426 was_rmapped = 1;
2478 } 2427 }
2479 2428
2480 if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault, 2429 if (set_spte(vcpu, sptep, pte_access, level, gfn, pfn, speculative,
2481 level, gfn, pfn, speculative, true, 2430 true, host_writable)) {
2482 host_writable)) {
2483 if (write_fault) 2431 if (write_fault)
2484 *emulate = 1; 2432 *emulate = 1;
2485 kvm_mmu_flush_tlb(vcpu); 2433 kvm_mmu_flush_tlb(vcpu);
@@ -2497,7 +2445,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
2497 ++vcpu->kvm->stat.lpages; 2445 ++vcpu->kvm->stat.lpages;
2498 2446
2499 if (is_shadow_present_pte(*sptep)) { 2447 if (is_shadow_present_pte(*sptep)) {
2500 page_header_update_slot(vcpu->kvm, sptep, gfn);
2501 if (!was_rmapped) { 2448 if (!was_rmapped) {
2502 rmap_count = rmap_add(vcpu, sptep, gfn); 2449 rmap_count = rmap_add(vcpu, sptep, gfn);
2503 if (rmap_count > RMAP_RECYCLE_THRESHOLD) 2450 if (rmap_count > RMAP_RECYCLE_THRESHOLD)
@@ -2571,10 +2518,9 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
2571 return -1; 2518 return -1;
2572 2519
2573 for (i = 0; i < ret; i++, gfn++, start++) 2520 for (i = 0; i < ret; i++, gfn++, start++)
2574 mmu_set_spte(vcpu, start, ACC_ALL, 2521 mmu_set_spte(vcpu, start, access, 0, NULL,
2575 access, 0, 0, NULL, 2522 sp->role.level, gfn, page_to_pfn(pages[i]),
2576 sp->role.level, gfn, 2523 true, true);
2577 page_to_pfn(pages[i]), true, true);
2578 2524
2579 return 0; 2525 return 0;
2580} 2526}
@@ -2633,11 +2579,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
2633 2579
2634 for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { 2580 for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
2635 if (iterator.level == level) { 2581 if (iterator.level == level) {
2636 unsigned pte_access = ACC_ALL; 2582 mmu_set_spte(vcpu, iterator.sptep, ACC_ALL,
2637 2583 write, &emulate, level, gfn, pfn,
2638 mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, pte_access, 2584 prefault, map_writable);
2639 0, write, &emulate,
2640 level, gfn, pfn, prefault, map_writable);
2641 direct_pte_prefetch(vcpu, iterator.sptep); 2585 direct_pte_prefetch(vcpu, iterator.sptep);
2642 ++vcpu->stat.pf_fixed; 2586 ++vcpu->stat.pf_fixed;
2643 break; 2587 break;
@@ -2652,11 +2596,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
2652 iterator.level - 1, 2596 iterator.level - 1,
2653 1, ACC_ALL, iterator.sptep); 2597 1, ACC_ALL, iterator.sptep);
2654 2598
2655 mmu_spte_set(iterator.sptep, 2599 link_shadow_page(iterator.sptep, sp);
2656 __pa(sp->spt)
2657 | PT_PRESENT_MASK | PT_WRITABLE_MASK
2658 | shadow_user_mask | shadow_x_mask
2659 | shadow_accessed_mask);
2660 } 2600 }
2661 } 2601 }
2662 return emulate; 2602 return emulate;
@@ -3719,6 +3659,7 @@ int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
3719 else 3659 else
3720 r = paging32_init_context(vcpu, context); 3660 r = paging32_init_context(vcpu, context);
3721 3661
3662 vcpu->arch.mmu.base_role.nxe = is_nx(vcpu);
3722 vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu); 3663 vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu);
3723 vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu); 3664 vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu);
3724 vcpu->arch.mmu.base_role.smep_andnot_wp 3665 vcpu->arch.mmu.base_role.smep_andnot_wp
@@ -3885,7 +3826,7 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
3885 /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ 3826 /* Handle a 32-bit guest writing two halves of a 64-bit gpte */
3886 *gpa &= ~(gpa_t)7; 3827 *gpa &= ~(gpa_t)7;
3887 *bytes = 8; 3828 *bytes = 8;
3888 r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, min(*bytes, 8)); 3829 r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, 8);
3889 if (r) 3830 if (r)
3890 gentry = 0; 3831 gentry = 0;
3891 new = (const u8 *)&gentry; 3832 new = (const u8 *)&gentry;
@@ -3987,7 +3928,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
3987 gfn_t gfn = gpa >> PAGE_SHIFT; 3928 gfn_t gfn = gpa >> PAGE_SHIFT;
3988 union kvm_mmu_page_role mask = { .word = 0 }; 3929 union kvm_mmu_page_role mask = { .word = 0 };
3989 struct kvm_mmu_page *sp; 3930 struct kvm_mmu_page *sp;
3990 struct hlist_node *node;
3991 LIST_HEAD(invalid_list); 3931 LIST_HEAD(invalid_list);
3992 u64 entry, gentry, *spte; 3932 u64 entry, gentry, *spte;
3993 int npte; 3933 int npte;
@@ -4018,7 +3958,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
4018 kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); 3958 kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
4019 3959
4020 mask.cr0_wp = mask.cr4_pae = mask.nxe = 1; 3960 mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
4021 for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) { 3961 for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) {
4022 if (detect_write_misaligned(sp, gpa, bytes) || 3962 if (detect_write_misaligned(sp, gpa, bytes) ||
4023 detect_write_flooding(sp)) { 3963 detect_write_flooding(sp)) {
4024 zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp, 3964 zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
@@ -4039,7 +3979,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
4039 !((sp->role.word ^ vcpu->arch.mmu.base_role.word) 3979 !((sp->role.word ^ vcpu->arch.mmu.base_role.word)
4040 & mask.word) && rmap_can_add(vcpu)) 3980 & mask.word) && rmap_can_add(vcpu))
4041 mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); 3981 mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
4042 if (!remote_flush && need_remote_flush(entry, *spte)) 3982 if (need_remote_flush(entry, *spte))
4043 remote_flush = true; 3983 remote_flush = true;
4044 ++spte; 3984 ++spte;
4045 } 3985 }
@@ -4198,26 +4138,36 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu)
4198 4138
4199void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) 4139void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
4200{ 4140{
4201 struct kvm_mmu_page *sp; 4141 struct kvm_memory_slot *memslot;
4202 bool flush = false; 4142 gfn_t last_gfn;
4143 int i;
4203 4144
4204 list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) { 4145 memslot = id_to_memslot(kvm->memslots, slot);
4205 int i; 4146 last_gfn = memslot->base_gfn + memslot->npages - 1;
4206 u64 *pt;
4207 4147
4208 if (!test_bit(slot, sp->slot_bitmap)) 4148 spin_lock(&kvm->mmu_lock);
4209 continue;
4210 4149
4211 pt = sp->spt; 4150 for (i = PT_PAGE_TABLE_LEVEL;
4212 for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { 4151 i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
4213 if (!is_shadow_present_pte(pt[i]) || 4152 unsigned long *rmapp;
4214 !is_last_spte(pt[i], sp->role.level)) 4153 unsigned long last_index, index;
4215 continue;
4216 4154
4217 spte_write_protect(kvm, &pt[i], &flush, false); 4155 rmapp = memslot->arch.rmap[i - PT_PAGE_TABLE_LEVEL];
4156 last_index = gfn_to_index(last_gfn, memslot->base_gfn, i);
4157
4158 for (index = 0; index <= last_index; ++index, ++rmapp) {
4159 if (*rmapp)
4160 __rmap_write_protect(kvm, rmapp, false);
4161
4162 if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
4163 kvm_flush_remote_tlbs(kvm);
4164 cond_resched_lock(&kvm->mmu_lock);
4165 }
4218 } 4166 }
4219 } 4167 }
4168
4220 kvm_flush_remote_tlbs(kvm); 4169 kvm_flush_remote_tlbs(kvm);
4170 spin_unlock(&kvm->mmu_lock);
4221} 4171}
4222 4172
4223void kvm_mmu_zap_all(struct kvm *kvm) 4173void kvm_mmu_zap_all(struct kvm *kvm)
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h
index cd6e98333ba3..b8f6172f4174 100644
--- a/arch/x86/kvm/mmutrace.h
+++ b/arch/x86/kvm/mmutrace.h
@@ -195,12 +195,6 @@ DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_prepare_zap_page,
195 TP_ARGS(sp) 195 TP_ARGS(sp)
196); 196);
197 197
198DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_delay_free_pages,
199 TP_PROTO(struct kvm_mmu_page *sp),
200
201 TP_ARGS(sp)
202);
203
204TRACE_EVENT( 198TRACE_EVENT(
205 mark_mmio_spte, 199 mark_mmio_spte,
206 TP_PROTO(u64 *sptep, gfn_t gfn, unsigned access), 200 TP_PROTO(u64 *sptep, gfn_t gfn, unsigned access),
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 891eb6d93b8b..105dd5bd550e 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -151,7 +151,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
151 pt_element_t pte; 151 pt_element_t pte;
152 pt_element_t __user *uninitialized_var(ptep_user); 152 pt_element_t __user *uninitialized_var(ptep_user);
153 gfn_t table_gfn; 153 gfn_t table_gfn;
154 unsigned index, pt_access, pte_access, accessed_dirty, shift; 154 unsigned index, pt_access, pte_access, accessed_dirty;
155 gpa_t pte_gpa; 155 gpa_t pte_gpa;
156 int offset; 156 int offset;
157 const int write_fault = access & PFERR_WRITE_MASK; 157 const int write_fault = access & PFERR_WRITE_MASK;
@@ -249,16 +249,12 @@ retry_walk:
249 249
250 if (!write_fault) 250 if (!write_fault)
251 protect_clean_gpte(&pte_access, pte); 251 protect_clean_gpte(&pte_access, pte);
252 252 else
253 /* 253 /*
254 * On a write fault, fold the dirty bit into accessed_dirty by shifting it one 254 * On a write fault, fold the dirty bit into accessed_dirty by
255 * place right. 255 * shifting it one place right.
256 * 256 */
257 * On a read fault, do nothing. 257 accessed_dirty &= pte >> (PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT);
258 */
259 shift = write_fault >> ilog2(PFERR_WRITE_MASK);
260 shift *= PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT;
261 accessed_dirty &= pte >> shift;
262 258
263 if (unlikely(!accessed_dirty)) { 259 if (unlikely(!accessed_dirty)) {
264 ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault); 260 ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault);
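The old code derived a shift of 0 (read fault) or 1 (write fault) from the error code; the replacement keeps only the write-fault case and folds the pte's dirty bit into the accessed position by shifting right by PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT = 1. A small worked example using the standard x86 pte bit positions (accessed = bit 5, dirty = bit 6):

#include <assert.h>
#include <stdint.h>

#define PT_ACCESSED_SHIFT 5
#define PT_DIRTY_SHIFT    6
#define PT_ACCESSED_MASK  (1ULL << PT_ACCESSED_SHIFT)
#define PT_DIRTY_MASK     (1ULL << PT_DIRTY_SHIFT)

int main(void)
{
	uint64_t accessed_dirty = PT_ACCESSED_MASK;	/* accumulated during the walk */
	uint64_t pte_dirty = PT_ACCESSED_MASK | PT_DIRTY_MASK;
	uint64_t pte_clean = PT_ACCESSED_MASK;

	/* Write fault: the dirty bit, shifted one place right, lands on the
	 * accessed bit, so a clean pte wipes accessed_dirty and forces the
	 * slow update_accessed_dirty_bits() path. */
	assert((accessed_dirty &
		(pte_dirty >> (PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT))) == PT_ACCESSED_MASK);
	assert((accessed_dirty &
		(pte_clean >> (PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT))) == 0);
	return 0;
}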
@@ -330,8 +326,8 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
330 * we call mmu_set_spte() with host_writable = true because 326 * we call mmu_set_spte() with host_writable = true because
331 * pte_prefetch_gfn_to_pfn always gets a writable pfn. 327 * pte_prefetch_gfn_to_pfn always gets a writable pfn.
332 */ 328 */
333 mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0, 329 mmu_set_spte(vcpu, spte, pte_access, 0, NULL, PT_PAGE_TABLE_LEVEL,
334 NULL, PT_PAGE_TABLE_LEVEL, gfn, pfn, true, true); 330 gfn, pfn, true, true);
335 331
336 return true; 332 return true;
337} 333}
@@ -405,7 +401,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
405 */ 401 */
406static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, 402static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
407 struct guest_walker *gw, 403 struct guest_walker *gw,
408 int user_fault, int write_fault, int hlevel, 404 int write_fault, int hlevel,
409 pfn_t pfn, bool map_writable, bool prefault) 405 pfn_t pfn, bool map_writable, bool prefault)
410{ 406{
411 struct kvm_mmu_page *sp = NULL; 407 struct kvm_mmu_page *sp = NULL;
@@ -413,9 +409,6 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
413 unsigned direct_access, access = gw->pt_access; 409 unsigned direct_access, access = gw->pt_access;
414 int top_level, emulate = 0; 410 int top_level, emulate = 0;
415 411
416 if (!is_present_gpte(gw->ptes[gw->level - 1]))
417 return 0;
418
419 direct_access = gw->pte_access; 412 direct_access = gw->pte_access;
420 413
421 top_level = vcpu->arch.mmu.root_level; 414 top_level = vcpu->arch.mmu.root_level;
@@ -477,9 +470,8 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
477 } 470 }
478 471
479 clear_sp_write_flooding_count(it.sptep); 472 clear_sp_write_flooding_count(it.sptep);
480 mmu_set_spte(vcpu, it.sptep, access, gw->pte_access, 473 mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault, &emulate,
481 user_fault, write_fault, &emulate, it.level, 474 it.level, gw->gfn, pfn, prefault, map_writable);
482 gw->gfn, pfn, prefault, map_writable);
483 FNAME(pte_prefetch)(vcpu, gw, it.sptep); 475 FNAME(pte_prefetch)(vcpu, gw, it.sptep);
484 476
485 return emulate; 477 return emulate;
@@ -491,6 +483,46 @@ out_gpte_changed:
491 return 0; 483 return 0;
492} 484}
493 485
486 /*
 487 * Check whether the mapped gfn can write its own page table in the current
 488 * mapping.
 489 *
 490 * This is a helper for FNAME(page_fault). When the guest uses a large page
 491 * to map a writable gfn that is used as its current page table, we should
 492 * force kvm to map it with a small page, because a new shadow page will be
 493 * created when kvm establishes the shadow page table, which stops kvm from
 494 * using a large page. Doing this early avoids unnecessary #PF and emulation.
 495 *
 496 * @write_fault_to_shadow_pgtable will return true if the fault gfn is
 497 * currently used as a page table.
 498 *
 499 * Note: the PDPT page table is not checked for a PAE 32-bit guest. That is
 500 * fine since the PDPT is always shadowed, which means we can never use a
 501 * large page to map the gfn that is used as the PDPT.
502 */
503static bool
504FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu,
505 struct guest_walker *walker, int user_fault,
506 bool *write_fault_to_shadow_pgtable)
507{
508 int level;
509 gfn_t mask = ~(KVM_PAGES_PER_HPAGE(walker->level) - 1);
510 bool self_changed = false;
511
512 if (!(walker->pte_access & ACC_WRITE_MASK ||
513 (!is_write_protection(vcpu) && !user_fault)))
514 return false;
515
516 for (level = walker->level; level <= walker->max_level; level++) {
517 gfn_t gfn = walker->gfn ^ walker->table_gfn[level - 1];
518
519 self_changed |= !(gfn & mask);
520 *write_fault_to_shadow_pgtable |= !gfn;
521 }
522
523 return self_changed;
524}
525
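FNAME(is_self_change_mapping)() leans on a simple mask trick: with mask = ~(KVM_PAGES_PER_HPAGE(level) - 1), the test !((gfn ^ table_gfn) & mask) is true exactly when the faulting gfn and a guest page-table gfn share the same large-page frame, and a zero XOR means the fault hit the page-table page itself. A standalone illustration, assuming 512 small pages per 2MB large page:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define PAGES_PER_HPAGE 512ULL	/* 2MB large page / 4KB small page */

static bool same_hpage(uint64_t gfn, uint64_t table_gfn)
{
	uint64_t mask = ~(PAGES_PER_HPAGE - 1);

	return !((gfn ^ table_gfn) & mask);
}

int main(void)
{
	assert(same_hpage(0x1000, 0x11ff));	/* both in gfns 0x1000..0x11ff */
	assert(!same_hpage(0x1000, 0x1200));	/* next large-page frame */
	assert((0x1234 ^ 0x1234) == 0);		/* fault gfn == table gfn: the
						   guest writes the page-table
						   page itself */
	return 0;
}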
494/* 526/*
495 * Page fault handler. There are several causes for a page fault: 527 * Page fault handler. There are several causes for a page fault:
496 * - there is no shadow pte for the guest pte 528 * - there is no shadow pte for the guest pte
@@ -516,7 +548,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
516 int level = PT_PAGE_TABLE_LEVEL; 548 int level = PT_PAGE_TABLE_LEVEL;
517 int force_pt_level; 549 int force_pt_level;
518 unsigned long mmu_seq; 550 unsigned long mmu_seq;
519 bool map_writable; 551 bool map_writable, is_self_change_mapping;
520 552
521 pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); 553 pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
522 554
@@ -544,8 +576,14 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
544 return 0; 576 return 0;
545 } 577 }
546 578
579 vcpu->arch.write_fault_to_shadow_pgtable = false;
580
581 is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu,
582 &walker, user_fault, &vcpu->arch.write_fault_to_shadow_pgtable);
583
547 if (walker.level >= PT_DIRECTORY_LEVEL) 584 if (walker.level >= PT_DIRECTORY_LEVEL)
548 force_pt_level = mapping_level_dirty_bitmap(vcpu, walker.gfn); 585 force_pt_level = mapping_level_dirty_bitmap(vcpu, walker.gfn)
586 || is_self_change_mapping;
549 else 587 else
550 force_pt_level = 1; 588 force_pt_level = 1;
551 if (!force_pt_level) { 589 if (!force_pt_level) {
@@ -564,6 +602,26 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
564 walker.gfn, pfn, walker.pte_access, &r)) 602 walker.gfn, pfn, walker.pte_access, &r))
565 return r; 603 return r;
566 604
605 /*
 606 * Do not change pte_access if the pfn is an mmio page; otherwise
 607 * we would cache the incorrect access into the mmio spte.
608 */
609 if (write_fault && !(walker.pte_access & ACC_WRITE_MASK) &&
610 !is_write_protection(vcpu) && !user_fault &&
611 !is_noslot_pfn(pfn)) {
612 walker.pte_access |= ACC_WRITE_MASK;
613 walker.pte_access &= ~ACC_USER_MASK;
614
615 /*
 616 * If we converted a user page to a kernel page so that
 617 * the kernel can write to it when cr0.wp=0, then we
 618 * should prevent the kernel from executing it if SMEP
 619 * is enabled.
620 */
621 if (kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))
622 walker.pte_access &= ~ACC_EXEC_MASK;
623 }
624
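The block just added widens the access only for the cr0.wp=0 case: a supervisor write to a read-only user page gains ACC_WRITE_MASK, loses ACC_USER_MASK, and loses ACC_EXEC_MASK when SMEP is on, mirroring the logic that used to live in set_spte(). A hedged sketch of the same mask surgery; the ACC_* values here are illustrative:

#include <assert.h>
#include <stdbool.h>

/* Illustrative access bits in the spirit of kvm's ACC_* flags. */
#define ACC_EXEC_MASK  1u
#define ACC_WRITE_MASK 2u
#define ACC_USER_MASK  4u

static unsigned adjust_for_wp0_kernel_write(unsigned access, bool smep)
{
	access |= ACC_WRITE_MASK;	/* kernel may write despite the R/O pte */
	access &= ~ACC_USER_MASK;	/* but the mapping is no longer a user page */
	if (smep)			/* and must not stay executable under SMEP */
		access &= ~ACC_EXEC_MASK;
	return access;
}

int main(void)
{
	unsigned ro_user = ACC_EXEC_MASK | ACC_USER_MASK;

	assert(adjust_for_wp0_kernel_write(ro_user, false) ==
	       (ACC_EXEC_MASK | ACC_WRITE_MASK));
	assert(adjust_for_wp0_kernel_write(ro_user, true) == ACC_WRITE_MASK);
	return 0;
}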
567 spin_lock(&vcpu->kvm->mmu_lock); 625 spin_lock(&vcpu->kvm->mmu_lock);
568 if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) 626 if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
569 goto out_unlock; 627 goto out_unlock;
@@ -572,7 +630,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
572 kvm_mmu_free_some_pages(vcpu); 630 kvm_mmu_free_some_pages(vcpu);
573 if (!force_pt_level) 631 if (!force_pt_level)
574 transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); 632 transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level);
575 r = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, 633 r = FNAME(fetch)(vcpu, addr, &walker, write_fault,
576 level, pfn, map_writable, prefault); 634 level, pfn, map_writable, prefault);
577 ++vcpu->stat.pf_fixed; 635 ++vcpu->stat.pf_fixed;
578 kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); 636 kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
@@ -747,7 +805,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
747 805
748 host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE; 806 host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE;
749 807
750 set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, 808 set_spte(vcpu, &sp->spt[i], pte_access,
751 PT_PAGE_TABLE_LEVEL, gfn, 809 PT_PAGE_TABLE_LEVEL, gfn,
752 spte_to_pfn(sp->spt[i]), true, false, 810 spte_to_pfn(sp->spt[i]), true, false,
753 host_writable); 811 host_writable);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d29d3cd1c156..e1b1ce21bc00 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3571,6 +3571,26 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
3571 set_cr_intercept(svm, INTERCEPT_CR8_WRITE); 3571 set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
3572} 3572}
3573 3573
3574static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
3575{
3576 return;
3577}
3578
3579static int svm_vm_has_apicv(struct kvm *kvm)
3580{
3581 return 0;
3582}
3583
3584static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
3585{
3586 return;
3587}
3588
3589static void svm_hwapic_isr_update(struct kvm *kvm, int isr)
3590{
3591 return;
3592}
3593
3574static int svm_nmi_allowed(struct kvm_vcpu *vcpu) 3594static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
3575{ 3595{
3576 struct vcpu_svm *svm = to_svm(vcpu); 3596 struct vcpu_svm *svm = to_svm(vcpu);
@@ -4290,6 +4310,10 @@ static struct kvm_x86_ops svm_x86_ops = {
4290 .enable_nmi_window = enable_nmi_window, 4310 .enable_nmi_window = enable_nmi_window,
4291 .enable_irq_window = enable_irq_window, 4311 .enable_irq_window = enable_irq_window,
4292 .update_cr8_intercept = update_cr8_intercept, 4312 .update_cr8_intercept = update_cr8_intercept,
4313 .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
4314 .vm_has_apicv = svm_vm_has_apicv,
4315 .load_eoi_exitmap = svm_load_eoi_exitmap,
4316 .hwapic_isr_update = svm_hwapic_isr_update,
4293 4317
4294 .set_tss_addr = svm_set_tss_addr, 4318 .set_tss_addr = svm_set_tss_addr,
4295 .get_tdp_level = get_npt_level, 4319 .get_tdp_level = get_npt_level,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9120ae1901e4..6667042714cc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -84,6 +84,8 @@ module_param(vmm_exclusive, bool, S_IRUGO);
84static bool __read_mostly fasteoi = 1; 84static bool __read_mostly fasteoi = 1;
85module_param(fasteoi, bool, S_IRUGO); 85module_param(fasteoi, bool, S_IRUGO);
86 86
87static bool __read_mostly enable_apicv_reg_vid;
88
87/* 89/*
88 * If nested=1, nested virtualization is supported, i.e., guests may use 90 * If nested=1, nested virtualization is supported, i.e., guests may use
89 * VMX and be a hypervisor for its own guests. If nested=0, guests may not 91 * VMX and be a hypervisor for its own guests. If nested=0, guests may not
@@ -92,12 +94,8 @@ module_param(fasteoi, bool, S_IRUGO);
92static bool __read_mostly nested = 0; 94static bool __read_mostly nested = 0;
93module_param(nested, bool, S_IRUGO); 95module_param(nested, bool, S_IRUGO);
94 96
95#define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ 97#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
96 (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD) 98#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
97#define KVM_GUEST_CR0_MASK \
98 (KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
99#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST \
100 (X86_CR0_WP | X86_CR0_NE)
101#define KVM_VM_CR0_ALWAYS_ON \ 99#define KVM_VM_CR0_ALWAYS_ON \
102 (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) 100 (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
103#define KVM_CR4_GUEST_OWNED_BITS \ 101#define KVM_CR4_GUEST_OWNED_BITS \
@@ -624,6 +622,8 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
624 struct kvm_segment *var, int seg); 622 struct kvm_segment *var, int seg);
625static void vmx_get_segment(struct kvm_vcpu *vcpu, 623static void vmx_get_segment(struct kvm_vcpu *vcpu,
626 struct kvm_segment *var, int seg); 624 struct kvm_segment *var, int seg);
625static bool guest_state_valid(struct kvm_vcpu *vcpu);
626static u32 vmx_segment_access_rights(struct kvm_segment *var);
627 627
628static DEFINE_PER_CPU(struct vmcs *, vmxarea); 628static DEFINE_PER_CPU(struct vmcs *, vmxarea);
629static DEFINE_PER_CPU(struct vmcs *, current_vmcs); 629static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -638,6 +638,8 @@ static unsigned long *vmx_io_bitmap_a;
638static unsigned long *vmx_io_bitmap_b; 638static unsigned long *vmx_io_bitmap_b;
639static unsigned long *vmx_msr_bitmap_legacy; 639static unsigned long *vmx_msr_bitmap_legacy;
640static unsigned long *vmx_msr_bitmap_longmode; 640static unsigned long *vmx_msr_bitmap_longmode;
641static unsigned long *vmx_msr_bitmap_legacy_x2apic;
642static unsigned long *vmx_msr_bitmap_longmode_x2apic;
641 643
642static bool cpu_has_load_ia32_efer; 644static bool cpu_has_load_ia32_efer;
643static bool cpu_has_load_perf_global_ctrl; 645static bool cpu_has_load_perf_global_ctrl;
@@ -762,6 +764,24 @@ static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
762 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; 764 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
763} 765}
764 766
767static inline bool cpu_has_vmx_virtualize_x2apic_mode(void)
768{
769 return vmcs_config.cpu_based_2nd_exec_ctrl &
770 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
771}
772
773static inline bool cpu_has_vmx_apic_register_virt(void)
774{
775 return vmcs_config.cpu_based_2nd_exec_ctrl &
776 SECONDARY_EXEC_APIC_REGISTER_VIRT;
777}
778
779static inline bool cpu_has_vmx_virtual_intr_delivery(void)
780{
781 return vmcs_config.cpu_based_2nd_exec_ctrl &
782 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
783}
784
765static inline bool cpu_has_vmx_flexpriority(void) 785static inline bool cpu_has_vmx_flexpriority(void)
766{ 786{
767 return cpu_has_vmx_tpr_shadow() && 787 return cpu_has_vmx_tpr_shadow() &&
@@ -1694,7 +1714,6 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
1694static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) 1714static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
1695{ 1715{
1696 __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); 1716 __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
1697 __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
1698 to_vmx(vcpu)->rflags = rflags; 1717 to_vmx(vcpu)->rflags = rflags;
1699 if (to_vmx(vcpu)->rmode.vm86_active) { 1718 if (to_vmx(vcpu)->rmode.vm86_active) {
1700 to_vmx(vcpu)->rmode.save_rflags = rflags; 1719 to_vmx(vcpu)->rmode.save_rflags = rflags;
@@ -1820,6 +1839,25 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
1820 vmx->guest_msrs[from] = tmp; 1839 vmx->guest_msrs[from] = tmp;
1821} 1840}
1822 1841
1842static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
1843{
1844 unsigned long *msr_bitmap;
1845
1846 if (irqchip_in_kernel(vcpu->kvm) && apic_x2apic_mode(vcpu->arch.apic)) {
1847 if (is_long_mode(vcpu))
1848 msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
1849 else
1850 msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
1851 } else {
1852 if (is_long_mode(vcpu))
1853 msr_bitmap = vmx_msr_bitmap_longmode;
1854 else
1855 msr_bitmap = vmx_msr_bitmap_legacy;
1856 }
1857
1858 vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
1859}
1860
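vmx_set_msr_bitmap() picks one of four bitmaps along two independent axes, x2apic mode and long mode. The same selection, sketched outside kvm as a 2x2 table over hypothetical bitmap objects:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for the four MSR bitmaps. */
static unsigned long bitmap_legacy, bitmap_longmode;
static unsigned long bitmap_legacy_x2apic, bitmap_longmode_x2apic;

static unsigned long *pick_msr_bitmap(bool x2apic, bool long_mode)
{
	unsigned long *table[2][2] = {
		{ &bitmap_legacy,        &bitmap_longmode        },
		{ &bitmap_legacy_x2apic, &bitmap_longmode_x2apic },
	};

	return table[x2apic][long_mode];
}

int main(void)
{
	printf("%d\n", pick_msr_bitmap(true, false) == &bitmap_legacy_x2apic);
	printf("%d\n", pick_msr_bitmap(false, true) == &bitmap_longmode);
	return 0;
}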
1823/* 1861/*
1824 * Set up the vmcs to automatically save and restore system 1862 * Set up the vmcs to automatically save and restore system
1825 * msrs. Don't touch the 64-bit msrs if the guest is in legacy 1863 * msrs. Don't touch the 64-bit msrs if the guest is in legacy
@@ -1828,7 +1866,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
1828static void setup_msrs(struct vcpu_vmx *vmx) 1866static void setup_msrs(struct vcpu_vmx *vmx)
1829{ 1867{
1830 int save_nmsrs, index; 1868 int save_nmsrs, index;
1831 unsigned long *msr_bitmap;
1832 1869
1833 save_nmsrs = 0; 1870 save_nmsrs = 0;
1834#ifdef CONFIG_X86_64 1871#ifdef CONFIG_X86_64
@@ -1860,14 +1897,8 @@ static void setup_msrs(struct vcpu_vmx *vmx)
1860 1897
1861 vmx->save_nmsrs = save_nmsrs; 1898 vmx->save_nmsrs = save_nmsrs;
1862 1899
1863 if (cpu_has_vmx_msr_bitmap()) { 1900 if (cpu_has_vmx_msr_bitmap())
1864 if (is_long_mode(&vmx->vcpu)) 1901 vmx_set_msr_bitmap(&vmx->vcpu);
1865 msr_bitmap = vmx_msr_bitmap_longmode;
1866 else
1867 msr_bitmap = vmx_msr_bitmap_legacy;
1868
1869 vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
1870 }
1871} 1902}
1872 1903
1873/* 1904/*
@@ -2533,13 +2564,16 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
2533 if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) { 2564 if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
2534 min2 = 0; 2565 min2 = 0;
2535 opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | 2566 opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
2567 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
2536 SECONDARY_EXEC_WBINVD_EXITING | 2568 SECONDARY_EXEC_WBINVD_EXITING |
2537 SECONDARY_EXEC_ENABLE_VPID | 2569 SECONDARY_EXEC_ENABLE_VPID |
2538 SECONDARY_EXEC_ENABLE_EPT | 2570 SECONDARY_EXEC_ENABLE_EPT |
2539 SECONDARY_EXEC_UNRESTRICTED_GUEST | 2571 SECONDARY_EXEC_UNRESTRICTED_GUEST |
2540 SECONDARY_EXEC_PAUSE_LOOP_EXITING | 2572 SECONDARY_EXEC_PAUSE_LOOP_EXITING |
2541 SECONDARY_EXEC_RDTSCP | 2573 SECONDARY_EXEC_RDTSCP |
2542 SECONDARY_EXEC_ENABLE_INVPCID; 2574 SECONDARY_EXEC_ENABLE_INVPCID |
2575 SECONDARY_EXEC_APIC_REGISTER_VIRT |
2576 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
2543 if (adjust_vmx_controls(min2, opt2, 2577 if (adjust_vmx_controls(min2, opt2,
2544 MSR_IA32_VMX_PROCBASED_CTLS2, 2578 MSR_IA32_VMX_PROCBASED_CTLS2,
2545 &_cpu_based_2nd_exec_control) < 0) 2579 &_cpu_based_2nd_exec_control) < 0)
@@ -2550,6 +2584,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
2550 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) 2584 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
2551 _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW; 2585 _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW;
2552#endif 2586#endif
2587
2588 if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
2589 _cpu_based_2nd_exec_control &= ~(
2590 SECONDARY_EXEC_APIC_REGISTER_VIRT |
2591 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
2592 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
2593
2553 if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { 2594 if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
2554 /* CR3 accesses and invlpg don't need to cause VM Exits when EPT 2595 /* CR3 accesses and invlpg don't need to cause VM Exits when EPT
2555 enabled */ 2596 enabled */
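APIC register virtualization, x2apic virtualization and virtual interrupt delivery all depend on the TPR shadow, so the added lines strip those secondary controls whenever CPU_BASED_TPR_SHADOW did not survive adjust_vmx_controls(). The dependency-masking pattern in isolation, with made-up flag values:

#include <assert.h>
#include <stdint.h>

/* Made-up control bits for illustration only. */
#define CTRL_TPR_SHADOW		(1u << 0)
#define CTRL2_APIC_REG_VIRT	(1u << 1)
#define CTRL2_X2APIC_MODE	(1u << 2)
#define CTRL2_VIRT_INTR		(1u << 3)

static uint32_t mask_dependent(uint32_t exec, uint32_t exec2)
{
	/* Drop features that rely on a TPR shadow we do not have. */
	if (!(exec & CTRL_TPR_SHADOW))
		exec2 &= ~(CTRL2_APIC_REG_VIRT | CTRL2_X2APIC_MODE |
			   CTRL2_VIRT_INTR);
	return exec2;
}

int main(void)
{
	assert(mask_dependent(0, CTRL2_APIC_REG_VIRT | CTRL2_VIRT_INTR) == 0);
	assert(mask_dependent(CTRL_TPR_SHADOW, CTRL2_VIRT_INTR) == CTRL2_VIRT_INTR);
	return 0;
}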
@@ -2747,6 +2788,15 @@ static __init int hardware_setup(void)
2747 if (!cpu_has_vmx_ple()) 2788 if (!cpu_has_vmx_ple())
2748 ple_gap = 0; 2789 ple_gap = 0;
2749 2790
2791 if (!cpu_has_vmx_apic_register_virt() ||
2792 !cpu_has_vmx_virtual_intr_delivery())
2793 enable_apicv_reg_vid = 0;
2794
2795 if (enable_apicv_reg_vid)
2796 kvm_x86_ops->update_cr8_intercept = NULL;
2797 else
2798 kvm_x86_ops->hwapic_irr_update = NULL;
2799
2750 if (nested) 2800 if (nested)
2751 nested_vmx_setup_ctls_msrs(); 2801 nested_vmx_setup_ctls_msrs();
2752 2802
@@ -2758,18 +2808,28 @@ static __exit void hardware_unsetup(void)
2758 free_kvm_area(); 2808 free_kvm_area();
2759} 2809}
2760 2810
2761static void fix_pmode_dataseg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment *save) 2811static bool emulation_required(struct kvm_vcpu *vcpu)
2762{ 2812{
2763 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; 2813 return emulate_invalid_guest_state && !guest_state_valid(vcpu);
2764 struct kvm_segment tmp = *save; 2814}
2765 2815
2766 if (!(vmcs_readl(sf->base) == tmp.base && tmp.s)) { 2816static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg,
2767 tmp.base = vmcs_readl(sf->base); 2817 struct kvm_segment *save)
2768 tmp.selector = vmcs_read16(sf->selector); 2818{
2769 tmp.dpl = tmp.selector & SELECTOR_RPL_MASK; 2819 if (!emulate_invalid_guest_state) {
2770 tmp.s = 1; 2820 /*
2821 * CS and SS RPL should be equal during guest entry according
 2822 * to the VMX spec, but in reality it is not always so. Since the vcpu
2823 * is in the middle of the transition from real mode to
2824 * protected mode it is safe to assume that RPL 0 is a good
2825 * default value.
2826 */
2827 if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS)
2828 save->selector &= ~SELECTOR_RPL_MASK;
2829 save->dpl = save->selector & SELECTOR_RPL_MASK;
2830 save->s = 1;
2771 } 2831 }
2772 vmx_set_segment(vcpu, &tmp, seg); 2832 vmx_set_segment(vcpu, save, seg);
2773} 2833}
2774 2834
2775static void enter_pmode(struct kvm_vcpu *vcpu) 2835static void enter_pmode(struct kvm_vcpu *vcpu)
@@ -2777,7 +2837,17 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
2777 unsigned long flags; 2837 unsigned long flags;
2778 struct vcpu_vmx *vmx = to_vmx(vcpu); 2838 struct vcpu_vmx *vmx = to_vmx(vcpu);
2779 2839
2780 vmx->emulation_required = 1; 2840 /*
 2841 * Update real mode segment cache. It may not be up to date if a segment
 2842 * register was written while the vcpu was in guest mode.
2843 */
2844 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
2845 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
2846 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
2847 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
2848 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS);
2849 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS);
2850
2781 vmx->rmode.vm86_active = 0; 2851 vmx->rmode.vm86_active = 0;
2782 2852
2783 vmx_segment_cache_clear(vmx); 2853 vmx_segment_cache_clear(vmx);
@@ -2794,22 +2864,16 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
2794 2864
2795 update_exception_bitmap(vcpu); 2865 update_exception_bitmap(vcpu);
2796 2866
2797 if (emulate_invalid_guest_state) 2867 fix_pmode_seg(vcpu, VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
2798 return; 2868 fix_pmode_seg(vcpu, VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
2799 2869 fix_pmode_seg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
2800 fix_pmode_dataseg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]); 2870 fix_pmode_seg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
2801 fix_pmode_dataseg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]); 2871 fix_pmode_seg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
2802 fix_pmode_dataseg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]); 2872 fix_pmode_seg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
2803 fix_pmode_dataseg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
2804
2805 vmx_segment_cache_clear(vmx);
2806 2873
2807 vmcs_write16(GUEST_SS_SELECTOR, 0); 2874 /* CPL is always 0 when CPU enters protected mode */
2808 vmcs_write32(GUEST_SS_AR_BYTES, 0x93); 2875 __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
2809 2876 vmx->cpl = 0;
2810 vmcs_write16(GUEST_CS_SELECTOR,
2811 vmcs_read16(GUEST_CS_SELECTOR) & ~SELECTOR_RPL_MASK);
2812 vmcs_write32(GUEST_CS_AR_BYTES, 0x9b);
2813} 2877}
2814 2878
2815static gva_t rmode_tss_base(struct kvm *kvm) 2879static gva_t rmode_tss_base(struct kvm *kvm)
@@ -2831,36 +2895,51 @@ static gva_t rmode_tss_base(struct kvm *kvm)
2831static void fix_rmode_seg(int seg, struct kvm_segment *save) 2895static void fix_rmode_seg(int seg, struct kvm_segment *save)
2832{ 2896{
2833 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; 2897 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
2834 2898 struct kvm_segment var = *save;
2835 vmcs_write16(sf->selector, save->base >> 4); 2899
2836 vmcs_write32(sf->base, save->base & 0xffff0); 2900 var.dpl = 0x3;
2837 vmcs_write32(sf->limit, 0xffff); 2901 if (seg == VCPU_SREG_CS)
2838 vmcs_write32(sf->ar_bytes, 0xf3); 2902 var.type = 0x3;
2839 if (save->base & 0xf) 2903
2840 printk_once(KERN_WARNING "kvm: segment base is not paragraph" 2904 if (!emulate_invalid_guest_state) {
2841 " aligned when entering protected mode (seg=%d)", 2905 var.selector = var.base >> 4;
2842 seg); 2906 var.base = var.base & 0xffff0;
2907 var.limit = 0xffff;
2908 var.g = 0;
2909 var.db = 0;
2910 var.present = 1;
2911 var.s = 1;
2912 var.l = 0;
2913 var.unusable = 0;
2914 var.type = 0x3;
2915 var.avl = 0;
2916 if (save->base & 0xf)
2917 printk_once(KERN_WARNING "kvm: segment base is not "
2918 "paragraph aligned when entering "
2919 "protected mode (seg=%d)", seg);
2920 }
2921
2922 vmcs_write16(sf->selector, var.selector);
2923 vmcs_write32(sf->base, var.base);
2924 vmcs_write32(sf->limit, var.limit);
2925 vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var));
2843} 2926}
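For the !emulate_invalid_guest_state path, fix_rmode_seg() now rebuilds a vm86-compatible descriptor from the saved protected-mode segment: the real-mode selector is the paragraph-aligned base shifted right by four, the base is truncated to a paragraph boundary, the limit is 64K and the type/dpl encode a ring-3 segment. A short worked example of that arithmetic, independent of kvm:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t base = 0x000b8000;		/* paragraph-aligned base */
	uint16_t selector = base >> 4;		/* 0xb800 */
	uint32_t rm_base = base & 0xffff0;	/* unchanged: already aligned */
	uint32_t limit = 0xffff;		/* 64K real-mode limit */

	assert(selector == 0xb800);
	assert(rm_base == 0x000b8000);
	assert((uint32_t)(selector << 4) == rm_base);	/* real-mode base rule */
	assert(limit == 0xffff);

	/* A base such as 0x12345 is not paragraph aligned: the low nibble is
	 * lost, which is exactly what the printk_once above warns about. */
	assert((0x12345u & 0xffff0) == 0x12340u);
	return 0;
}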
2844 2927
2845static void enter_rmode(struct kvm_vcpu *vcpu) 2928static void enter_rmode(struct kvm_vcpu *vcpu)
2846{ 2929{
2847 unsigned long flags; 2930 unsigned long flags;
2848 struct vcpu_vmx *vmx = to_vmx(vcpu); 2931 struct vcpu_vmx *vmx = to_vmx(vcpu);
2849 struct kvm_segment var;
2850
2851 if (enable_unrestricted_guest)
2852 return;
2853 2932
2854 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); 2933 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
2855 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES); 2934 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
2856 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS); 2935 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
2857 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS); 2936 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
2858 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS); 2937 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
2938 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS);
2939 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS);
2859 2940
2860 vmx->emulation_required = 1;
2861 vmx->rmode.vm86_active = 1; 2941 vmx->rmode.vm86_active = 1;
2862 2942
2863
2864 /* 2943 /*
2865 * Very old userspace does not call KVM_SET_TSS_ADDR before entering 2944 * Very old userspace does not call KVM_SET_TSS_ADDR before entering
2866 * vcpu. Call it here with phys address pointing 16M below 4G. 2945 * vcpu. Call it here with phys address pointing 16M below 4G.
@@ -2888,28 +2967,13 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
2888 vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME); 2967 vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME);
2889 update_exception_bitmap(vcpu); 2968 update_exception_bitmap(vcpu);
2890 2969
2891 if (emulate_invalid_guest_state) 2970 fix_rmode_seg(VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
2892 goto continue_rmode; 2971 fix_rmode_seg(VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
2893 2972 fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
2894 vmx_get_segment(vcpu, &var, VCPU_SREG_SS); 2973 fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
2895 vmx_set_segment(vcpu, &var, VCPU_SREG_SS); 2974 fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
2896 2975 fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
2897 vmx_get_segment(vcpu, &var, VCPU_SREG_CS);
2898 vmx_set_segment(vcpu, &var, VCPU_SREG_CS);
2899
2900 vmx_get_segment(vcpu, &var, VCPU_SREG_ES);
2901 vmx_set_segment(vcpu, &var, VCPU_SREG_ES);
2902
2903 vmx_get_segment(vcpu, &var, VCPU_SREG_DS);
2904 vmx_set_segment(vcpu, &var, VCPU_SREG_DS);
2905 2976
2906 vmx_get_segment(vcpu, &var, VCPU_SREG_GS);
2907 vmx_set_segment(vcpu, &var, VCPU_SREG_GS);
2908
2909 vmx_get_segment(vcpu, &var, VCPU_SREG_FS);
2910 vmx_set_segment(vcpu, &var, VCPU_SREG_FS);
2911
2912continue_rmode:
2913 kvm_mmu_reset_context(vcpu); 2977 kvm_mmu_reset_context(vcpu);
2914} 2978}
2915 2979
@@ -3068,17 +3132,18 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
3068 struct vcpu_vmx *vmx = to_vmx(vcpu); 3132 struct vcpu_vmx *vmx = to_vmx(vcpu);
3069 unsigned long hw_cr0; 3133 unsigned long hw_cr0;
3070 3134
3135 hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK);
3071 if (enable_unrestricted_guest) 3136 if (enable_unrestricted_guest)
3072 hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST) 3137 hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
3073 | KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST; 3138 else {
3074 else 3139 hw_cr0 |= KVM_VM_CR0_ALWAYS_ON;
3075 hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON;
3076 3140
3077 if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE)) 3141 if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE))
3078 enter_pmode(vcpu); 3142 enter_pmode(vcpu);
3079 3143
3080 if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE)) 3144 if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE))
3081 enter_rmode(vcpu); 3145 enter_rmode(vcpu);
3146 }
3082 3147
3083#ifdef CONFIG_X86_64 3148#ifdef CONFIG_X86_64
3084 if (vcpu->arch.efer & EFER_LME) { 3149 if (vcpu->arch.efer & EFER_LME) {
@@ -3098,7 +3163,9 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
3098 vmcs_writel(CR0_READ_SHADOW, cr0); 3163 vmcs_writel(CR0_READ_SHADOW, cr0);
3099 vmcs_writel(GUEST_CR0, hw_cr0); 3164 vmcs_writel(GUEST_CR0, hw_cr0);
3100 vcpu->arch.cr0 = cr0; 3165 vcpu->arch.cr0 = cr0;
3101 __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); 3166
3167 /* depends on vcpu->arch.cr0 to be set to a new value */
3168 vmx->emulation_required = emulation_required(vcpu);
3102} 3169}
3103 3170
3104static u64 construct_eptp(unsigned long root_hpa) 3171static u64 construct_eptp(unsigned long root_hpa)
@@ -3155,6 +3222,14 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
3155 if (!is_paging(vcpu)) { 3222 if (!is_paging(vcpu)) {
3156 hw_cr4 &= ~X86_CR4_PAE; 3223 hw_cr4 &= ~X86_CR4_PAE;
3157 hw_cr4 |= X86_CR4_PSE; 3224 hw_cr4 |= X86_CR4_PSE;
3225 /*
 3226 * SMEP is disabled if the CPU is in non-paging mode in
 3227 * hardware. However, KVM always uses paging mode to
 3228 * emulate guest non-paging mode with TDP.
 3229 * To emulate this behavior, SMEP needs to be manually
 3230 * disabled when the guest switches to non-paging mode.
3231 */
3232 hw_cr4 &= ~X86_CR4_SMEP;
3158 } else if (!(cr4 & X86_CR4_PAE)) { 3233 } else if (!(cr4 & X86_CR4_PAE)) {
3159 hw_cr4 &= ~X86_CR4_PAE; 3234 hw_cr4 &= ~X86_CR4_PAE;
3160 } 3235 }
@@ -3171,10 +3246,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
3171 struct vcpu_vmx *vmx = to_vmx(vcpu); 3246 struct vcpu_vmx *vmx = to_vmx(vcpu);
3172 u32 ar; 3247 u32 ar;
3173 3248
3174 if (vmx->rmode.vm86_active 3249 if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
3175 && (seg == VCPU_SREG_TR || seg == VCPU_SREG_ES
3176 || seg == VCPU_SREG_DS || seg == VCPU_SREG_FS
3177 || seg == VCPU_SREG_GS)) {
3178 *var = vmx->rmode.segs[seg]; 3250 *var = vmx->rmode.segs[seg];
3179 if (seg == VCPU_SREG_TR 3251 if (seg == VCPU_SREG_TR
3180 || var->selector == vmx_read_guest_seg_selector(vmx, seg)) 3252 || var->selector == vmx_read_guest_seg_selector(vmx, seg))
@@ -3187,8 +3259,6 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
3187 var->limit = vmx_read_guest_seg_limit(vmx, seg); 3259 var->limit = vmx_read_guest_seg_limit(vmx, seg);
3188 var->selector = vmx_read_guest_seg_selector(vmx, seg); 3260 var->selector = vmx_read_guest_seg_selector(vmx, seg);
3189 ar = vmx_read_guest_seg_ar(vmx, seg); 3261 ar = vmx_read_guest_seg_ar(vmx, seg);
3190 if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state)
3191 ar = 0;
3192 var->type = ar & 15; 3262 var->type = ar & 15;
3193 var->s = (ar >> 4) & 1; 3263 var->s = (ar >> 4) & 1;
3194 var->dpl = (ar >> 5) & 3; 3264 var->dpl = (ar >> 5) & 3;
@@ -3211,8 +3281,10 @@ static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
3211 return vmx_read_guest_seg_base(to_vmx(vcpu), seg); 3281 return vmx_read_guest_seg_base(to_vmx(vcpu), seg);
3212} 3282}
3213 3283
3214static int __vmx_get_cpl(struct kvm_vcpu *vcpu) 3284static int vmx_get_cpl(struct kvm_vcpu *vcpu)
3215{ 3285{
3286 struct vcpu_vmx *vmx = to_vmx(vcpu);
3287
3216 if (!is_protmode(vcpu)) 3288 if (!is_protmode(vcpu))
3217 return 0; 3289 return 0;
3218 3290
@@ -3220,24 +3292,9 @@ static int __vmx_get_cpl(struct kvm_vcpu *vcpu)
3220 && (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */ 3292 && (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */
3221 return 3; 3293 return 3;
3222 3294
3223 return vmx_read_guest_seg_selector(to_vmx(vcpu), VCPU_SREG_CS) & 3;
3224}
3225
3226static int vmx_get_cpl(struct kvm_vcpu *vcpu)
3227{
3228 struct vcpu_vmx *vmx = to_vmx(vcpu);
3229
3230 /*
3231 * If we enter real mode with cs.sel & 3 != 0, the normal CPL calculations
3232 * fail; use the cache instead.
3233 */
3234 if (unlikely(vmx->emulation_required && emulate_invalid_guest_state)) {
3235 return vmx->cpl;
3236 }
3237
3238 if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) { 3295 if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) {
3239 __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); 3296 __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
3240 vmx->cpl = __vmx_get_cpl(vcpu); 3297 vmx->cpl = vmx_read_guest_seg_selector(vmx, VCPU_SREG_CS) & 3;
3241 } 3298 }
3242 3299
3243 return vmx->cpl; 3300 return vmx->cpl;
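vmx_get_cpl() now folds the old __vmx_get_cpl() helper into a single lazily cached read: the CPL is recomputed from the low two bits of the CS selector only when the VCPU_EXREG_CPL availability bit is clear, and the paths that change CS (vmx_set_segment(), enter_pmode()) clear or pre-set that bit. A sketch of the same cache-on-first-use pattern with illustrative types:

#include <assert.h>
#include <stdbool.h>

struct vcpu {
	bool cpl_avail;		/* stands in for the VCPU_EXREG_CPL bit */
	int cpl;		/* cached value */
	unsigned cs_selector;	/* "expensive" source of truth */
	int slow_reads;		/* how often the slow path ran */
};

static int get_cpl(struct vcpu *v)
{
	if (!v->cpl_avail) {
		v->cpl = v->cs_selector & 3;	/* CPL lives in CS.RPL */
		v->cpl_avail = true;
		v->slow_reads++;
	}
	return v->cpl;
}

int main(void)
{
	struct vcpu v = { .cs_selector = 0x1b };	/* ring-3 code selector */

	assert(get_cpl(&v) == 3);
	assert(get_cpl(&v) == 3);
	assert(v.slow_reads == 1);	/* second call hit the cache */

	v.cpl_avail = false;		/* e.g. CS was rewritten */
	v.cs_selector = 0x08;		/* ring-0 selector */
	assert(get_cpl(&v) == 0);
	return 0;
}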
@@ -3269,28 +3326,23 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
3269{ 3326{
3270 struct vcpu_vmx *vmx = to_vmx(vcpu); 3327 struct vcpu_vmx *vmx = to_vmx(vcpu);
3271 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; 3328 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
3272 u32 ar;
3273 3329
3274 vmx_segment_cache_clear(vmx); 3330 vmx_segment_cache_clear(vmx);
3331 if (seg == VCPU_SREG_CS)
3332 __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
3275 3333
3276 if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) { 3334 if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
3277 vmcs_write16(sf->selector, var->selector); 3335 vmx->rmode.segs[seg] = *var;
3278 vmx->rmode.segs[VCPU_SREG_TR] = *var; 3336 if (seg == VCPU_SREG_TR)
3279 return; 3337 vmcs_write16(sf->selector, var->selector);
3338 else if (var->s)
3339 fix_rmode_seg(seg, &vmx->rmode.segs[seg]);
3340 goto out;
3280 } 3341 }
3342
3281 vmcs_writel(sf->base, var->base); 3343 vmcs_writel(sf->base, var->base);
3282 vmcs_write32(sf->limit, var->limit); 3344 vmcs_write32(sf->limit, var->limit);
3283 vmcs_write16(sf->selector, var->selector); 3345 vmcs_write16(sf->selector, var->selector);
3284 if (vmx->rmode.vm86_active && var->s) {
3285 vmx->rmode.segs[seg] = *var;
3286 /*
3287 * Hack real-mode segments into vm86 compatibility.
3288 */
3289 if (var->base == 0xffff0000 && var->selector == 0xf000)
3290 vmcs_writel(sf->base, 0xf0000);
3291 ar = 0xf3;
3292 } else
3293 ar = vmx_segment_access_rights(var);
3294 3346
3295 /* 3347 /*
3296 * Fix the "Accessed" bit in AR field of segment registers for older 3348 * Fix the "Accessed" bit in AR field of segment registers for older
@@ -3304,42 +3356,12 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
3304 * kvm hack. 3356 * kvm hack.
3305 */ 3357 */
3306 if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR)) 3358 if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR))
3307 ar |= 0x1; /* Accessed */ 3359 var->type |= 0x1; /* Accessed */
3308 3360
3309 vmcs_write32(sf->ar_bytes, ar); 3361 vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var));
3310 __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
3311 3362
3312 /* 3363out:
3313 * Fix segments for real mode guest in hosts that don't have 3364 vmx->emulation_required |= emulation_required(vcpu);
3314 * "unrestricted_mode" or it was disabled.
3315 * This is done to allow migration of the guests from hosts with
3316 * unrestricted guest like Westmere to older host that don't have
3317 * unrestricted guest like Nehelem.
3318 */
3319 if (vmx->rmode.vm86_active) {
3320 switch (seg) {
3321 case VCPU_SREG_CS:
3322 vmcs_write32(GUEST_CS_AR_BYTES, 0xf3);
3323 vmcs_write32(GUEST_CS_LIMIT, 0xffff);
3324 if (vmcs_readl(GUEST_CS_BASE) == 0xffff0000)
3325 vmcs_writel(GUEST_CS_BASE, 0xf0000);
3326 vmcs_write16(GUEST_CS_SELECTOR,
3327 vmcs_readl(GUEST_CS_BASE) >> 4);
3328 break;
3329 case VCPU_SREG_ES:
3330 case VCPU_SREG_DS:
3331 case VCPU_SREG_GS:
3332 case VCPU_SREG_FS:
3333 fix_rmode_seg(seg, &vmx->rmode.segs[seg]);
3334 break;
3335 case VCPU_SREG_SS:
3336 vmcs_write16(GUEST_SS_SELECTOR,
3337 vmcs_readl(GUEST_SS_BASE) >> 4);
3338 vmcs_write32(GUEST_SS_LIMIT, 0xffff);
3339 vmcs_write32(GUEST_SS_AR_BYTES, 0xf3);
3340 break;
3341 }
3342 }
3343} 3365}
3344 3366
3345static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) 3367static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
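For context on the raw 0xf3 and 0x93|0x08 values used in this file: they are VMX segment access-rights encodings. The sketch below packs the usual segment attributes into that layout (type in bits 3:0, S in bit 4, DPL in bits 6:5, P in bit 7, AVL/L/D-B/G in bits 12-15, plus the "unusable" bit 16); it is a user-space illustration assuming the standard VMCS AR-bytes format, not the kernel's vmx_segment_access_rights():

#include <stdint.h>
#include <stdio.h>

/* Segment attributes, roughly mirroring the fields of struct kvm_segment. */
struct seg_attr {
	unsigned type:4, s:1, dpl:2, present:1;
	unsigned avl:1, l:1, db:1, g:1, unusable:1;
};

/* Pack the attributes into the VMX access-rights format. */
static uint32_t seg_access_rights(const struct seg_attr *a)
{
	uint32_t ar;

	ar  = a->type;
	ar |= a->s << 4;
	ar |= a->dpl << 5;
	ar |= a->present << 7;
	ar |= a->avl << 12;
	ar |= a->l << 13;
	ar |= a->db << 14;
	ar |= a->g << 15;
	ar |= a->unusable << 16;
	return ar;
}

int main(void)
{
	/* vm86-style data segment: accessed, read/write, DPL 3, present -> 0xf3 */
	struct seg_attr vm86_data = { .type = 0x3, .s = 1, .dpl = 3, .present = 1 };
	/* reset-style code segment: accessed, execute/read, DPL 0, present -> 0x9b */
	struct seg_attr reset_cs  = { .type = 0xb, .s = 1, .dpl = 0, .present = 1 };

	printf("0x%x 0x%x\n", (unsigned)seg_access_rights(&vm86_data),
	       (unsigned)seg_access_rights(&reset_cs));
	return 0;
}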
@@ -3380,13 +3402,16 @@ static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg)
3380 u32 ar; 3402 u32 ar;
3381 3403
3382 vmx_get_segment(vcpu, &var, seg); 3404 vmx_get_segment(vcpu, &var, seg);
3405 var.dpl = 0x3;
3406 if (seg == VCPU_SREG_CS)
3407 var.type = 0x3;
3383 ar = vmx_segment_access_rights(&var); 3408 ar = vmx_segment_access_rights(&var);
3384 3409
3385 if (var.base != (var.selector << 4)) 3410 if (var.base != (var.selector << 4))
3386 return false; 3411 return false;
3387 if (var.limit < 0xffff) 3412 if (var.limit != 0xffff)
3388 return false; 3413 return false;
3389 if (((ar | (3 << AR_DPL_SHIFT)) & ~(AR_G_MASK | AR_DB_MASK)) != 0xf3) 3414 if (ar != 0xf3)
3390 return false; 3415 return false;
3391 3416
3392 return true; 3417 return true;
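The tightened checks above only accept a real-mode segment that vm86 can represent exactly: base equal to selector << 4, a 64KiB limit, and the fixed 0xf3 access-rights value. A stand-alone sketch of the same predicate (struct and helper names are made up):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct rm_seg {
	uint64_t base;
	uint32_t limit;
	uint32_t ar;
	uint16_t selector;
};

/* A segment is vm86-representable only if base, limit and AR match exactly. */
static bool rmode_seg_is_valid(const struct rm_seg *s)
{
	if (s->base != (uint64_t)s->selector << 4)
		return false;
	if (s->limit != 0xffff)
		return false;
	return s->ar == 0xf3;
}

int main(void)
{
	struct rm_seg cs = { .base = 0xf0000, .limit = 0xffff,
			     .ar = 0xf3, .selector = 0xf000 };

	printf("valid: %d\n", rmode_seg_is_valid(&cs));	/* 1 */
	return 0;
}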
@@ -3521,6 +3546,9 @@ static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu)
3521 */ 3546 */
3522static bool guest_state_valid(struct kvm_vcpu *vcpu) 3547static bool guest_state_valid(struct kvm_vcpu *vcpu)
3523{ 3548{
3549 if (enable_unrestricted_guest)
3550 return true;
3551
3524 /* real mode guest state checks */ 3552 /* real mode guest state checks */
3525 if (!is_protmode(vcpu)) { 3553 if (!is_protmode(vcpu)) {
3526 if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) 3554 if (!rmode_segment_valid(vcpu, VCPU_SREG_CS))
@@ -3644,12 +3672,9 @@ static void seg_setup(int seg)
3644 vmcs_write16(sf->selector, 0); 3672 vmcs_write16(sf->selector, 0);
3645 vmcs_writel(sf->base, 0); 3673 vmcs_writel(sf->base, 0);
3646 vmcs_write32(sf->limit, 0xffff); 3674 vmcs_write32(sf->limit, 0xffff);
3647 if (enable_unrestricted_guest) { 3675 ar = 0x93;
3648 ar = 0x93; 3676 if (seg == VCPU_SREG_CS)
3649 if (seg == VCPU_SREG_CS) 3677 ar |= 0x08; /* code segment */
3650 ar |= 0x08; /* code segment */
3651 } else
3652 ar = 0xf3;
3653 3678
3654 vmcs_write32(sf->ar_bytes, ar); 3679 vmcs_write32(sf->ar_bytes, ar);
3655} 3680}
@@ -3667,7 +3692,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
3667 kvm_userspace_mem.flags = 0; 3692 kvm_userspace_mem.flags = 0;
3668 kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL; 3693 kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL;
3669 kvm_userspace_mem.memory_size = PAGE_SIZE; 3694 kvm_userspace_mem.memory_size = PAGE_SIZE;
3670 r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0); 3695 r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false);
3671 if (r) 3696 if (r)
3672 goto out; 3697 goto out;
3673 3698
@@ -3697,7 +3722,7 @@ static int alloc_identity_pagetable(struct kvm *kvm)
3697 kvm_userspace_mem.guest_phys_addr = 3722 kvm_userspace_mem.guest_phys_addr =
3698 kvm->arch.ept_identity_map_addr; 3723 kvm->arch.ept_identity_map_addr;
3699 kvm_userspace_mem.memory_size = PAGE_SIZE; 3724 kvm_userspace_mem.memory_size = PAGE_SIZE;
3700 r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0); 3725 r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false);
3701 if (r) 3726 if (r)
3702 goto out; 3727 goto out;
3703 3728
@@ -3739,7 +3764,10 @@ static void free_vpid(struct vcpu_vmx *vmx)
3739 spin_unlock(&vmx_vpid_lock); 3764 spin_unlock(&vmx_vpid_lock);
3740} 3765}
3741 3766
3742static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr) 3767#define MSR_TYPE_R 1
3768#define MSR_TYPE_W 2
3769static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
3770 u32 msr, int type)
3743{ 3771{
3744 int f = sizeof(unsigned long); 3772 int f = sizeof(unsigned long);
3745 3773
@@ -3752,20 +3780,93 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
3752 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. 3780 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
3753 */ 3781 */
3754 if (msr <= 0x1fff) { 3782 if (msr <= 0x1fff) {
3755 __clear_bit(msr, msr_bitmap + 0x000 / f); /* read-low */ 3783 if (type & MSR_TYPE_R)
3756 __clear_bit(msr, msr_bitmap + 0x800 / f); /* write-low */ 3784 /* read-low */
3785 __clear_bit(msr, msr_bitmap + 0x000 / f);
3786
3787 if (type & MSR_TYPE_W)
3788 /* write-low */
3789 __clear_bit(msr, msr_bitmap + 0x800 / f);
3790
3757 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { 3791 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
3758 msr &= 0x1fff; 3792 msr &= 0x1fff;
3759 __clear_bit(msr, msr_bitmap + 0x400 / f); /* read-high */ 3793 if (type & MSR_TYPE_R)
3760 __clear_bit(msr, msr_bitmap + 0xc00 / f); /* write-high */ 3794 /* read-high */
3795 __clear_bit(msr, msr_bitmap + 0x400 / f);
3796
3797 if (type & MSR_TYPE_W)
3798 /* write-high */
3799 __clear_bit(msr, msr_bitmap + 0xc00 / f);
3800
3801 }
3802}
3803
3804static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
3805 u32 msr, int type)
3806{
3807 int f = sizeof(unsigned long);
3808
3809 if (!cpu_has_vmx_msr_bitmap())
3810 return;
3811
3812 /*
3813 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
3814 * have the write-low and read-high bitmap offsets the wrong way round.
3815 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
3816 */
3817 if (msr <= 0x1fff) {
3818 if (type & MSR_TYPE_R)
3819 /* read-low */
3820 __set_bit(msr, msr_bitmap + 0x000 / f);
3821
3822 if (type & MSR_TYPE_W)
3823 /* write-low */
3824 __set_bit(msr, msr_bitmap + 0x800 / f);
3825
3826 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
3827 msr &= 0x1fff;
3828 if (type & MSR_TYPE_R)
3829 /* read-high */
3830 __set_bit(msr, msr_bitmap + 0x400 / f);
3831
3832 if (type & MSR_TYPE_W)
3833 /* write-high */
3834 __set_bit(msr, msr_bitmap + 0xc00 / f);
3835
3761 } 3836 }
3762} 3837}
3763 3838
3764static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only) 3839static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
3765{ 3840{
3766 if (!longmode_only) 3841 if (!longmode_only)
3767 __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr); 3842 __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
3768 __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr); 3843 msr, MSR_TYPE_R | MSR_TYPE_W);
3844 __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
3845 msr, MSR_TYPE_R | MSR_TYPE_W);
3846}
3847
3848static void vmx_enable_intercept_msr_read_x2apic(u32 msr)
3849{
3850 __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
3851 msr, MSR_TYPE_R);
3852 __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
3853 msr, MSR_TYPE_R);
3854}
3855
3856static void vmx_disable_intercept_msr_read_x2apic(u32 msr)
3857{
3858 __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
3859 msr, MSR_TYPE_R);
3860 __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
3861 msr, MSR_TYPE_R);
3862}
3863
3864static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
3865{
3866 __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
3867 msr, MSR_TYPE_W);
3868 __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
3869 msr, MSR_TYPE_W);
3769} 3870}
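The read/write split introduced by the new helpers follows the MSR-bitmap page layout: read-low at offset 0x000, read-high at 0x400, write-low at 0x800 and write-high at 0xc00, where "low" covers MSRs 0x00000000-0x00001fff and "high" covers 0xc0000000-0xc0001fff. A compilable user-space sketch of that indexing (the helper name is illustrative, not the kernel's):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define MSR_TYPE_R 1
#define MSR_TYPE_W 2

/* Set or clear the intercept bit(s) for one MSR in a 4KiB bitmap page. */
static void msr_intercept(uint8_t *bitmap, uint32_t msr, int type, int enable)
{
	uint32_t base;

	if (msr <= 0x1fff) {
		base = 0x000;			/* "low" MSR range */
	} else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
		base = 0x400;			/* "high" MSR range */
		msr &= 0x1fff;
	} else {
		return;				/* everything else always exits */
	}

	if (type & MSR_TYPE_R) {
		uint8_t *b = bitmap + base + msr / 8;
		if (enable)
			*b |= 1u << (msr % 8);
		else
			*b &= ~(1u << (msr % 8));
	}
	if (type & MSR_TYPE_W) {
		uint8_t *b = bitmap + base + 0x800 + msr / 8;
		if (enable)
			*b |= 1u << (msr % 8);
		else
			*b &= ~(1u << (msr % 8));
	}
}

int main(void)
{
	uint8_t bitmap[4096];

	memset(bitmap, 0xff, sizeof(bitmap));		/* intercept everything */
	msr_intercept(bitmap, 0x808, MSR_TYPE_W, 0);	/* pass through TPR writes */
	printf("byte at 0x%x: 0x%02x\n",
	       0x800 + 0x808 / 8, bitmap[0x800 + 0x808 / 8]);	/* 0x901: 0xfe */
	return 0;
}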
3770 3871
3771/* 3872/*
@@ -3844,6 +3945,11 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
3844 return exec_control; 3945 return exec_control;
3845} 3946}
3846 3947
3948static int vmx_vm_has_apicv(struct kvm *kvm)
3949{
3950 return enable_apicv_reg_vid && irqchip_in_kernel(kvm);
3951}
3952
3847static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) 3953static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
3848{ 3954{
3849 u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; 3955 u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
@@ -3861,6 +3967,10 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
3861 exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; 3967 exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
3862 if (!ple_gap) 3968 if (!ple_gap)
3863 exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; 3969 exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
3970 if (!vmx_vm_has_apicv(vmx->vcpu.kvm))
3971 exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
3972 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
3973 exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
3864 return exec_control; 3974 return exec_control;
3865} 3975}
3866 3976
@@ -3905,6 +4015,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
3905 vmx_secondary_exec_control(vmx)); 4015 vmx_secondary_exec_control(vmx));
3906 } 4016 }
3907 4017
4018 if (enable_apicv_reg_vid) {
4019 vmcs_write64(EOI_EXIT_BITMAP0, 0);
4020 vmcs_write64(EOI_EXIT_BITMAP1, 0);
4021 vmcs_write64(EOI_EXIT_BITMAP2, 0);
4022 vmcs_write64(EOI_EXIT_BITMAP3, 0);
4023
4024 vmcs_write16(GUEST_INTR_STATUS, 0);
4025 }
4026
3908 if (ple_gap) { 4027 if (ple_gap) {
3909 vmcs_write32(PLE_GAP, ple_gap); 4028 vmcs_write32(PLE_GAP, ple_gap);
3910 vmcs_write32(PLE_WINDOW, ple_window); 4029 vmcs_write32(PLE_WINDOW, ple_window);
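The four EOI_EXIT_BITMAPn fields initialised above together form a single 256-bit bitmap, one bit per interrupt vector. A small sketch of how a vector selects a field index and bit position (assuming the natural vector/64 split):

#include <stdint.h>
#include <stdio.h>

/* Map an interrupt vector to its EOI_EXIT_BITMAPn field and bit. */
static void eoi_exit_bit(uint8_t vector, int *field, int *bit)
{
	*field = vector / 64;
	*bit = vector % 64;
}

int main(void)
{
	int field, bit;

	eoi_exit_bit(0xa1, &field, &bit);
	printf("vector 0xa1 -> EOI_EXIT_BITMAP%d, bit %d\n", field, bit);
	return 0;
}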
@@ -3990,14 +4109,9 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
3990 vmx_segment_cache_clear(vmx); 4109 vmx_segment_cache_clear(vmx);
3991 4110
3992 seg_setup(VCPU_SREG_CS); 4111 seg_setup(VCPU_SREG_CS);
3993 /* 4112 if (kvm_vcpu_is_bsp(&vmx->vcpu))
3994 * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode
3995 * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4. Sigh.
3996 */
3997 if (kvm_vcpu_is_bsp(&vmx->vcpu)) {
3998 vmcs_write16(GUEST_CS_SELECTOR, 0xf000); 4113 vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
3999 vmcs_writel(GUEST_CS_BASE, 0x000f0000); 4114 else {
4000 } else {
4001 vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.arch.sipi_vector << 8); 4115 vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.arch.sipi_vector << 8);
4002 vmcs_writel(GUEST_CS_BASE, vmx->vcpu.arch.sipi_vector << 12); 4116 vmcs_writel(GUEST_CS_BASE, vmx->vcpu.arch.sipi_vector << 12);
4003 } 4117 }
@@ -4073,9 +4187,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
4073 4187
4074 ret = 0; 4188 ret = 0;
4075 4189
4076 /* HACK: Don't enable emulation on guest boot/reset */
4077 vmx->emulation_required = 0;
4078
4079 return ret; 4190 return ret;
4080} 4191}
4081 4192
@@ -4251,7 +4362,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
4251 .flags = 0, 4362 .flags = 0,
4252 }; 4363 };
4253 4364
4254 ret = kvm_set_memory_region(kvm, &tss_mem, 0); 4365 ret = kvm_set_memory_region(kvm, &tss_mem, false);
4255 if (ret) 4366 if (ret)
4256 return ret; 4367 return ret;
4257 kvm->arch.tss_addr = addr; 4368 kvm->arch.tss_addr = addr;
@@ -4261,28 +4372,9 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
4261 return 0; 4372 return 0;
4262} 4373}
4263 4374
4264static int handle_rmode_exception(struct kvm_vcpu *vcpu, 4375static bool rmode_exception(struct kvm_vcpu *vcpu, int vec)
4265 int vec, u32 err_code)
4266{ 4376{
4267 /*
4268 * Instruction with address size override prefix opcode 0x67
4269 * Cause the #SS fault with 0 error code in VM86 mode.
4270 */
4271 if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0)
4272 if (emulate_instruction(vcpu, 0) == EMULATE_DONE)
4273 return 1;
4274 /*
4275 * Forward all other exceptions that are valid in real mode.
4276 * FIXME: Breaks guest debugging in real mode, needs to be fixed with
4277 * the required debugging infrastructure rework.
4278 */
4279 switch (vec) { 4377 switch (vec) {
4280 case DB_VECTOR:
4281 if (vcpu->guest_debug &
4282 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
4283 return 0;
4284 kvm_queue_exception(vcpu, vec);
4285 return 1;
4286 case BP_VECTOR: 4378 case BP_VECTOR:
4287 /* 4379 /*
4288 * Update instruction length as we may reinject the exception 4380 * Update instruction length as we may reinject the exception
@@ -4291,7 +4383,12 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
4291 to_vmx(vcpu)->vcpu.arch.event_exit_inst_len = 4383 to_vmx(vcpu)->vcpu.arch.event_exit_inst_len =
4292 vmcs_read32(VM_EXIT_INSTRUCTION_LEN); 4384 vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
4293 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) 4385 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
4294 return 0; 4386 return false;
4387 /* fall through */
4388 case DB_VECTOR:
4389 if (vcpu->guest_debug &
4390 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
4391 return false;
4295 /* fall through */ 4392 /* fall through */
4296 case DE_VECTOR: 4393 case DE_VECTOR:
4297 case OF_VECTOR: 4394 case OF_VECTOR:
@@ -4301,10 +4398,37 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
4301 case SS_VECTOR: 4398 case SS_VECTOR:
4302 case GP_VECTOR: 4399 case GP_VECTOR:
4303 case MF_VECTOR: 4400 case MF_VECTOR:
4304 kvm_queue_exception(vcpu, vec); 4401 return true;
4305 return 1; 4402 break;
4306 } 4403 }
4307 return 0; 4404 return false;
4405}
4406
4407static int handle_rmode_exception(struct kvm_vcpu *vcpu,
4408 int vec, u32 err_code)
4409{
4410 /*
4411	 * Instructions with the address-size override prefix (opcode 0x67)
4412	 * cause a #SS fault with error code 0 in VM86 mode.
4413 */
4414 if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) {
4415 if (emulate_instruction(vcpu, 0) == EMULATE_DONE) {
4416 if (vcpu->arch.halt_request) {
4417 vcpu->arch.halt_request = 0;
4418 return kvm_emulate_halt(vcpu);
4419 }
4420 return 1;
4421 }
4422 return 0;
4423 }
4424
4425 /*
4426 * Forward all other exceptions that are valid in real mode.
4427 * FIXME: Breaks guest debugging in real mode, needs to be fixed with
4428 * the required debugging infrastructure rework.
4429 */
4430 kvm_queue_exception(vcpu, vec);
4431 return 1;
4308} 4432}
4309 4433
4310/* 4434/*
@@ -4392,17 +4516,11 @@ static int handle_exception(struct kvm_vcpu *vcpu)
4392 return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0); 4516 return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0);
4393 } 4517 }
4394 4518
4395 if (vmx->rmode.vm86_active &&
4396 handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK,
4397 error_code)) {
4398 if (vcpu->arch.halt_request) {
4399 vcpu->arch.halt_request = 0;
4400 return kvm_emulate_halt(vcpu);
4401 }
4402 return 1;
4403 }
4404
4405 ex_no = intr_info & INTR_INFO_VECTOR_MASK; 4519 ex_no = intr_info & INTR_INFO_VECTOR_MASK;
4520
4521 if (vmx->rmode.vm86_active && rmode_exception(vcpu, ex_no))
4522 return handle_rmode_exception(vcpu, ex_no, error_code);
4523
4406 switch (ex_no) { 4524 switch (ex_no) {
4407 case DB_VECTOR: 4525 case DB_VECTOR:
4408 dr6 = vmcs_readl(EXIT_QUALIFICATION); 4526 dr6 = vmcs_readl(EXIT_QUALIFICATION);
@@ -4820,6 +4938,26 @@ static int handle_apic_access(struct kvm_vcpu *vcpu)
4820 return emulate_instruction(vcpu, 0) == EMULATE_DONE; 4938 return emulate_instruction(vcpu, 0) == EMULATE_DONE;
4821} 4939}
4822 4940
4941static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
4942{
4943 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
4944 int vector = exit_qualification & 0xff;
4945
4946 /* EOI-induced VM exit is trap-like and thus no need to adjust IP */
4947 kvm_apic_set_eoi_accelerated(vcpu, vector);
4948 return 1;
4949}
4950
4951static int handle_apic_write(struct kvm_vcpu *vcpu)
4952{
4953 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
4954 u32 offset = exit_qualification & 0xfff;
4955
4956 /* APIC-write VM exit is trap-like and thus no need to adjust IP */
4957 kvm_apic_write_nodecode(vcpu, offset);
4958 return 1;
4959}
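Both new handlers decode the exit qualification in the same way: the EOI-induced exit carries the vector in the low 8 bits and the APIC-write exit carries the APIC-page offset in the low 12 bits. A trivial sketch with a made-up qualification value:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t qual = 0x2b0;	/* hypothetical EXIT_QUALIFICATION value */

	printf("EOI vector:        0x%02llx\n", (unsigned long long)(qual & 0xff));
	printf("APIC write offset: 0x%03llx\n", (unsigned long long)(qual & 0xfff));
	return 0;
}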
4960
4823static int handle_task_switch(struct kvm_vcpu *vcpu) 4961static int handle_task_switch(struct kvm_vcpu *vcpu)
4824{ 4962{
4825 struct vcpu_vmx *vmx = to_vmx(vcpu); 4963 struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -5065,7 +5203,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
5065 schedule(); 5203 schedule();
5066 } 5204 }
5067 5205
5068 vmx->emulation_required = !guest_state_valid(vcpu); 5206 vmx->emulation_required = emulation_required(vcpu);
5069out: 5207out:
5070 return ret; 5208 return ret;
5071} 5209}
@@ -5754,6 +5892,8 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
5754 [EXIT_REASON_VMON] = handle_vmon, 5892 [EXIT_REASON_VMON] = handle_vmon,
5755 [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, 5893 [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold,
5756 [EXIT_REASON_APIC_ACCESS] = handle_apic_access, 5894 [EXIT_REASON_APIC_ACCESS] = handle_apic_access,
5895 [EXIT_REASON_APIC_WRITE] = handle_apic_write,
5896 [EXIT_REASON_EOI_INDUCED] = handle_apic_eoi_induced,
5757 [EXIT_REASON_WBINVD] = handle_wbinvd, 5897 [EXIT_REASON_WBINVD] = handle_wbinvd,
5758 [EXIT_REASON_XSETBV] = handle_xsetbv, 5898 [EXIT_REASON_XSETBV] = handle_xsetbv,
5759 [EXIT_REASON_TASK_SWITCH] = handle_task_switch, 5899 [EXIT_REASON_TASK_SWITCH] = handle_task_switch,
@@ -5780,7 +5920,7 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
5780 u32 msr_index = vcpu->arch.regs[VCPU_REGS_RCX]; 5920 u32 msr_index = vcpu->arch.regs[VCPU_REGS_RCX];
5781 gpa_t bitmap; 5921 gpa_t bitmap;
5782 5922
5783 if (!nested_cpu_has(get_vmcs12(vcpu), CPU_BASED_USE_MSR_BITMAPS)) 5923 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
5784 return 1; 5924 return 1;
5785 5925
5786 /* 5926 /*
@@ -6008,7 +6148,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
6008 u32 vectoring_info = vmx->idt_vectoring_info; 6148 u32 vectoring_info = vmx->idt_vectoring_info;
6009 6149
6010 /* If guest state is invalid, start emulating */ 6150 /* If guest state is invalid, start emulating */
6011 if (vmx->emulation_required && emulate_invalid_guest_state) 6151 if (vmx->emulation_required)
6012 return handle_invalid_guest_state(vcpu); 6152 return handle_invalid_guest_state(vcpu);
6013 6153
6014 /* 6154 /*
@@ -6103,6 +6243,85 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
6103 vmcs_write32(TPR_THRESHOLD, irr); 6243 vmcs_write32(TPR_THRESHOLD, irr);
6104} 6244}
6105 6245
6246static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
6247{
6248 u32 sec_exec_control;
6249
6250 /*
6251	 * There is no point in enabling virtualize x2apic mode without
6252	 * apicv enabled.
6253 */
6254 if (!cpu_has_vmx_virtualize_x2apic_mode() ||
6255 !vmx_vm_has_apicv(vcpu->kvm))
6256 return;
6257
6258 if (!vm_need_tpr_shadow(vcpu->kvm))
6259 return;
6260
6261 sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
6262
6263 if (set) {
6264 sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
6265 sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
6266 } else {
6267 sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
6268 sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
6269 }
6270 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
6271
6272 vmx_set_msr_bitmap(vcpu);
6273}
6274
6275static void vmx_hwapic_isr_update(struct kvm *kvm, int isr)
6276{
6277 u16 status;
6278 u8 old;
6279
6280 if (!vmx_vm_has_apicv(kvm))
6281 return;
6282
6283 if (isr == -1)
6284 isr = 0;
6285
6286 status = vmcs_read16(GUEST_INTR_STATUS);
6287 old = status >> 8;
6288 if (isr != old) {
6289 status &= 0xff;
6290 status |= isr << 8;
6291 vmcs_write16(GUEST_INTR_STATUS, status);
6292 }
6293}
6294
6295static void vmx_set_rvi(int vector)
6296{
6297 u16 status;
6298 u8 old;
6299
6300 status = vmcs_read16(GUEST_INTR_STATUS);
6301 old = (u8)status & 0xff;
6302 if ((u8)vector != old) {
6303 status &= ~0xff;
6304 status |= (u8)vector;
6305 vmcs_write16(GUEST_INTR_STATUS, status);
6306 }
6307}
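vmx_set_rvi() and vmx_hwapic_isr_update() edit the two halves of the 16-bit GUEST_INTR_STATUS field: RVI in the low byte, SVI in the high byte. A stand-alone sketch of that packing (plain C, not the VMCS accessors):

#include <stdint.h>
#include <stdio.h>

/* Replace the RVI (low byte) of the interrupt-status word. */
static uint16_t set_rvi(uint16_t status, uint8_t vector)
{
	return (status & 0xff00) | vector;
}

/* Replace the SVI (high byte) of the interrupt-status word. */
static uint16_t set_svi(uint16_t status, uint8_t isr)
{
	return (status & 0x00ff) | (uint16_t)(isr << 8);
}

int main(void)
{
	uint16_t status = 0;

	status = set_rvi(status, 0x41);
	status = set_svi(status, 0x30);
	printf("GUEST_INTR_STATUS = 0x%04x\n", status);	/* 0x3041 */
	return 0;
}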
6308
6309static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
6310{
6311 if (max_irr == -1)
6312 return;
6313
6314 vmx_set_rvi(max_irr);
6315}
6316
6317static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
6318{
6319 vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
6320 vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]);
6321 vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]);
6322 vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]);
6323}
6324
6106static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) 6325static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
6107{ 6326{
6108 u32 exit_intr_info; 6327 u32 exit_intr_info;
@@ -6291,7 +6510,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
6291 6510
6292 /* Don't enter VMX if guest state is invalid, let the exit handler 6511 /* Don't enter VMX if guest state is invalid, let the exit handler
6293 start emulation until we arrive back to a valid state */ 6512 start emulation until we arrive back to a valid state */
6294 if (vmx->emulation_required && emulate_invalid_guest_state) 6513 if (vmx->emulation_required)
6295 return; 6514 return;
6296 6515
6297 if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) 6516 if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
@@ -7366,6 +7585,11 @@ static struct kvm_x86_ops vmx_x86_ops = {
7366 .enable_nmi_window = enable_nmi_window, 7585 .enable_nmi_window = enable_nmi_window,
7367 .enable_irq_window = enable_irq_window, 7586 .enable_irq_window = enable_irq_window,
7368 .update_cr8_intercept = update_cr8_intercept, 7587 .update_cr8_intercept = update_cr8_intercept,
7588 .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode,
7589 .vm_has_apicv = vmx_vm_has_apicv,
7590 .load_eoi_exitmap = vmx_load_eoi_exitmap,
7591 .hwapic_irr_update = vmx_hwapic_irr_update,
7592 .hwapic_isr_update = vmx_hwapic_isr_update,
7369 7593
7370 .set_tss_addr = vmx_set_tss_addr, 7594 .set_tss_addr = vmx_set_tss_addr,
7371 .get_tdp_level = get_ept_level, 7595 .get_tdp_level = get_ept_level,
@@ -7398,7 +7622,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
7398 7622
7399static int __init vmx_init(void) 7623static int __init vmx_init(void)
7400{ 7624{
7401 int r, i; 7625 int r, i, msr;
7402 7626
7403 rdmsrl_safe(MSR_EFER, &host_efer); 7627 rdmsrl_safe(MSR_EFER, &host_efer);
7404 7628
@@ -7419,11 +7643,19 @@ static int __init vmx_init(void)
7419 if (!vmx_msr_bitmap_legacy) 7643 if (!vmx_msr_bitmap_legacy)
7420 goto out1; 7644 goto out1;
7421 7645
7646 vmx_msr_bitmap_legacy_x2apic =
7647 (unsigned long *)__get_free_page(GFP_KERNEL);
7648 if (!vmx_msr_bitmap_legacy_x2apic)
7649 goto out2;
7422 7650
7423 vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL); 7651 vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
7424 if (!vmx_msr_bitmap_longmode) 7652 if (!vmx_msr_bitmap_longmode)
7425 goto out2; 7653 goto out3;
7426 7654
7655 vmx_msr_bitmap_longmode_x2apic =
7656 (unsigned long *)__get_free_page(GFP_KERNEL);
7657 if (!vmx_msr_bitmap_longmode_x2apic)
7658 goto out4;
7427 7659
7428 /* 7660 /*
7429 * Allow direct access to the PC debug port (it is often used for I/O 7661 * Allow direct access to the PC debug port (it is often used for I/O
@@ -7455,6 +7687,28 @@ static int __init vmx_init(void)
7455 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); 7687 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
7456 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); 7688 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
7457 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); 7689 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
7690 memcpy(vmx_msr_bitmap_legacy_x2apic,
7691 vmx_msr_bitmap_legacy, PAGE_SIZE);
7692 memcpy(vmx_msr_bitmap_longmode_x2apic,
7693 vmx_msr_bitmap_longmode, PAGE_SIZE);
7694
7695 if (enable_apicv_reg_vid) {
7696 for (msr = 0x800; msr <= 0x8ff; msr++)
7697 vmx_disable_intercept_msr_read_x2apic(msr);
7698
7699		/* According to the SDM, in x2apic mode the whole ID register
7700		 * is used, but KVM only uses the highest eight bits, so it
7701		 * still needs to be intercepted. */
7702 vmx_enable_intercept_msr_read_x2apic(0x802);
7703 /* TMCCT */
7704 vmx_enable_intercept_msr_read_x2apic(0x839);
7705 /* TPR */
7706 vmx_disable_intercept_msr_write_x2apic(0x808);
7707 /* EOI */
7708 vmx_disable_intercept_msr_write_x2apic(0x80b);
7709 /* SELF-IPI */
7710 vmx_disable_intercept_msr_write_x2apic(0x83f);
7711 }
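The 0x800-0x8ff loop and the special cases above follow the architectural x2APIC mapping: MSR index = 0x800 + (xAPIC MMIO offset >> 4). That is how TPR (offset 0x80) becomes MSR 0x808, EOI (0xb0) becomes 0x80b and TMCCT (0x390) becomes 0x839. A one-function sketch of the mapping:

#include <stdint.h>
#include <stdio.h>

/* x2APIC MSR index for a given xAPIC MMIO register offset. */
static uint32_t x2apic_msr(uint32_t mmio_offset)
{
	return 0x800 + (mmio_offset >> 4);
}

int main(void)
{
	printf("ID    (0x020) -> MSR 0x%03x\n", x2apic_msr(0x020));	/* 0x802 */
	printf("TPR   (0x080) -> MSR 0x%03x\n", x2apic_msr(0x080));	/* 0x808 */
	printf("EOI   (0x0b0) -> MSR 0x%03x\n", x2apic_msr(0x0b0));	/* 0x80b */
	printf("TMCCT (0x390) -> MSR 0x%03x\n", x2apic_msr(0x390));	/* 0x839 */
	return 0;
}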
7458 7712
7459 if (enable_ept) { 7713 if (enable_ept) {
7460 kvm_mmu_set_mask_ptes(0ull, 7714 kvm_mmu_set_mask_ptes(0ull,
@@ -7468,8 +7722,10 @@ static int __init vmx_init(void)
7468 7722
7469 return 0; 7723 return 0;
7470 7724
7471out3: 7725out4:
7472 free_page((unsigned long)vmx_msr_bitmap_longmode); 7726 free_page((unsigned long)vmx_msr_bitmap_longmode);
7727out3:
7728 free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
7473out2: 7729out2:
7474 free_page((unsigned long)vmx_msr_bitmap_legacy); 7730 free_page((unsigned long)vmx_msr_bitmap_legacy);
7475out1: 7731out1:
@@ -7481,6 +7737,8 @@ out:
7481 7737
7482static void __exit vmx_exit(void) 7738static void __exit vmx_exit(void)
7483{ 7739{
7740 free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
7741 free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
7484 free_page((unsigned long)vmx_msr_bitmap_legacy); 7742 free_page((unsigned long)vmx_msr_bitmap_legacy);
7485 free_page((unsigned long)vmx_msr_bitmap_longmode); 7743 free_page((unsigned long)vmx_msr_bitmap_longmode);
7486 free_page((unsigned long)vmx_io_bitmap_b); 7744 free_page((unsigned long)vmx_io_bitmap_b);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 37040079cd6b..f71500af1f81 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -872,8 +872,6 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
872 872
873 kvm_x86_ops->set_efer(vcpu, efer); 873 kvm_x86_ops->set_efer(vcpu, efer);
874 874
875 vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
876
877 /* Update reserved bits */ 875 /* Update reserved bits */
878 if ((efer ^ old_efer) & EFER_NX) 876 if ((efer ^ old_efer) & EFER_NX)
879 kvm_mmu_reset_context(vcpu); 877 kvm_mmu_reset_context(vcpu);
@@ -2522,7 +2520,7 @@ int kvm_dev_ioctl_check_extension(long ext)
2522 r = KVM_MAX_VCPUS; 2520 r = KVM_MAX_VCPUS;
2523 break; 2521 break;
2524 case KVM_CAP_NR_MEMSLOTS: 2522 case KVM_CAP_NR_MEMSLOTS:
2525 r = KVM_MEMORY_SLOTS; 2523 r = KVM_USER_MEM_SLOTS;
2526 break; 2524 break;
2527 case KVM_CAP_PV_MMU: /* obsolete */ 2525 case KVM_CAP_PV_MMU: /* obsolete */
2528 r = 0; 2526 r = 0;
@@ -3274,12 +3272,10 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
3274 return -EINVAL; 3272 return -EINVAL;
3275 3273
3276 mutex_lock(&kvm->slots_lock); 3274 mutex_lock(&kvm->slots_lock);
3277 spin_lock(&kvm->mmu_lock);
3278 3275
3279 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); 3276 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
3280 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; 3277 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
3281 3278
3282 spin_unlock(&kvm->mmu_lock);
3283 mutex_unlock(&kvm->slots_lock); 3279 mutex_unlock(&kvm->slots_lock);
3284 return 0; 3280 return 0;
3285} 3281}
@@ -3439,7 +3435,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
3439 mutex_lock(&kvm->slots_lock); 3435 mutex_lock(&kvm->slots_lock);
3440 3436
3441 r = -EINVAL; 3437 r = -EINVAL;
3442 if (log->slot >= KVM_MEMORY_SLOTS) 3438 if (log->slot >= KVM_USER_MEM_SLOTS)
3443 goto out; 3439 goto out;
3444 3440
3445 memslot = id_to_memslot(kvm->memslots, log->slot); 3441 memslot = id_to_memslot(kvm->memslots, log->slot);
@@ -4495,8 +4491,10 @@ static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
4495 kvm_get_segment(emul_to_vcpu(ctxt), &var, seg); 4491 kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
4496 *selector = var.selector; 4492 *selector = var.selector;
4497 4493
4498 if (var.unusable) 4494 if (var.unusable) {
4495 memset(desc, 0, sizeof(*desc));
4499 return false; 4496 return false;
4497 }
4500 4498
4501 if (var.g) 4499 if (var.g)
4502 var.limit >>= 12; 4500 var.limit >>= 12;
@@ -4757,26 +4755,26 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
4757 return r; 4755 return r;
4758} 4756}
4759 4757
4760static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) 4758static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
4759 bool write_fault_to_shadow_pgtable)
4761{ 4760{
4762 gpa_t gpa; 4761 gpa_t gpa = cr2;
4763 pfn_t pfn; 4762 pfn_t pfn;
4764 4763
4765 if (tdp_enabled) 4764 if (!vcpu->arch.mmu.direct_map) {
4766 return false; 4765 /*
4767 4766 * Write permission should be allowed since only
4768 /* 4767 * write access need to be emulated.
4769 * if emulation was due to access to shadowed page table 4768 */
4770 * and it failed try to unshadow page and re-enter the 4769 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
4771 * guest to let CPU execute the instruction.
4772 */
4773 if (kvm_mmu_unprotect_page_virt(vcpu, gva))
4774 return true;
4775
4776 gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, NULL);
4777 4770
4778 if (gpa == UNMAPPED_GVA) 4771 /*
4779		return true; /* let cpu generate fault */ 4772	 * If the mapping is invalid in the guest, let the CPU retry
4773	 * it to generate the fault.
4774 */
4775 if (gpa == UNMAPPED_GVA)
4776 return true;
4777 }
4780 4778
4781 /* 4779 /*
4782 * Do not retry the unhandleable instruction if it faults on the 4780 * Do not retry the unhandleable instruction if it faults on the
@@ -4785,12 +4783,43 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
4785 * instruction -> ... 4783 * instruction -> ...
4786 */ 4784 */
4787 pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa)); 4785 pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
4788 if (!is_error_noslot_pfn(pfn)) { 4786
4789 kvm_release_pfn_clean(pfn); 4787 /*
4788	 * If the instruction failed on the error pfn, it cannot be fixed;
4789	 * report the error to userspace.
4790 */
4791 if (is_error_noslot_pfn(pfn))
4792 return false;
4793
4794 kvm_release_pfn_clean(pfn);
4795
4796 /* The instructions are well-emulated on direct mmu. */
4797 if (vcpu->arch.mmu.direct_map) {
4798 unsigned int indirect_shadow_pages;
4799
4800 spin_lock(&vcpu->kvm->mmu_lock);
4801 indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
4802 spin_unlock(&vcpu->kvm->mmu_lock);
4803
4804 if (indirect_shadow_pages)
4805 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
4806
4790 return true; 4807 return true;
4791 } 4808 }
4792 4809
4793 return false; 4810 /*
4811	 * If emulation failed due to an access to a shadowed page table,
4812	 * try to unshadow the page and re-enter the guest to let the CPU
4813	 * execute the instruction.
4814 */
4815 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
4816
4817 /*
4818	 * If the access faults on its own page table, it cannot be fixed
4819	 * by unprotecting the shadow page, and it should be reported to
4820	 * userspace.
4821 */
4822 return !write_fault_to_shadow_pgtable;
4794} 4823}
4795 4824
4796static bool retry_instruction(struct x86_emulate_ctxt *ctxt, 4825static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
@@ -4832,7 +4861,7 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
4832 if (!vcpu->arch.mmu.direct_map) 4861 if (!vcpu->arch.mmu.direct_map)
4833 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); 4862 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
4834 4863
4835 kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); 4864 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
4836 4865
4837 return true; 4866 return true;
4838} 4867}
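The switch from the open-coded shift to gpa_to_gfn() is cosmetic: the conversion is just a page shift. A minimal sketch assuming 4KiB pages:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

/* Guest frame number for a guest physical address. */
static uint64_t gpa_to_gfn(uint64_t gpa)
{
	return gpa >> PAGE_SHIFT;
}

int main(void)
{
	printf("gfn for gpa 0xfee00a30: 0x%llx\n",
	       (unsigned long long)gpa_to_gfn(0xfee00a30ull));	/* 0xfee00 */
	return 0;
}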
@@ -4849,7 +4878,13 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
4849 int r; 4878 int r;
4850 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; 4879 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
4851 bool writeback = true; 4880 bool writeback = true;
4881 bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
4852 4882
4883 /*
4884 * Clear write_fault_to_shadow_pgtable here to ensure it is
4885 * never reused.
4886 */
4887 vcpu->arch.write_fault_to_shadow_pgtable = false;
4853 kvm_clear_exception_queue(vcpu); 4888 kvm_clear_exception_queue(vcpu);
4854 4889
4855 if (!(emulation_type & EMULTYPE_NO_DECODE)) { 4890 if (!(emulation_type & EMULTYPE_NO_DECODE)) {
@@ -4868,7 +4903,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
4868 if (r != EMULATION_OK) { 4903 if (r != EMULATION_OK) {
4869 if (emulation_type & EMULTYPE_TRAP_UD) 4904 if (emulation_type & EMULTYPE_TRAP_UD)
4870 return EMULATE_FAIL; 4905 return EMULATE_FAIL;
4871 if (reexecute_instruction(vcpu, cr2)) 4906 if (reexecute_instruction(vcpu, cr2,
4907 write_fault_to_spt))
4872 return EMULATE_DONE; 4908 return EMULATE_DONE;
4873 if (emulation_type & EMULTYPE_SKIP) 4909 if (emulation_type & EMULTYPE_SKIP)
4874 return EMULATE_FAIL; 4910 return EMULATE_FAIL;
@@ -4898,7 +4934,7 @@ restart:
4898 return EMULATE_DONE; 4934 return EMULATE_DONE;
4899 4935
4900 if (r == EMULATION_FAILED) { 4936 if (r == EMULATION_FAILED) {
4901 if (reexecute_instruction(vcpu, cr2)) 4937 if (reexecute_instruction(vcpu, cr2, write_fault_to_spt))
4902 return EMULATE_DONE; 4938 return EMULATE_DONE;
4903 4939
4904 return handle_emulation_failure(vcpu); 4940 return handle_emulation_failure(vcpu);
@@ -5541,7 +5577,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
5541 vcpu->arch.nmi_injected = true; 5577 vcpu->arch.nmi_injected = true;
5542 kvm_x86_ops->set_nmi(vcpu); 5578 kvm_x86_ops->set_nmi(vcpu);
5543 } 5579 }
5544 } else if (kvm_cpu_has_interrupt(vcpu)) { 5580 } else if (kvm_cpu_has_injectable_intr(vcpu)) {
5545 if (kvm_x86_ops->interrupt_allowed(vcpu)) { 5581 if (kvm_x86_ops->interrupt_allowed(vcpu)) {
5546 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), 5582 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
5547 false); 5583 false);
@@ -5609,6 +5645,16 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
5609#endif 5645#endif
5610} 5646}
5611 5647
5648static void update_eoi_exitmap(struct kvm_vcpu *vcpu)
5649{
5650 u64 eoi_exit_bitmap[4];
5651
5652 memset(eoi_exit_bitmap, 0, 32);
5653
5654 kvm_ioapic_calculate_eoi_exitmap(vcpu, eoi_exit_bitmap);
5655 kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
5656}
5657
5612static int vcpu_enter_guest(struct kvm_vcpu *vcpu) 5658static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5613{ 5659{
5614 int r; 5660 int r;
@@ -5662,6 +5708,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5662 kvm_handle_pmu_event(vcpu); 5708 kvm_handle_pmu_event(vcpu);
5663 if (kvm_check_request(KVM_REQ_PMI, vcpu)) 5709 if (kvm_check_request(KVM_REQ_PMI, vcpu))
5664 kvm_deliver_pmi(vcpu); 5710 kvm_deliver_pmi(vcpu);
5711 if (kvm_check_request(KVM_REQ_EOIBITMAP, vcpu))
5712 update_eoi_exitmap(vcpu);
5665 } 5713 }
5666 5714
5667 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { 5715 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
@@ -5670,10 +5718,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5670 /* enable NMI/IRQ window open exits if needed */ 5718 /* enable NMI/IRQ window open exits if needed */
5671 if (vcpu->arch.nmi_pending) 5719 if (vcpu->arch.nmi_pending)
5672 kvm_x86_ops->enable_nmi_window(vcpu); 5720 kvm_x86_ops->enable_nmi_window(vcpu);
5673 else if (kvm_cpu_has_interrupt(vcpu) || req_int_win) 5721 else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
5674 kvm_x86_ops->enable_irq_window(vcpu); 5722 kvm_x86_ops->enable_irq_window(vcpu);
5675 5723
5676 if (kvm_lapic_enabled(vcpu)) { 5724 if (kvm_lapic_enabled(vcpu)) {
5725 /*
5726 * Update architecture specific hints for APIC
5727 * virtual interrupt delivery.
5728 */
5729 if (kvm_x86_ops->hwapic_irr_update)
5730 kvm_x86_ops->hwapic_irr_update(vcpu,
5731 kvm_lapic_find_highest_irr(vcpu));
5677 update_cr8_intercept(vcpu); 5732 update_cr8_intercept(vcpu);
5678 kvm_lapic_sync_to_vapic(vcpu); 5733 kvm_lapic_sync_to_vapic(vcpu);
5679 } 5734 }
@@ -6853,48 +6908,43 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
6853 struct kvm_memory_slot *memslot, 6908 struct kvm_memory_slot *memslot,
6854 struct kvm_memory_slot old, 6909 struct kvm_memory_slot old,
6855 struct kvm_userspace_memory_region *mem, 6910 struct kvm_userspace_memory_region *mem,
6856 int user_alloc) 6911 bool user_alloc)
6857{ 6912{
6858 int npages = memslot->npages; 6913 int npages = memslot->npages;
6859 int map_flags = MAP_PRIVATE | MAP_ANONYMOUS;
6860 6914
6861 /* Prevent internal slot pages from being moved by fork()/COW. */ 6915 /*
6862 if (memslot->id >= KVM_MEMORY_SLOTS) 6916 * Only private memory slots need to be mapped here since
6863 map_flags = MAP_SHARED | MAP_ANONYMOUS; 6917 * KVM_SET_MEMORY_REGION ioctl is no longer supported.
6864
6865 /*To keep backward compatibility with older userspace,
6866 *x86 needs to handle !user_alloc case.
6867 */ 6918 */
6868 if (!user_alloc) { 6919 if ((memslot->id >= KVM_USER_MEM_SLOTS) && npages && !old.npages) {
6869 if (npages && !old.npages) { 6920 unsigned long userspace_addr;
6870 unsigned long userspace_addr;
6871 6921
6872 userspace_addr = vm_mmap(NULL, 0, 6922 /*
6873 npages * PAGE_SIZE, 6923 * MAP_SHARED to prevent internal slot pages from being moved
6874 PROT_READ | PROT_WRITE, 6924 * by fork()/COW.
6875 map_flags, 6925 */
6876 0); 6926 userspace_addr = vm_mmap(NULL, 0, npages * PAGE_SIZE,
6927 PROT_READ | PROT_WRITE,
6928 MAP_SHARED | MAP_ANONYMOUS, 0);
6877 6929
6878 if (IS_ERR((void *)userspace_addr)) 6930 if (IS_ERR((void *)userspace_addr))
6879 return PTR_ERR((void *)userspace_addr); 6931 return PTR_ERR((void *)userspace_addr);
6880 6932
6881 memslot->userspace_addr = userspace_addr; 6933 memslot->userspace_addr = userspace_addr;
6882 }
6883 } 6934 }
6884 6935
6885
6886 return 0; 6936 return 0;
6887} 6937}
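A user-space analogue of the vm_mmap() call used above for private slots, showing why MAP_SHARED | MAP_ANONYMOUS is chosen: the pages stay shared across fork() instead of diverging copy-on-write. Illustrative only, not the in-kernel API:

#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	size_t npages = 1, len = npages * 4096;
	void *addr = mmap(NULL, len, PROT_READ | PROT_WRITE,
			  MAP_SHARED | MAP_ANONYMOUS, -1, 0);

	if (addr == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	printf("mapped %zu bytes at %p\n", len, addr);
	munmap(addr, len);
	return 0;
}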
6888 6938
6889void kvm_arch_commit_memory_region(struct kvm *kvm, 6939void kvm_arch_commit_memory_region(struct kvm *kvm,
6890 struct kvm_userspace_memory_region *mem, 6940 struct kvm_userspace_memory_region *mem,
6891 struct kvm_memory_slot old, 6941 struct kvm_memory_slot old,
6892 int user_alloc) 6942 bool user_alloc)
6893{ 6943{
6894 6944
6895 int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT; 6945 int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT;
6896 6946
6897 if (!user_alloc && !old.user_alloc && old.npages && !npages) { 6947 if ((mem->slot >= KVM_USER_MEM_SLOTS) && old.npages && !npages) {
6898 int ret; 6948 int ret;
6899 6949
6900 ret = vm_munmap(old.userspace_addr, 6950 ret = vm_munmap(old.userspace_addr,
@@ -6908,11 +6958,15 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
6908 if (!kvm->arch.n_requested_mmu_pages) 6958 if (!kvm->arch.n_requested_mmu_pages)
6909 nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); 6959 nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
6910 6960
6911 spin_lock(&kvm->mmu_lock);
6912 if (nr_mmu_pages) 6961 if (nr_mmu_pages)
6913 kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); 6962 kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
6914 kvm_mmu_slot_remove_write_access(kvm, mem->slot); 6963 /*
6915 spin_unlock(&kvm->mmu_lock); 6964 * Write protect all pages for dirty logging.
6965 * Existing largepage mappings are destroyed here and new ones will
6966 * not be created until the end of the logging.
6967 */
6968 if (npages && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
6969 kvm_mmu_slot_remove_write_access(kvm, mem->slot);
6916 /* 6970 /*
6917 * If memory slot is created, or moved, we need to clear all 6971 * If memory slot is created, or moved, we need to clear all
6918 * mmio sptes. 6972 * mmio sptes.
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index fb674fd3fc22..2b97525246d4 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -939,14 +939,8 @@ spurious_fault(unsigned long error_code, unsigned long address)
939 if (pmd_large(*pmd)) 939 if (pmd_large(*pmd))
940 return spurious_fault_check(error_code, (pte_t *) pmd); 940 return spurious_fault_check(error_code, (pte_t *) pmd);
941 941
942 /*
943 * Note: don't use pte_present() here, since it returns true
944 * if the _PAGE_PROTNONE bit is set. However, this aliases the
945 * _PAGE_GLOBAL bit, which for kernel pages give false positives
946 * when CONFIG_DEBUG_PAGEALLOC is used.
947 */
948 pte = pte_offset_kernel(pmd, address); 942 pte = pte_offset_kernel(pmd, address);
949 if (!(pte_flags(*pte) & _PAGE_PRESENT)) 943 if (!pte_present(*pte))
950 return 0; 944 return 0;
951 945
952 ret = spurious_fault_check(error_code, pte); 946 ret = spurious_fault_check(error_code, pte);
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index dfd30259eb89..ff3633c794c6 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -97,8 +97,7 @@ void numa_set_node(int cpu, int node)
97#endif 97#endif
98 per_cpu(x86_cpu_to_node_map, cpu) = node; 98 per_cpu(x86_cpu_to_node_map, cpu) = node;
99 99
100 if (node != NUMA_NO_NODE) 100 set_cpu_numa_node(cpu, node);
101 set_cpu_numa_node(cpu, node);
102} 101}
103 102
104void numa_clear_node(int cpu) 103void numa_clear_node(int cpu)
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index ca1f1c2bb7be..091934e1d0d9 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -473,6 +473,19 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
473 pgprot_val(req_prot) |= pgprot_val(cpa->mask_set); 473 pgprot_val(req_prot) |= pgprot_val(cpa->mask_set);
474 474
475 /* 475 /*
476	 * Set the PSE and GLOBAL flags only if the PRESENT flag is set,
477	 * otherwise pmd_present/pmd_huge will return true even on a
478	 * non-present pmd. canon_pgprot() will clear _PAGE_GLOBAL for
479	 * ancient hardware that doesn't support it.
480 */
481 if (pgprot_val(new_prot) & _PAGE_PRESENT)
482 pgprot_val(new_prot) |= _PAGE_PSE | _PAGE_GLOBAL;
483 else
484 pgprot_val(new_prot) &= ~(_PAGE_PSE | _PAGE_GLOBAL);
485
486 new_prot = canon_pgprot(new_prot);
487
488 /*
476 * old_pte points to the large page base address. So we need 489 * old_pte points to the large page base address. So we need
477 * to add the offset of the virtual address: 490 * to add the offset of the virtual address:
478 */ 491 */
@@ -517,7 +530,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
517 * The address is aligned and the number of pages 530 * The address is aligned and the number of pages
518 * covers the full page. 531 * covers the full page.
519 */ 532 */
520 new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot)); 533 new_pte = pfn_pte(pte_pfn(old_pte), new_prot);
521 __set_pmd_pte(kpte, address, new_pte); 534 __set_pmd_pte(kpte, address, new_pte);
522 cpa->flags |= CPA_FLUSHTLB; 535 cpa->flags |= CPA_FLUSHTLB;
523 do_split = 0; 536 do_split = 0;
@@ -561,16 +574,35 @@ int __split_large_page(pte_t *kpte, unsigned long address, pte_t *pbase)
561#ifdef CONFIG_X86_64 574#ifdef CONFIG_X86_64
562 if (level == PG_LEVEL_1G) { 575 if (level == PG_LEVEL_1G) {
563 pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT; 576 pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
564 pgprot_val(ref_prot) |= _PAGE_PSE; 577 /*
578	 * Set the PSE flag only if the PRESENT flag is set,
579	 * otherwise pmd_present/pmd_huge will return true
580	 * even on a non-present pmd.
581 */
582 if (pgprot_val(ref_prot) & _PAGE_PRESENT)
583 pgprot_val(ref_prot) |= _PAGE_PSE;
584 else
585 pgprot_val(ref_prot) &= ~_PAGE_PSE;
565 } 586 }
566#endif 587#endif
567 588
568 /* 589 /*
590	 * Set the GLOBAL flag only if the PRESENT flag is set,
591	 * otherwise pmd/pte_present will return true even on a
592	 * non-present pmd/pte. canon_pgprot() will clear _PAGE_GLOBAL
593	 * for ancient hardware that doesn't support it.
594 */
595 if (pgprot_val(ref_prot) & _PAGE_PRESENT)
596 pgprot_val(ref_prot) |= _PAGE_GLOBAL;
597 else
598 pgprot_val(ref_prot) &= ~_PAGE_GLOBAL;
599
600 /*
569 * Get the target pfn from the original entry: 601 * Get the target pfn from the original entry:
570 */ 602 */
571 pfn = pte_pfn(*kpte); 603 pfn = pte_pfn(*kpte);
572 for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc) 604 for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
573 set_pte(&pbase[i], pfn_pte(pfn, ref_prot)); 605 set_pte(&pbase[i], pfn_pte(pfn, canon_pgprot(ref_prot)));
574 606
575 if (pfn_range_is_mapped(PFN_DOWN(__pa(address)), 607 if (pfn_range_is_mapped(PFN_DOWN(__pa(address)),
576 PFN_DOWN(__pa(address)) + 1)) 608 PFN_DOWN(__pa(address)) + 1))
@@ -685,6 +717,18 @@ repeat:
685 new_prot = static_protections(new_prot, address, pfn); 717 new_prot = static_protections(new_prot, address, pfn);
686 718
687 /* 719 /*
720	 * Set the GLOBAL flag only if the PRESENT flag is
721	 * set, otherwise pte_present will return true even on
722	 * a non-present pte. canon_pgprot() will clear
723	 * _PAGE_GLOBAL for ancient hardware that doesn't
724	 * support it.
725 */
726 if (pgprot_val(new_prot) & _PAGE_PRESENT)
727 pgprot_val(new_prot) |= _PAGE_GLOBAL;
728 else
729 pgprot_val(new_prot) &= ~_PAGE_GLOBAL;
730
731 /*
688 * We need to keep the pfn from the existing PTE, 732 * We need to keep the pfn from the existing PTE,
689 * after all we're only going to change it's attributes 733 * after all we're only going to change it's attributes
690 * not the memory it points to 734 * not the memory it points to
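The recurring rule added throughout pageattr.c above is "only set PSE/GLOBAL when PRESENT is set". A hedged stand-alone sketch of that fix-up on a pgprot-style bit mask (the PG_* constants below are illustrative stand-ins for the kernel's _PAGE_* flags):

#include <stdint.h>
#include <stdio.h>

#define PG_PRESENT	0x001ull
#define PG_PSE		0x080ull
#define PG_GLOBAL	0x100ull

/* Allow PSE/GLOBAL only when the entry is actually present. */
static uint64_t fixup_prot(uint64_t prot)
{
	if (prot & PG_PRESENT)
		prot |= PG_PSE | PG_GLOBAL;
	else
		prot &= ~(PG_PSE | PG_GLOBAL);
	return prot;
}

int main(void)
{
	printf("present:     0x%llx\n", (unsigned long long)fixup_prot(PG_PRESENT));
	printf("not present: 0x%llx\n",
	       (unsigned long long)fixup_prot(PG_PSE | PG_GLOBAL));	/* 0x0 */
	return 0;
}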
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 53ea60458e01..3e724256dbee 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -521,6 +521,7 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
521 sd = &info->sd; 521 sd = &info->sd;
522 sd->domain = domain; 522 sd->domain = domain;
523 sd->node = node; 523 sd->node = node;
524 sd->acpi = device->handle;
524 /* 525 /*
525 * Maybe the desired pci bus has been already scanned. In such case 526 * Maybe the desired pci bus has been already scanned. In such case
526 * it is unnecessary to scan the pci bus with the given domain,busnum. 527 * it is unnecessary to scan the pci bus with the given domain,busnum.
@@ -592,6 +593,14 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
592 return bus; 593 return bus;
593} 594}
594 595
596int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
597{
598 struct pci_sysdata *sd = bridge->bus->sysdata;
599
600 ACPI_HANDLE_SET(&bridge->dev, sd->acpi);
601 return 0;
602}
603
595int __init pci_acpi_init(void) 604int __init pci_acpi_init(void)
596{ 605{
597 struct pci_dev *dev = NULL; 606 struct pci_dev *dev = NULL;
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index ccd0ab3ab899..901177d75ff5 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -34,7 +34,6 @@ int noioapicreroute = 1;
34#endif 34#endif
35int pcibios_last_bus = -1; 35int pcibios_last_bus = -1;
36unsigned long pirq_table_addr; 36unsigned long pirq_table_addr;
37struct pci_bus *pci_root_bus;
38const struct pci_raw_ops *__read_mostly raw_pci_ops; 37const struct pci_raw_ops *__read_mostly raw_pci_ops;
39const struct pci_raw_ops *__read_mostly raw_pci_ext_ops; 38const struct pci_raw_ops *__read_mostly raw_pci_ext_ops;
40 39
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index dd8ca6f7223b..94919e307f8e 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -51,6 +51,7 @@ struct pcibios_fwaddrmap {
51 51
52static LIST_HEAD(pcibios_fwaddrmappings); 52static LIST_HEAD(pcibios_fwaddrmappings);
53static DEFINE_SPINLOCK(pcibios_fwaddrmap_lock); 53static DEFINE_SPINLOCK(pcibios_fwaddrmap_lock);
54static bool pcibios_fw_addr_done;
54 55
55/* Must be called with 'pcibios_fwaddrmap_lock' lock held. */ 56/* Must be called with 'pcibios_fwaddrmap_lock' lock held. */
56static struct pcibios_fwaddrmap *pcibios_fwaddrmap_lookup(struct pci_dev *dev) 57static struct pcibios_fwaddrmap *pcibios_fwaddrmap_lookup(struct pci_dev *dev)
@@ -72,6 +73,9 @@ pcibios_save_fw_addr(struct pci_dev *dev, int idx, resource_size_t fw_addr)
72 unsigned long flags; 73 unsigned long flags;
73 struct pcibios_fwaddrmap *map; 74 struct pcibios_fwaddrmap *map;
74 75
76 if (pcibios_fw_addr_done)
77 return;
78
75 spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags); 79 spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags);
76 map = pcibios_fwaddrmap_lookup(dev); 80 map = pcibios_fwaddrmap_lookup(dev);
77 if (!map) { 81 if (!map) {
@@ -97,6 +101,9 @@ resource_size_t pcibios_retrieve_fw_addr(struct pci_dev *dev, int idx)
97 struct pcibios_fwaddrmap *map; 101 struct pcibios_fwaddrmap *map;
98 resource_size_t fw_addr = 0; 102 resource_size_t fw_addr = 0;
99 103
104 if (pcibios_fw_addr_done)
105 return 0;
106
100 spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags); 107 spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags);
101 map = pcibios_fwaddrmap_lookup(dev); 108 map = pcibios_fwaddrmap_lookup(dev);
102 if (map) 109 if (map)
@@ -106,7 +113,7 @@ resource_size_t pcibios_retrieve_fw_addr(struct pci_dev *dev, int idx)
106 return fw_addr; 113 return fw_addr;
107} 114}
108 115
109static void pcibios_fw_addr_list_del(void) 116static void __init pcibios_fw_addr_list_del(void)
110{ 117{
111 unsigned long flags; 118 unsigned long flags;
112 struct pcibios_fwaddrmap *entry, *next; 119 struct pcibios_fwaddrmap *entry, *next;
@@ -118,6 +125,7 @@ static void pcibios_fw_addr_list_del(void)
118 kfree(entry); 125 kfree(entry);
119 } 126 }
120 spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, flags); 127 spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, flags);
128 pcibios_fw_addr_done = true;
121} 129}
122 130
123static int 131static int
@@ -193,46 +201,46 @@ EXPORT_SYMBOL(pcibios_align_resource);
193 * as well. 201 * as well.
194 */ 202 */
195 203
196static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) 204static void pcibios_allocate_bridge_resources(struct pci_dev *dev)
197{ 205{
198 struct pci_bus *bus;
199 struct pci_dev *dev;
200 int idx; 206 int idx;
201 struct resource *r; 207 struct resource *r;
202 208
203 /* Depth-First Search on bus tree */ 209 for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_NUM_RESOURCES; idx++) {
204 list_for_each_entry(bus, bus_list, node) { 210 r = &dev->resource[idx];
205 if ((dev = bus->self)) { 211 if (!r->flags)
206 for (idx = PCI_BRIDGE_RESOURCES; 212 continue;
207 idx < PCI_NUM_RESOURCES; idx++) { 213 if (!r->start || pci_claim_resource(dev, idx) < 0) {
208 r = &dev->resource[idx]; 214 /*
209 if (!r->flags) 215 * Something is wrong with the region.
210 continue; 216 * Invalidate the resource to prevent
211 if (!r->start || 217 * child resource allocations in this
212 pci_claim_resource(dev, idx) < 0) { 218 * range.
213 /* 219 */
214 * Something is wrong with the region. 220 r->start = r->end = 0;
215 * Invalidate the resource to prevent 221 r->flags = 0;
216 * child resource allocations in this
217 * range.
218 */
219 r->start = r->end = 0;
220 r->flags = 0;
221 }
222 }
223 } 222 }
224 pcibios_allocate_bus_resources(&bus->children);
225 } 223 }
226} 224}
227 225
226static void pcibios_allocate_bus_resources(struct pci_bus *bus)
227{
228 struct pci_bus *child;
229
230 /* Depth-First Search on bus tree */
231 if (bus->self)
232 pcibios_allocate_bridge_resources(bus->self);
233 list_for_each_entry(child, &bus->children, node)
234 pcibios_allocate_bus_resources(child);
235}
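The rewritten allocation path walks the bus tree depth-first, claiming a bridge's own windows before recursing into its child buses. A toy sketch of that traversal over a made-up bus structure (not the PCI core API):

#include <stdio.h>

struct bus {
	const char *name;
	struct bus *children[4];	/* NULL-terminated */
};

/* Depth-first: handle this bus, then each child in turn. */
static void allocate_bus_resources(struct bus *bus)
{
	printf("allocating bridge resources for %s\n", bus->name);
	for (int i = 0; bus->children[i]; i++)
		allocate_bus_resources(bus->children[i]);
}

int main(void)
{
	struct bus leaf1 = { "0000:01", { 0 } };
	struct bus leaf2 = { "0000:02", { 0 } };
	struct bus root  = { "0000:00", { &leaf1, &leaf2 } };

	allocate_bus_resources(&root);
	return 0;
}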
236
228struct pci_check_idx_range { 237struct pci_check_idx_range {
229 int start; 238 int start;
230 int end; 239 int end;
231}; 240};
232 241
233static void __init pcibios_allocate_resources(int pass) 242static void pcibios_allocate_dev_resources(struct pci_dev *dev, int pass)
234{ 243{
235 struct pci_dev *dev = NULL;
236 int idx, disabled, i; 244 int idx, disabled, i;
237 u16 command; 245 u16 command;
238 struct resource *r; 246 struct resource *r;
@@ -244,14 +252,13 @@ static void __init pcibios_allocate_resources(int pass)
244#endif 252#endif
245 }; 253 };
246 254
247 for_each_pci_dev(dev) { 255 pci_read_config_word(dev, PCI_COMMAND, &command);
248 pci_read_config_word(dev, PCI_COMMAND, &command); 256 for (i = 0; i < ARRAY_SIZE(idx_range); i++)
249 for (i = 0; i < ARRAY_SIZE(idx_range); i++)
250 for (idx = idx_range[i].start; idx <= idx_range[i].end; idx++) { 257 for (idx = idx_range[i].start; idx <= idx_range[i].end; idx++) {
251 r = &dev->resource[idx]; 258 r = &dev->resource[idx];
252 if (r->parent) /* Already allocated */ 259 if (r->parent) /* Already allocated */
253 continue; 260 continue;
254 if (!r->start) /* Address not assigned at all */ 261 if (!r->start) /* Address not assigned at all */
255 continue; 262 continue;
256 if (r->flags & IORESOURCE_IO) 263 if (r->flags & IORESOURCE_IO)
257 disabled = !(command & PCI_COMMAND_IO); 264 disabled = !(command & PCI_COMMAND_IO);
@@ -270,44 +277,74 @@ static void __init pcibios_allocate_resources(int pass)
270 } 277 }
271 } 278 }
272 } 279 }
273 if (!pass) { 280 if (!pass) {
274 r = &dev->resource[PCI_ROM_RESOURCE]; 281 r = &dev->resource[PCI_ROM_RESOURCE];
275 if (r->flags & IORESOURCE_ROM_ENABLE) { 282 if (r->flags & IORESOURCE_ROM_ENABLE) {
276 /* Turn the ROM off, leave the resource region, 283 /* Turn the ROM off, leave the resource region,
277 * but keep it unregistered. */ 284 * but keep it unregistered. */
278 u32 reg; 285 u32 reg;
279 dev_dbg(&dev->dev, "disabling ROM %pR\n", r); 286 dev_dbg(&dev->dev, "disabling ROM %pR\n", r);
280 r->flags &= ~IORESOURCE_ROM_ENABLE; 287 r->flags &= ~IORESOURCE_ROM_ENABLE;
281 pci_read_config_dword(dev, 288 pci_read_config_dword(dev, dev->rom_base_reg, &reg);
282 dev->rom_base_reg, &reg); 289 pci_write_config_dword(dev, dev->rom_base_reg,
283 pci_write_config_dword(dev, dev->rom_base_reg,
284 reg & ~PCI_ROM_ADDRESS_ENABLE); 290 reg & ~PCI_ROM_ADDRESS_ENABLE);
285 }
286 } 291 }
287 } 292 }
288} 293}
289 294
290static int __init pcibios_assign_resources(void) 295static void pcibios_allocate_resources(struct pci_bus *bus, int pass)
296{
297 struct pci_dev *dev;
298 struct pci_bus *child;
299
300 list_for_each_entry(dev, &bus->devices, bus_list) {
301 pcibios_allocate_dev_resources(dev, pass);
302
303 child = dev->subordinate;
304 if (child)
305 pcibios_allocate_resources(child, pass);
306 }
307}
308
309static void pcibios_allocate_dev_rom_resource(struct pci_dev *dev)
291{ 310{
292 struct pci_dev *dev = NULL;
293 struct resource *r; 311 struct resource *r;
294 312
295 if (!(pci_probe & PCI_ASSIGN_ROMS)) { 313 /*
296 /* 314 * Try to use BIOS settings for ROMs, otherwise let
297 * Try to use BIOS settings for ROMs, otherwise let 315 * pci_assign_unassigned_resources() allocate the new
298 * pci_assign_unassigned_resources() allocate the new 316 * addresses.
299 * addresses. 317 */
300 */ 318 r = &dev->resource[PCI_ROM_RESOURCE];
301 for_each_pci_dev(dev) { 319 if (!r->flags || !r->start)
302 r = &dev->resource[PCI_ROM_RESOURCE]; 320 return;
303 if (!r->flags || !r->start) 321
304 continue; 322 if (pci_claim_resource(dev, PCI_ROM_RESOURCE) < 0) {
305 if (pci_claim_resource(dev, PCI_ROM_RESOURCE) < 0) { 323 r->end -= r->start;
306 r->end -= r->start; 324 r->start = 0;
307 r->start = 0;
308 }
309 }
310 } 325 }
326}
327static void pcibios_allocate_rom_resources(struct pci_bus *bus)
328{
329 struct pci_dev *dev;
330 struct pci_bus *child;
331
332 list_for_each_entry(dev, &bus->devices, bus_list) {
333 pcibios_allocate_dev_rom_resource(dev);
334
335 child = dev->subordinate;
336 if (child)
337 pcibios_allocate_rom_resources(child);
338 }
339}
340
341static int __init pcibios_assign_resources(void)
342{
343 struct pci_bus *bus;
344
345 if (!(pci_probe & PCI_ASSIGN_ROMS))
346 list_for_each_entry(bus, &pci_root_buses, node)
347 pcibios_allocate_rom_resources(bus);
311 348
312 pci_assign_unassigned_resources(); 349 pci_assign_unassigned_resources();
313 pcibios_fw_addr_list_del(); 350 pcibios_fw_addr_list_del();
@@ -315,12 +352,32 @@ static int __init pcibios_assign_resources(void)
         return 0;
 }
 
+void pcibios_resource_survey_bus(struct pci_bus *bus)
+{
+        dev_printk(KERN_DEBUG, &bus->dev, "Allocating resources\n");
+
+        pcibios_allocate_bus_resources(bus);
+
+        pcibios_allocate_resources(bus, 0);
+        pcibios_allocate_resources(bus, 1);
+
+        if (!(pci_probe & PCI_ASSIGN_ROMS))
+                pcibios_allocate_rom_resources(bus);
+}
+
 void __init pcibios_resource_survey(void)
 {
+        struct pci_bus *bus;
+
         DBG("PCI: Allocating resources\n");
-        pcibios_allocate_bus_resources(&pci_root_buses);
-        pcibios_allocate_resources(0);
-        pcibios_allocate_resources(1);
+
+        list_for_each_entry(bus, &pci_root_buses, node)
+                pcibios_allocate_bus_resources(bus);
+
+        list_for_each_entry(bus, &pci_root_buses, node)
+                pcibios_allocate_resources(bus, 0);
+        list_for_each_entry(bus, &pci_root_buses, node)
+                pcibios_allocate_resources(bus, 1);
 
         e820_reserve_resources_late();
         /*
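
The i386.c hunks above replace the global for_each_pci_dev() walks with helpers that operate on one bus and recurse through dev->subordinate, which is what lets the new pcibios_resource_survey_bus() run the full sequence (bus resources, device passes 0 and 1, then ROMs) for a single root bus instead of only for everything on pci_root_buses. Below is a minimal user-space sketch of that recursion pattern; the struct definitions and the array-based device list are simplified stand-ins, not the kernel's struct pci_bus / struct pci_dev.

/*
 * Minimal model of the per-bus recursion used by the new helpers above.
 * The types and the array-based device list are simplified stand-ins,
 * not the kernel's struct pci_bus / struct pci_dev.
 */
#include <stdio.h>

struct pci_bus;

struct pci_dev {
        const char *name;
        struct pci_bus *subordinate;    /* set when the device is a bridge */
};

struct pci_bus {
        const char *name;
        struct pci_dev *devices;
        int ndevs;
};

static void allocate_dev_resources(struct pci_dev *dev, int pass)
{
        printf("pass %d: claim resources of %s\n", pass, dev->name);
}

/* Same shape as pcibios_allocate_resources(bus, pass): handle every
 * device on this bus, then recurse into the bus behind each bridge. */
static void allocate_resources(struct pci_bus *bus, int pass)
{
        int i;

        for (i = 0; i < bus->ndevs; i++) {
                allocate_dev_resources(&bus->devices[i], pass);
                if (bus->devices[i].subordinate)
                        allocate_resources(bus->devices[i].subordinate, pass);
        }
}

int main(void)
{
        struct pci_dev leaf = { "01:00.0", NULL };
        struct pci_bus secondary = { "bus 01", &leaf, 1 };
        struct pci_dev bridge = { "00:1c.0 (bridge)", &secondary };
        struct pci_bus root = { "bus 00", &bridge, 1 };

        allocate_resources(&root, 0);   /* two passes, as in the patch */
        allocate_resources(&root, 1);
        return 0;
}

The same shape appears twice in the patch: once as pcibios_allocate_resources() over pcibios_allocate_dev_resources(), and once as pcibios_allocate_rom_resources() over pcibios_allocate_dev_rom_resource().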
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
index 4a2ab9cb3659..4db96fb1c232 100644
--- a/arch/x86/pci/legacy.c
+++ b/arch/x86/pci/legacy.c
@@ -30,7 +30,7 @@ int __init pci_legacy_init(void)
         }
 
         printk("PCI: Probing PCI hardware\n");
-        pci_root_bus = pcibios_scan_root(0);
+        pcibios_scan_root(0);
         return 0;
 }
 
diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c
index b96b14c250b6..72c229f9ebcf 100644
--- a/arch/x86/pci/numaq_32.c
+++ b/arch/x86/pci/numaq_32.c
@@ -152,7 +152,7 @@ int __init pci_numaq_init(void)
 
         raw_pci_ops = &pci_direct_conf1_mq;
 
-        pci_root_bus = pcibios_scan_root(0);
+        pcibios_scan_root(0);
         if (num_online_nodes() > 1)
                 for_each_online_node(quad) {
                         if (quad == 0)
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 70b2a3a305d6..5f2ecaf3f9d8 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -69,11 +69,6 @@ struct efi_memory_map memmap;
 static struct efi efi_phys __initdata;
 static efi_system_table_t efi_systab __initdata;
 
-static inline bool efi_is_native(void)
-{
-        return IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT);
-}
-
 unsigned long x86_efi_facility;
 
 /*
@@ -85,9 +80,10 @@ int efi_enabled(int facility)
 }
 EXPORT_SYMBOL(efi_enabled);
 
+static bool __initdata disable_runtime = false;
 static int __init setup_noefi(char *arg)
 {
-        clear_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility);
+        disable_runtime = true;
         return 0;
 }
 early_param("noefi", setup_noefi);
@@ -734,7 +730,7 @@ void __init efi_init(void)
         if (!efi_is_native())
                 pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n");
         else {
-                if (efi_runtime_init())
+                if (disable_runtime || efi_runtime_init())
                         return;
                 set_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility);
         }
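
With this change, "noefi" no longer clears EFI_RUNTIME_SERVICES at early-param time; it only records the request in disable_runtime, which efi_init() now checks right before the set_bit() shown above, so the runtime-services bit is simply never set when noefi is given. A self-contained sketch of that record-early/act-later ordering follows; the parsing helper and the bit constant are illustrative stand-ins, not the kernel's early_param machinery.

/*
 * Sketch of the record-early/act-later pattern: "noefi" only sets a flag,
 * and the flag is checked where the runtime-services bit would be set.
 * The parsing helper and the bit constant are stand-ins, not kernel APIs.
 */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define EFI_RUNTIME_SERVICES_BIT (1UL << 0)

static bool disable_runtime;            /* what setup_noefi() now records */
static unsigned long efi_facility;      /* stands in for x86_efi_facility */

static void parse_early_params(const char *cmdline)
{
        if (strstr(cmdline, "noefi"))
                disable_runtime = true;
}

static int efi_runtime_init(void)
{
        return 0;                       /* pretend the runtime mapping worked */
}

static void efi_init(void)
{
        /* In this model the facility bit is set only here, after the early
         * parameters have run, so a flag checked at this point takes effect. */
        if (disable_runtime || efi_runtime_init())
                return;
        efi_facility |= EFI_RUNTIME_SERVICES_BIT;
}

int main(void)
{
        parse_early_params("quiet noefi");
        efi_init();
        printf("EFI runtime services %s\n",
               (efi_facility & EFI_RUNTIME_SERVICES_BIT) ? "on" : "off");
        return 0;
}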
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 34bc4cee8887..09ea61d2e02f 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -300,8 +300,6 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
         gdt = get_cpu_gdt_table(cpu);
 
         ctxt->flags = VGCF_IN_KERNEL;
-        ctxt->user_regs.ds = __USER_DS;
-        ctxt->user_regs.es = __USER_DS;
         ctxt->user_regs.ss = __KERNEL_DS;
 #ifdef CONFIG_X86_32
         ctxt->user_regs.fs = __KERNEL_PERCPU;
@@ -310,35 +308,41 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
         ctxt->gs_base_kernel = per_cpu_offset(cpu);
 #endif
         ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
-        ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
 
         memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
 
-        xen_copy_trap_info(ctxt->trap_ctxt);
+        {
+                ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
+                ctxt->user_regs.ds = __USER_DS;
+                ctxt->user_regs.es = __USER_DS;
 
-        ctxt->ldt_ents = 0;
+                xen_copy_trap_info(ctxt->trap_ctxt);
 
-        BUG_ON((unsigned long)gdt & ~PAGE_MASK);
+                ctxt->ldt_ents = 0;
 
-        gdt_mfn = arbitrary_virt_to_mfn(gdt);
-        make_lowmem_page_readonly(gdt);
-        make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));
+                BUG_ON((unsigned long)gdt & ~PAGE_MASK);
 
-        ctxt->gdt_frames[0] = gdt_mfn;
-        ctxt->gdt_ents = GDT_ENTRIES;
+                gdt_mfn = arbitrary_virt_to_mfn(gdt);
+                make_lowmem_page_readonly(gdt);
+                make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));
 
-        ctxt->user_regs.cs = __KERNEL_CS;
-        ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
+                ctxt->gdt_frames[0] = gdt_mfn;
+                ctxt->gdt_ents = GDT_ENTRIES;
 
-        ctxt->kernel_ss = __KERNEL_DS;
-        ctxt->kernel_sp = idle->thread.sp0;
+                ctxt->kernel_ss = __KERNEL_DS;
+                ctxt->kernel_sp = idle->thread.sp0;
 
 #ifdef CONFIG_X86_32
-        ctxt->event_callback_cs = __KERNEL_CS;
-        ctxt->failsafe_callback_cs = __KERNEL_CS;
+                ctxt->event_callback_cs = __KERNEL_CS;
+                ctxt->failsafe_callback_cs = __KERNEL_CS;
 #endif
-        ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback;
-        ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback;
+                ctxt->event_callback_eip =
+                        (unsigned long)xen_hypervisor_callback;
+                ctxt->failsafe_callback_eip =
+                        (unsigned long)xen_failsafe_callback;
+        }
+        ctxt->user_regs.cs = __KERNEL_CS;
+        ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
 
         per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
         ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
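
The xen/smp.c hunks gather the PV-specific parts of the vCPU context (user ds/es and eflags, trap info, GDT frames, kernel stack, and the event/failsafe callbacks) into one block, while user_regs.cs, user_regs.esp and the CR3 setup stay outside it. One detail worth decoding is the 0x1000 written into user_regs.eflags: the EFLAGS IOPL field occupies bits 12-13, so 0x1000 encodes I/O privilege level 1, matching the IOPL_RING1 comment. A tiny self-contained check (the helper name is illustrative):

/* Decode the IOPL field of an x86 EFLAGS value (bits 12 and 13). */
#include <stdio.h>

static unsigned int eflags_iopl(unsigned long eflags)
{
        return (eflags >> 12) & 0x3;
}

int main(void)
{
        unsigned long eflags = 0x1000;          /* the IOPL_RING1 value above */

        printf("IOPL = %u\n", eflags_iopl(eflags));     /* prints: IOPL = 1 */
        return 0;
}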
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 83e866d714ce..f7a080ef0354 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -328,7 +328,6 @@ static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
                 if (per_cpu(lock_spinners, cpu) == xl) {
                         ADD_STATS(released_slow_kicked, 1);
                         xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
-                        break;
                 }
         }
 }
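
With the break gone, xen_spin_unlock_slow() keeps scanning and sends the unlock IPI to every CPU whose lock_spinners entry points at this lock, rather than waking only the first waiter it finds. A minimal model of that loop, with an array standing in for the per-CPU variable and printf for xen_send_IPI_one():

/*
 * Model of the unlock slow path after the change: every CPU recorded as
 * spinning on the lock is kicked, not just the first match. The array
 * stands in for the per-CPU lock_spinners variable and printf for the IPI.
 */
#include <stdio.h>

#define NR_CPUS 4

struct xen_spinlock { int dummy; };

static struct xen_spinlock *lock_spinners[NR_CPUS];

static void send_unlock_ipi(int cpu)
{
        printf("kick cpu %d\n", cpu);
}

static void spin_unlock_slow(struct xen_spinlock *xl)
{
        int cpu;

        for (cpu = 0; cpu < NR_CPUS; cpu++) {
                if (lock_spinners[cpu] == xl)
                        send_unlock_ipi(cpu);   /* no break: kick all waiters */
        }
}

int main(void)
{
        struct xen_spinlock lock = { 0 };

        lock_spinners[1] = &lock;
        lock_spinners[3] = &lock;
        spin_unlock_slow(&lock);        /* kicks CPU 1 and CPU 3 */
        return 0;
}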