path: root/arch/x86
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig | 6
-rw-r--r--  arch/x86/Makefile | 9
-rw-r--r--  arch/x86/boot/code16gcc.h | 24
-rw-r--r--  arch/x86/boot/compressed/Makefile | 3
-rw-r--r--  arch/x86/boot/compressed/eboot.c | 48
-rw-r--r--  arch/x86/boot/compressed/eboot.h | 16
-rw-r--r--  arch/x86/boot/header.S | 2
-rw-r--r--  arch/x86/crypto/Makefile | 4
-rw-r--r--  arch/x86/crypto/aes_ctrby8_avx-x86_64.S | 546
-rw-r--r--  arch/x86/crypto/aesni-intel_glue.c | 40
-rw-r--r--  arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 281
-rw-r--r--  arch/x86/crypto/des3_ede-asm_64.S | 805
-rw-r--r--  arch/x86/crypto/des3_ede_glue.c | 509
-rw-r--r--  arch/x86/include/asm/apic.h | 2
-rw-r--r--  arch/x86/include/asm/barrier.h | 2
-rw-r--r--  arch/x86/include/asm/cmpxchg.h | 4
-rw-r--r--  arch/x86/include/asm/cmpxchg_32.h | 2
-rw-r--r--  arch/x86/include/asm/cmpxchg_64.h | 2
-rw-r--r--  arch/x86/include/asm/cpufeature.h | 409
-rw-r--r--  arch/x86/include/asm/efi.h | 33
-rw-r--r--  arch/x86/include/asm/fpu-internal.h | 2
-rw-r--r--  arch/x86/include/asm/ftrace.h | 2
-rw-r--r--  arch/x86/include/asm/irqflags.h | 2
-rw-r--r--  arch/x86/include/asm/kvm_emulate.h | 33
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 15
-rw-r--r--  arch/x86/include/asm/mc146818rtc.h | 2
-rw-r--r--  arch/x86/include/asm/mmu_context.h | 6
-rw-r--r--  arch/x86/include/asm/mutex_32.h | 16
-rw-r--r--  arch/x86/include/asm/mwait.h | 2
-rw-r--r--  arch/x86/include/asm/percpu.h | 3
-rw-r--r--  arch/x86/include/asm/pmc_atom.h | 107
-rw-r--r--  arch/x86/include/asm/processor.h | 3
-rw-r--r--  arch/x86/include/asm/qrwlock.h | 2
-rw-r--r--  arch/x86/include/asm/vga.h | 6
-rw-r--r--  arch/x86/include/asm/vmx.h | 7
-rw-r--r--  arch/x86/include/uapi/asm/Kbuild | 1
-rw-r--r--  arch/x86/include/uapi/asm/kvm.h | 3
-rw-r--r--  arch/x86/include/uapi/asm/kvm_perf.h | 16
-rw-r--r--  arch/x86/include/uapi/asm/msr-index.h | 3
-rw-r--r--  arch/x86/kernel/Makefile | 1
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 4
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 348
-rw-r--r--  arch/x86/kernel/cpu/common.c | 22
-rw-r--r--  arch/x86/kernel/cpu/intel.c | 52
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c | 12
-rw-r--r--  arch/x86/kernel/cpu/mkcapflags.sh | 51
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 3
-rw-r--r--  arch/x86/kernel/cpu/perf_event.h | 12
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd_uncore.c | 111
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c | 69
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c | 6
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore.c | 16
-rw-r--r--  arch/x86/kernel/cpu/proc.c | 8
-rw-r--r--  arch/x86/kernel/cpu/scattered.c | 1
-rw-r--r--  arch/x86/kernel/entry_32.S | 18
-rw-r--r--  arch/x86/kernel/entry_64.S | 80
-rw-r--r--  arch/x86/kernel/ftrace.c | 3
-rw-r--r--  arch/x86/kernel/kprobes/core.c | 3
-rw-r--r--  arch/x86/kernel/mcount_64.S | 13
-rw-r--r--  arch/x86/kernel/paravirt_patch_64.c | 2
-rw-r--r--  arch/x86/kernel/pmc_atom.c | 321
-rw-r--r--  arch/x86/kernel/reboot.c | 24
-rw-r--r--  arch/x86/kernel/resource.c | 8
-rw-r--r--  arch/x86/kernel/setup.c | 4
-rw-r--r--  arch/x86/kernel/tsc.c | 7
-rw-r--r--  arch/x86/kvm/cpuid.h | 8
-rw-r--r--  arch/x86/kvm/emulate.c | 494
-rw-r--r--  arch/x86/kvm/lapic.c | 4
-rw-r--r--  arch/x86/kvm/mmutrace.h | 4
-rw-r--r--  arch/x86/kvm/pmu.c | 9
-rw-r--r--  arch/x86/kvm/svm.c | 57
-rw-r--r--  arch/x86/kvm/trace.h | 6
-rw-r--r--  arch/x86/kvm/vmx.c | 239
-rw-r--r--  arch/x86/kvm/x86.c | 183
-rw-r--r--  arch/x86/kvm/x86.h | 27
-rw-r--r--  arch/x86/mm/fault.c | 6
-rw-r--r--  arch/x86/mm/init.c | 7
-rw-r--r--  arch/x86/mm/tlb.c | 103
-rw-r--r--  arch/x86/pci/fixup.c | 21
-rw-r--r--  arch/x86/pci/i386.c | 4
-rw-r--r--  arch/x86/platform/efi/Makefile | 2
-rw-r--r--  arch/x86/platform/efi/efi.c | 483
-rw-r--r--  arch/x86/platform/efi/quirks.c | 290
-rw-r--r--  arch/x86/platform/ts5500/ts5500.c | 94
-rw-r--r--  arch/x86/power/cpu.c | 4
-rw-r--r--  arch/x86/um/asm/processor.h | 3
-rw-r--r--  arch/x86/xen/Makefile | 1
-rw-r--r--  arch/x86/xen/efi.c | 43
-rw-r--r--  arch/x86/xen/enlighten.c | 2
-rw-r--r--  arch/x86/xen/grant-table.c | 148
-rw-r--r--  arch/x86/xen/xen-ops.h | 8
91 files changed, 4612 insertions, 1815 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 43873442dee1..6b71f0417293 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -54,7 +54,6 @@ config X86
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FUNCTION_GRAPH_FP_TEST
-	select HAVE_FUNCTION_TRACE_MCOUNT_TEST
 	select HAVE_SYSCALL_TRACEPOINTS
 	select SYSCTL_EXCEPTION_TRACE
 	select HAVE_KVM
@@ -1525,6 +1524,7 @@ config EFI
 	bool "EFI runtime service support"
 	depends on ACPI
 	select UCS2_STRING
+	select EFI_RUNTIME_WRAPPERS
 	---help---
 	  This enables the kernel to use EFI runtime services that are
 	  available (such as the EFI variable services).
@@ -2406,6 +2406,10 @@ config IOSF_MBI
 	default m
 	depends on PCI
 
+config PMC_ATOM
+	def_bool y
+	depends on PCI
+
 source "net/Kconfig"
 
 source "drivers/Kconfig"
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 33f71b01fd22..c65fd9650467 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -15,12 +15,9 @@ endif
 # that way we can complain to the user if the CPU is insufficient.
 #
 # The -m16 option is supported by GCC >= 4.9 and clang >= 3.5. For
-# older versions of GCC, we need to play evil and unreliable tricks to
-# attempt to ensure that our asm(".code16gcc") is first in the asm
-# output.
-CODE16GCC_CFLAGS := -m32 -include $(srctree)/arch/x86/boot/code16gcc.h \
-		$(call cc-option, -fno-toplevel-reorder,\
-		$(call cc-option, -fno-unit-at-a-time))
+# older versions of GCC, include an *assembly* header to make sure that
+# gcc doesn't play any games behind our back.
+CODE16GCC_CFLAGS := -m32 -Wa,$(srctree)/arch/x86/boot/code16gcc.h
 M16_CFLAGS := $(call cc-option, -m16, $(CODE16GCC_CFLAGS))
 
 REALMODE_CFLAGS := $(M16_CFLAGS) -g -Os -D__KERNEL__ \
diff --git a/arch/x86/boot/code16gcc.h b/arch/x86/boot/code16gcc.h
index d93e48010b61..5ff426535397 100644
--- a/arch/x86/boot/code16gcc.h
+++ b/arch/x86/boot/code16gcc.h
@@ -1,15 +1,11 @@
-/*
- * code16gcc.h
- *
- * This file is -include'd when compiling 16-bit C code.
- * Note: this asm() needs to be emitted before gcc emits any code.
- * Depending on gcc version, this requires -fno-unit-at-a-time or
- * -fno-toplevel-reorder.
- *
- * Hopefully gcc will eventually have a real -m16 option so we can
- * drop this hack long term.
- */
+#
+# code16gcc.h
+#
+# This file is added to the assembler via -Wa when compiling 16-bit C code.
+# This is done this way instead via asm() to make sure gcc does not reorder
+# things around us.
+#
+# gcc 4.9+ has a real -m16 option so we can drop this hack long term.
+#
 
-#ifndef __ASSEMBLY__
-asm(".code16gcc");
-#endif
+	.code16gcc
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 0fcd9133790c..7a801a310e37 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -33,7 +33,8 @@ VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
 $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
 
 ifeq ($(CONFIG_EFI_STUB), y)
-	VMLINUX_OBJS += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o
+	VMLINUX_OBJS += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o \
+		$(objtree)/drivers/firmware/efi/libstub/lib.a
 endif
 
 $(obj)/vmlinux: $(VMLINUX_OBJS) FORCE
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 0331d765c2bb..f277184e2ac1 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -19,10 +19,7 @@
 
 static efi_system_table_t *sys_table;
 
-static struct efi_config *efi_early;
-
-#define efi_call_early(f, ...) \
-	efi_early->call(efi_early->f, __VA_ARGS__);
+struct efi_config *efi_early;
 
 #define BOOT_SERVICES(bits)						\
 static void setup_boot_services##bits(struct efi_config *c)		\
@@ -48,8 +45,7 @@ static void setup_boot_services##bits(struct efi_config *c) \
 BOOT_SERVICES(32);
 BOOT_SERVICES(64);
 
-static void efi_printk(efi_system_table_t *, char *);
-static void efi_char16_printk(efi_system_table_t *, efi_char16_t *);
+void efi_char16_printk(efi_system_table_t *, efi_char16_t *);
 
 static efi_status_t
 __file_size32(void *__fh, efi_char16_t *filename_16,
@@ -156,7 +152,7 @@ grow:
 
 	return status;
 }
-static efi_status_t
+efi_status_t
 efi_file_size(efi_system_table_t *sys_table, void *__fh,
 	      efi_char16_t *filename_16, void **handle, u64 *file_sz)
 {
@@ -166,7 +162,7 @@ efi_file_size(efi_system_table_t *sys_table, void *__fh,
 	return __file_size32(__fh, filename_16, handle, file_sz);
 }
 
-static inline efi_status_t
+efi_status_t
 efi_file_read(void *handle, unsigned long *size, void *addr)
 {
 	unsigned long func;
@@ -184,7 +180,7 @@ efi_file_read(void *handle, unsigned long *size, void *addr)
 	}
 }
 
-static inline efi_status_t efi_file_close(void *handle)
+efi_status_t efi_file_close(void *handle)
 {
 	if (efi_early->is64) {
 		efi_file_handle_64_t *fh = handle;
@@ -249,7 +245,7 @@ static inline efi_status_t __open_volume64(void *__image, void **__fh)
 	return status;
 }
 
-static inline efi_status_t
+efi_status_t
 efi_open_volume(efi_system_table_t *sys_table, void *__image, void **__fh)
 {
 	if (efi_early->is64)
@@ -258,7 +254,7 @@ efi_open_volume(efi_system_table_t *sys_table, void *__image, void **__fh)
 	return __open_volume32(__image, __fh);
 }
 
-static void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str)
+void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str)
 {
 	unsigned long output_string;
 	size_t offset;
@@ -284,8 +280,6 @@ static void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str)
 	}
 }
 
-#include "../../../../drivers/firmware/efi/efi-stub-helper.c"
-
 static void find_bits(unsigned long mask, u8 *pos, u8 *size)
 {
 	u8 first, len;
@@ -1038,6 +1032,7 @@ struct boot_params *make_boot_params(struct efi_config *c)
 	int i;
 	unsigned long ramdisk_addr;
 	unsigned long ramdisk_size;
+	unsigned long initrd_addr_max;
 
 	efi_early = c;
 	sys_table = (efi_system_table_t *)(unsigned long)efi_early->table;
@@ -1100,14 +1095,21 @@ struct boot_params *make_boot_params(struct efi_config *c)
 
 	memset(sdt, 0, sizeof(*sdt));
 
+	if (hdr->xloadflags & XLF_CAN_BE_LOADED_ABOVE_4G)
+		initrd_addr_max = -1UL;
+	else
+		initrd_addr_max = hdr->initrd_addr_max;
+
 	status = handle_cmdline_files(sys_table, image,
 				      (char *)(unsigned long)hdr->cmd_line_ptr,
-				      "initrd=", hdr->initrd_addr_max,
+				      "initrd=", initrd_addr_max,
 				      &ramdisk_addr, &ramdisk_size);
 	if (status != EFI_SUCCESS)
 		goto fail2;
-	hdr->ramdisk_image = ramdisk_addr;
-	hdr->ramdisk_size = ramdisk_size;
+	hdr->ramdisk_image = ramdisk_addr & 0xffffffff;
+	hdr->ramdisk_size = ramdisk_size & 0xffffffff;
+	boot_params->ext_ramdisk_image = (u64)ramdisk_addr >> 32;
+	boot_params->ext_ramdisk_size = (u64)ramdisk_size >> 32;
 
 	return boot_params;
 fail2:
@@ -1374,7 +1376,10 @@ struct boot_params *efi_main(struct efi_config *c,
 
 	setup_graphics(boot_params);
 
-	setup_efi_pci(boot_params);
+	status = setup_efi_pci(boot_params);
+	if (status != EFI_SUCCESS) {
+		efi_printk(sys_table, "setup_efi_pci() failed!\n");
+	}
 
 	status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
 				sizeof(*gdt), (void **)&gdt);
@@ -1401,16 +1406,20 @@ struct boot_params *efi_main(struct efi_config *c,
 					     hdr->init_size, hdr->init_size,
 					     hdr->pref_address,
 					     hdr->kernel_alignment);
-		if (status != EFI_SUCCESS)
+		if (status != EFI_SUCCESS) {
+			efi_printk(sys_table, "efi_relocate_kernel() failed!\n");
 			goto fail;
+		}
 
 		hdr->pref_address = hdr->code32_start;
 		hdr->code32_start = bzimage_addr;
 	}
 
 	status = exit_boot(boot_params, handle, is64);
-	if (status != EFI_SUCCESS)
+	if (status != EFI_SUCCESS) {
+		efi_printk(sys_table, "exit_boot() failed!\n");
 		goto fail;
+	}
 
 	memset((char *)gdt->address, 0x0, gdt->size);
 	desc = (struct desc_struct *)gdt->address;
@@ -1470,5 +1479,6 @@ struct boot_params *efi_main(struct efi_config *c,
 
 	return boot_params;
 fail:
+	efi_printk(sys_table, "efi_main() failed!\n");
 	return NULL;
 }
diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h
index c88c31ecad12..d487e727f1ec 100644
--- a/arch/x86/boot/compressed/eboot.h
+++ b/arch/x86/boot/compressed/eboot.h
@@ -103,20 +103,4 @@ struct efi_uga_draw_protocol {
 	void *blt;
 };
 
-struct efi_config {
-	u64 image_handle;
-	u64 table;
-	u64 allocate_pool;
-	u64 allocate_pages;
-	u64 get_memory_map;
-	u64 free_pool;
-	u64 free_pages;
-	u64 locate_handle;
-	u64 handle_protocol;
-	u64 exit_boot_services;
-	u64 text_output;
-	efi_status_t (*call)(unsigned long, ...);
-	bool is64;
-} __packed;
-
 #endif /* BOOT_COMPRESSED_EBOOT_H */
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 7a6d43a554d7..16ef02596db2 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -154,7 +154,7 @@ extra_header_fields:
 #else
 	.quad	0				# ImageBase
 #endif
-	.long	0x20				# SectionAlignment
+	.long	CONFIG_PHYSICAL_ALIGN		# SectionAlignment
 	.long	0x20				# FileAlignment
 	.word	0				# MajorOperatingSystemVersion
 	.word	0				# MinorOperatingSystemVersion
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 61d6e281898b..d551165a3159 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
 obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
 
 obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
+obj-$(CONFIG_CRYPTO_DES3_EDE_X86_64) += des3_ede-x86_64.o
 obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
 obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
@@ -52,6 +53,7 @@ salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
 serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
 
 aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
+des3_ede-x86_64-y := des3_ede-asm_64.o des3_ede_glue.o
 camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
 blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
 twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
@@ -76,7 +78,7 @@ ifeq ($(avx2_supported),yes)
 endif
 
 aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
-aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o
+aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o aes_ctrby8_avx-x86_64.o
 ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
 sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
 ifeq ($(avx2_supported),yes)
diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
new file mode 100644
index 000000000000..f091f122ed24
--- /dev/null
+++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
@@ -0,0 +1,546 @@
1/*
2 * Implement AES CTR mode by8 optimization with AVX instructions. (x86_64)
3 *
4 * This is AES128/192/256 CTR mode optimization implementation. It requires
5 * the support of Intel(R) AESNI and AVX instructions.
6 *
7 * This work was inspired by the AES CTR mode optimization published
8 * in Intel Optimized IPSEC Cryptograhpic library.
9 * Additional information on it can be found at:
10 * http://downloadcenter.intel.com/Detail_Desc.aspx?agr=Y&DwnldID=22972
11 *
12 * This file is provided under a dual BSD/GPLv2 license. When using or
13 * redistributing this file, you may do so under either license.
14 *
15 * GPL LICENSE SUMMARY
16 *
17 * Copyright(c) 2014 Intel Corporation.
18 *
19 * This program is free software; you can redistribute it and/or modify
20 * it under the terms of version 2 of the GNU General Public License as
21 * published by the Free Software Foundation.
22 *
23 * This program is distributed in the hope that it will be useful, but
24 * WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 * General Public License for more details.
27 *
28 * Contact Information:
29 * James Guilford <james.guilford@intel.com>
30 * Sean Gulley <sean.m.gulley@intel.com>
31 * Chandramouli Narayanan <mouli@linux.intel.com>
32 *
33 * BSD LICENSE
34 *
35 * Copyright(c) 2014 Intel Corporation.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 *
41 * Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in
45 * the documentation and/or other materials provided with the
46 * distribution.
47 * Neither the name of Intel Corporation nor the names of its
48 * contributors may be used to endorse or promote products derived
49 * from this software without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
52 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
53 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
54 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
55 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
56 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
57 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
58 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
59 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
60 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
61 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
62 *
63 */
64
65#include <linux/linkage.h>
66#include <asm/inst.h>
67
68#define CONCAT(a,b) a##b
69#define VMOVDQ vmovdqu
70
71#define xdata0 %xmm0
72#define xdata1 %xmm1
73#define xdata2 %xmm2
74#define xdata3 %xmm3
75#define xdata4 %xmm4
76#define xdata5 %xmm5
77#define xdata6 %xmm6
78#define xdata7 %xmm7
79#define xcounter %xmm8
80#define xbyteswap %xmm9
81#define xkey0 %xmm10
82#define xkey3 %xmm11
83#define xkey6 %xmm12
84#define xkey9 %xmm13
85#define xkey4 %xmm11
86#define xkey8 %xmm12
87#define xkey12 %xmm13
88#define xkeyA %xmm14
89#define xkeyB %xmm15
90
91#define p_in %rdi
92#define p_iv %rsi
93#define p_keys %rdx
94#define p_out %rcx
95#define num_bytes %r8
96
97#define tmp %r10
98#define DDQ(i) CONCAT(ddq_add_,i)
99#define XMM(i) CONCAT(%xmm, i)
100#define DDQ_DATA 0
101#define XDATA 1
102#define KEY_128 1
103#define KEY_192 2
104#define KEY_256 3
105
106.section .rodata
107.align 16
108
109byteswap_const:
110 .octa 0x000102030405060708090A0B0C0D0E0F
111ddq_add_1:
112 .octa 0x00000000000000000000000000000001
113ddq_add_2:
114 .octa 0x00000000000000000000000000000002
115ddq_add_3:
116 .octa 0x00000000000000000000000000000003
117ddq_add_4:
118 .octa 0x00000000000000000000000000000004
119ddq_add_5:
120 .octa 0x00000000000000000000000000000005
121ddq_add_6:
122 .octa 0x00000000000000000000000000000006
123ddq_add_7:
124 .octa 0x00000000000000000000000000000007
125ddq_add_8:
126 .octa 0x00000000000000000000000000000008
127
128.text
129
130/* generate a unique variable for ddq_add_x */
131
132.macro setddq n
133 var_ddq_add = DDQ(\n)
134.endm
135
136/* generate a unique variable for xmm register */
137.macro setxdata n
138 var_xdata = XMM(\n)
139.endm
140
141/* club the numeric 'id' to the symbol 'name' */
142
143.macro club name, id
144.altmacro
145 .if \name == DDQ_DATA
146 setddq %\id
147 .elseif \name == XDATA
148 setxdata %\id
149 .endif
150.noaltmacro
151.endm
152
153/*
154 * do_aes num_in_par load_keys key_len
155 * This increments p_in, but not p_out
156 */
157.macro do_aes b, k, key_len
158 .set by, \b
159 .set load_keys, \k
160 .set klen, \key_len
161
162 .if (load_keys)
163 vmovdqa 0*16(p_keys), xkey0
164 .endif
165
166 vpshufb xbyteswap, xcounter, xdata0
167
168 .set i, 1
169 .rept (by - 1)
170 club DDQ_DATA, i
171 club XDATA, i
172 vpaddd var_ddq_add(%rip), xcounter, var_xdata
173 vpshufb xbyteswap, var_xdata, var_xdata
174 .set i, (i +1)
175 .endr
176
177 vmovdqa 1*16(p_keys), xkeyA
178
179 vpxor xkey0, xdata0, xdata0
180 club DDQ_DATA, by
181 vpaddd var_ddq_add(%rip), xcounter, xcounter
182
183 .set i, 1
184 .rept (by - 1)
185 club XDATA, i
186 vpxor xkey0, var_xdata, var_xdata
187 .set i, (i +1)
188 .endr
189
190 vmovdqa 2*16(p_keys), xkeyB
191
192 .set i, 0
193 .rept by
194 club XDATA, i
195 vaesenc xkeyA, var_xdata, var_xdata /* key 1 */
196 .set i, (i +1)
197 .endr
198
199 .if (klen == KEY_128)
200 .if (load_keys)
201 vmovdqa 3*16(p_keys), xkeyA
202 .endif
203 .else
204 vmovdqa 3*16(p_keys), xkeyA
205 .endif
206
207 .set i, 0
208 .rept by
209 club XDATA, i
210 vaesenc xkeyB, var_xdata, var_xdata /* key 2 */
211 .set i, (i +1)
212 .endr
213
214 add $(16*by), p_in
215
216 .if (klen == KEY_128)
217 vmovdqa 4*16(p_keys), xkey4
218 .else
219 .if (load_keys)
220 vmovdqa 4*16(p_keys), xkey4
221 .endif
222 .endif
223
224 .set i, 0
225 .rept by
226 club XDATA, i
227 vaesenc xkeyA, var_xdata, var_xdata /* key 3 */
228 .set i, (i +1)
229 .endr
230
231 vmovdqa 5*16(p_keys), xkeyA
232
233 .set i, 0
234 .rept by
235 club XDATA, i
236 vaesenc xkey4, var_xdata, var_xdata /* key 4 */
237 .set i, (i +1)
238 .endr
239
240 .if (klen == KEY_128)
241 .if (load_keys)
242 vmovdqa 6*16(p_keys), xkeyB
243 .endif
244 .else
245 vmovdqa 6*16(p_keys), xkeyB
246 .endif
247
248 .set i, 0
249 .rept by
250 club XDATA, i
251 vaesenc xkeyA, var_xdata, var_xdata /* key 5 */
252 .set i, (i +1)
253 .endr
254
255 vmovdqa 7*16(p_keys), xkeyA
256
257 .set i, 0
258 .rept by
259 club XDATA, i
260 vaesenc xkeyB, var_xdata, var_xdata /* key 6 */
261 .set i, (i +1)
262 .endr
263
264 .if (klen == KEY_128)
265 vmovdqa 8*16(p_keys), xkey8
266 .else
267 .if (load_keys)
268 vmovdqa 8*16(p_keys), xkey8
269 .endif
270 .endif
271
272 .set i, 0
273 .rept by
274 club XDATA, i
275 vaesenc xkeyA, var_xdata, var_xdata /* key 7 */
276 .set i, (i +1)
277 .endr
278
279 .if (klen == KEY_128)
280 .if (load_keys)
281 vmovdqa 9*16(p_keys), xkeyA
282 .endif
283 .else
284 vmovdqa 9*16(p_keys), xkeyA
285 .endif
286
287 .set i, 0
288 .rept by
289 club XDATA, i
290 vaesenc xkey8, var_xdata, var_xdata /* key 8 */
291 .set i, (i +1)
292 .endr
293
294 vmovdqa 10*16(p_keys), xkeyB
295
296 .set i, 0
297 .rept by
298 club XDATA, i
299 vaesenc xkeyA, var_xdata, var_xdata /* key 9 */
300 .set i, (i +1)
301 .endr
302
303 .if (klen != KEY_128)
304 vmovdqa 11*16(p_keys), xkeyA
305 .endif
306
307 .set i, 0
308 .rept by
309 club XDATA, i
310 /* key 10 */
311 .if (klen == KEY_128)
312 vaesenclast xkeyB, var_xdata, var_xdata
313 .else
314 vaesenc xkeyB, var_xdata, var_xdata
315 .endif
316 .set i, (i +1)
317 .endr
318
319 .if (klen != KEY_128)
320 .if (load_keys)
321 vmovdqa 12*16(p_keys), xkey12
322 .endif
323
324 .set i, 0
325 .rept by
326 club XDATA, i
327 vaesenc xkeyA, var_xdata, var_xdata /* key 11 */
328 .set i, (i +1)
329 .endr
330
331 .if (klen == KEY_256)
332 vmovdqa 13*16(p_keys), xkeyA
333 .endif
334
335 .set i, 0
336 .rept by
337 club XDATA, i
338 .if (klen == KEY_256)
339 /* key 12 */
340 vaesenc xkey12, var_xdata, var_xdata
341 .else
342 vaesenclast xkey12, var_xdata, var_xdata
343 .endif
344 .set i, (i +1)
345 .endr
346
347 .if (klen == KEY_256)
348 vmovdqa 14*16(p_keys), xkeyB
349
350 .set i, 0
351 .rept by
352 club XDATA, i
353 /* key 13 */
354 vaesenc xkeyA, var_xdata, var_xdata
355 .set i, (i +1)
356 .endr
357
358 .set i, 0
359 .rept by
360 club XDATA, i
361 /* key 14 */
362 vaesenclast xkeyB, var_xdata, var_xdata
363 .set i, (i +1)
364 .endr
365 .endif
366 .endif
367
368 .set i, 0
369 .rept (by / 2)
370 .set j, (i+1)
371 VMOVDQ (i*16 - 16*by)(p_in), xkeyA
372 VMOVDQ (j*16 - 16*by)(p_in), xkeyB
373 club XDATA, i
374 vpxor xkeyA, var_xdata, var_xdata
375 club XDATA, j
376 vpxor xkeyB, var_xdata, var_xdata
377 .set i, (i+2)
378 .endr
379
380 .if (i < by)
381 VMOVDQ (i*16 - 16*by)(p_in), xkeyA
382 club XDATA, i
383 vpxor xkeyA, var_xdata, var_xdata
384 .endif
385
386 .set i, 0
387 .rept by
388 club XDATA, i
389 VMOVDQ var_xdata, i*16(p_out)
390 .set i, (i+1)
391 .endr
392.endm
393
394.macro do_aes_load val, key_len
395 do_aes \val, 1, \key_len
396.endm
397
398.macro do_aes_noload val, key_len
399 do_aes \val, 0, \key_len
400.endm
401
402/* main body of aes ctr load */
403
404.macro do_aes_ctrmain key_len
405
406 cmp $16, num_bytes
407 jb .Ldo_return2\key_len
408
409 vmovdqa byteswap_const(%rip), xbyteswap
410 vmovdqu (p_iv), xcounter
411 vpshufb xbyteswap, xcounter, xcounter
412
413 mov num_bytes, tmp
414 and $(7*16), tmp
415 jz .Lmult_of_8_blks\key_len
416
417 /* 1 <= tmp <= 7 */
418 cmp $(4*16), tmp
419 jg .Lgt4\key_len
420 je .Leq4\key_len
421
422.Llt4\key_len:
423 cmp $(2*16), tmp
424 jg .Leq3\key_len
425 je .Leq2\key_len
426
427.Leq1\key_len:
428 do_aes_load 1, \key_len
429 add $(1*16), p_out
430 and $(~7*16), num_bytes
431 jz .Ldo_return2\key_len
432 jmp .Lmain_loop2\key_len
433
434.Leq2\key_len:
435 do_aes_load 2, \key_len
436 add $(2*16), p_out
437 and $(~7*16), num_bytes
438 jz .Ldo_return2\key_len
439 jmp .Lmain_loop2\key_len
440
441
442.Leq3\key_len:
443 do_aes_load 3, \key_len
444 add $(3*16), p_out
445 and $(~7*16), num_bytes
446 jz .Ldo_return2\key_len
447 jmp .Lmain_loop2\key_len
448
449.Leq4\key_len:
450 do_aes_load 4, \key_len
451 add $(4*16), p_out
452 and $(~7*16), num_bytes
453 jz .Ldo_return2\key_len
454 jmp .Lmain_loop2\key_len
455
456.Lgt4\key_len:
457 cmp $(6*16), tmp
458 jg .Leq7\key_len
459 je .Leq6\key_len
460
461.Leq5\key_len:
462 do_aes_load 5, \key_len
463 add $(5*16), p_out
464 and $(~7*16), num_bytes
465 jz .Ldo_return2\key_len
466 jmp .Lmain_loop2\key_len
467
468.Leq6\key_len:
469 do_aes_load 6, \key_len
470 add $(6*16), p_out
471 and $(~7*16), num_bytes
472 jz .Ldo_return2\key_len
473 jmp .Lmain_loop2\key_len
474
475.Leq7\key_len:
476 do_aes_load 7, \key_len
477 add $(7*16), p_out
478 and $(~7*16), num_bytes
479 jz .Ldo_return2\key_len
480 jmp .Lmain_loop2\key_len
481
482.Lmult_of_8_blks\key_len:
483 .if (\key_len != KEY_128)
484 vmovdqa 0*16(p_keys), xkey0
485 vmovdqa 4*16(p_keys), xkey4
486 vmovdqa 8*16(p_keys), xkey8
487 vmovdqa 12*16(p_keys), xkey12
488 .else
489 vmovdqa 0*16(p_keys), xkey0
490 vmovdqa 3*16(p_keys), xkey4
491 vmovdqa 6*16(p_keys), xkey8
492 vmovdqa 9*16(p_keys), xkey12
493 .endif
494.align 16
495.Lmain_loop2\key_len:
496 /* num_bytes is a multiple of 8 and >0 */
497 do_aes_noload 8, \key_len
498 add $(8*16), p_out
499 sub $(8*16), num_bytes
500 jne .Lmain_loop2\key_len
501
502.Ldo_return2\key_len:
503 /* return updated IV */
504 vpshufb xbyteswap, xcounter, xcounter
505 vmovdqu xcounter, (p_iv)
506 ret
507.endm
508
509/*
510 * routine to do AES128 CTR enc/decrypt "by8"
511 * XMM registers are clobbered.
512 * Saving/restoring must be done at a higher level
513 * aes_ctr_enc_128_avx_by8(void *in, void *iv, void *keys, void *out,
514 * unsigned int num_bytes)
515 */
516ENTRY(aes_ctr_enc_128_avx_by8)
517 /* call the aes main loop */
518 do_aes_ctrmain KEY_128
519
520ENDPROC(aes_ctr_enc_128_avx_by8)
521
522/*
523 * routine to do AES192 CTR enc/decrypt "by8"
524 * XMM registers are clobbered.
525 * Saving/restoring must be done at a higher level
526 * aes_ctr_enc_192_avx_by8(void *in, void *iv, void *keys, void *out,
527 * unsigned int num_bytes)
528 */
529ENTRY(aes_ctr_enc_192_avx_by8)
530 /* call the aes main loop */
531 do_aes_ctrmain KEY_192
532
533ENDPROC(aes_ctr_enc_192_avx_by8)
534
535/*
536 * routine to do AES256 CTR enc/decrypt "by8"
537 * XMM registers are clobbered.
538 * Saving/restoring must be done at a higher level
539 * aes_ctr_enc_256_avx_by8(void *in, void *iv, void *keys, void *out,
540 * unsigned int num_bytes)
541 */
542ENTRY(aes_ctr_enc_256_avx_by8)
543 /* call the aes main loop */
544 do_aes_ctrmain KEY_256
545
546ENDPROC(aes_ctr_enc_256_avx_by8)
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 948ad0e77741..888950f29fd9 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -105,6 +105,9 @@ void crypto_fpu_exit(void);
 #define AVX_GEN4_OPTSIZE 4096
 
 #ifdef CONFIG_X86_64
+
+static void (*aesni_ctr_enc_tfm)(struct crypto_aes_ctx *ctx, u8 *out,
+			      const u8 *in, unsigned int len, u8 *iv);
 asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
 			      const u8 *in, unsigned int len, u8 *iv);
 
@@ -155,6 +158,12 @@ asmlinkage void aesni_gcm_dec(void *ctx, u8 *out,
 
 
 #ifdef CONFIG_AS_AVX
+asmlinkage void aes_ctr_enc_128_avx_by8(const u8 *in, u8 *iv,
+		void *keys, u8 *out, unsigned int num_bytes);
+asmlinkage void aes_ctr_enc_192_avx_by8(const u8 *in, u8 *iv,
+		void *keys, u8 *out, unsigned int num_bytes);
+asmlinkage void aes_ctr_enc_256_avx_by8(const u8 *in, u8 *iv,
+		void *keys, u8 *out, unsigned int num_bytes);
 /*
  * asmlinkage void aesni_gcm_precomp_avx_gen2()
  * gcm_data *my_ctx_data, context data
@@ -472,6 +481,25 @@ static void ctr_crypt_final(struct crypto_aes_ctx *ctx,
 	crypto_inc(ctrblk, AES_BLOCK_SIZE);
 }
 
+#ifdef CONFIG_AS_AVX
+static void aesni_ctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out,
+			      const u8 *in, unsigned int len, u8 *iv)
+{
+	/*
+	 * based on key length, override with the by8 version
+	 * of ctr mode encryption/decryption for improved performance
+	 * aes_set_key_common() ensures that key length is one of
+	 * {128,192,256}
+	 */
+	if (ctx->key_length == AES_KEYSIZE_128)
+		aes_ctr_enc_128_avx_by8(in, iv, (void *)ctx, out, len);
+	else if (ctx->key_length == AES_KEYSIZE_192)
+		aes_ctr_enc_192_avx_by8(in, iv, (void *)ctx, out, len);
+	else
+		aes_ctr_enc_256_avx_by8(in, iv, (void *)ctx, out, len);
+}
+#endif
+
 static int ctr_crypt(struct blkcipher_desc *desc,
 		     struct scatterlist *dst, struct scatterlist *src,
 		     unsigned int nbytes)
@@ -486,8 +514,8 @@ static int ctr_crypt(struct blkcipher_desc *desc,
 
 	kernel_fpu_begin();
 	while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
-		aesni_ctr_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
-			      nbytes & AES_BLOCK_MASK, walk.iv);
+		aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr,
+			          nbytes & AES_BLOCK_MASK, walk.iv);
 		nbytes &= AES_BLOCK_SIZE - 1;
 		err = blkcipher_walk_done(desc, &walk, nbytes);
 	}
@@ -1493,6 +1521,14 @@ static int __init aesni_init(void)
 		aesni_gcm_enc_tfm = aesni_gcm_enc;
 		aesni_gcm_dec_tfm = aesni_gcm_dec;
 	}
+	aesni_ctr_enc_tfm = aesni_ctr_enc;
+#ifdef CONFIG_AS_AVX
+	if (cpu_has_avx) {
+		/* optimize performance of ctr mode encryption transform */
+		aesni_ctr_enc_tfm = aesni_ctr_enc_avx_tfm;
+		pr_info("AES CTR mode by8 optimization enabled\n");
+	}
+#endif
 #endif
 
 	err = crypto_fpu_init();
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index dbc4339b5417..26d49ebae040 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -72,6 +72,7 @@
 
 # unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init);
 
+.text
 ENTRY(crc_pcl)
 #define    bufp		%rdi
 #define    bufp_dw	%edi
@@ -216,15 +217,11 @@ LABEL crc_ %i
 	## 4) Combine three results:
 	################################################################
 
-	lea	(K_table-16)(%rip), bufp	# first entry is for idx 1
+	lea	(K_table-8)(%rip), bufp		# first entry is for idx 1
 	shlq	$3, %rax			# rax *= 8
-	subq	%rax, tmp			# tmp -= rax*8
-	shlq	$1, %rax
-	subq	%rax, tmp			# tmp -= rax*16
-						# (total tmp -= rax*24)
-	addq	%rax, bufp
-
-	movdqa	(bufp), %xmm0			# 2 consts: K1:K2
+	pmovzxdq (bufp,%rax), %xmm0		# 2 consts: K1:K2
+	leal	(%eax,%eax,2), %eax		# rax *= 3 (total *24)
+	subq	%rax, tmp			# tmp -= rax*24
 
 	movq	crc_init, %xmm1			# CRC for block 1
 	PCLMULQDQ 0x00,%xmm0,%xmm1		# Multiply by K2
@@ -238,9 +235,9 @@ LABEL crc_ %i
 	mov	crc2, crc_init
 	crc32	%rax, crc_init
 
-################################################################
-## 5) Check for end:
-################################################################
+	################################################################
+	## 5) Check for end:
+	################################################################
 
 LABEL crc_ 0
 	mov	tmp, len
@@ -331,136 +328,136 @@ ENDPROC(crc_pcl)
331 328
332 ################################################################ 329 ################################################################
333 ## PCLMULQDQ tables 330 ## PCLMULQDQ tables
334 ## Table is 128 entries x 2 quad words each 331 ## Table is 128 entries x 2 words (8 bytes) each
335 ################################################################ 332 ################################################################
336.data 333.section .rotata, "a", %progbits
337.align 64 334.align 8
338K_table: 335K_table:
339 .quad 0x14cd00bd6,0x105ec76f0 336 .long 0x493c7d27, 0x00000001
340 .quad 0x0ba4fc28e,0x14cd00bd6 337 .long 0xba4fc28e, 0x493c7d27
341 .quad 0x1d82c63da,0x0f20c0dfe 338 .long 0xddc0152b, 0xf20c0dfe
342 .quad 0x09e4addf8,0x0ba4fc28e 339 .long 0x9e4addf8, 0xba4fc28e
343 .quad 0x039d3b296,0x1384aa63a 340 .long 0x39d3b296, 0x3da6d0cb
344 .quad 0x102f9b8a2,0x1d82c63da 341 .long 0x0715ce53, 0xddc0152b
345 .quad 0x14237f5e6,0x01c291d04 342 .long 0x47db8317, 0x1c291d04
346 .quad 0x00d3b6092,0x09e4addf8 343 .long 0x0d3b6092, 0x9e4addf8
347 .quad 0x0c96cfdc0,0x0740eef02 344 .long 0xc96cfdc0, 0x740eef02
348 .quad 0x18266e456,0x039d3b296 345 .long 0x878a92a7, 0x39d3b296
349 .quad 0x0daece73e,0x0083a6eec 346 .long 0xdaece73e, 0x083a6eec
350 .quad 0x0ab7aff2a,0x102f9b8a2 347 .long 0xab7aff2a, 0x0715ce53
351 .quad 0x1248ea574,0x1c1733996 348 .long 0x2162d385, 0xc49f4f67
352 .quad 0x083348832,0x14237f5e6 349 .long 0x83348832, 0x47db8317
353 .quad 0x12c743124,0x02ad91c30 350 .long 0x299847d5, 0x2ad91c30
354 .quad 0x0b9e02b86,0x00d3b6092 351 .long 0xb9e02b86, 0x0d3b6092
355 .quad 0x018b33a4e,0x06992cea2 352 .long 0x18b33a4e, 0x6992cea2
356 .quad 0x1b331e26a,0x0c96cfdc0 353 .long 0xb6dd949b, 0xc96cfdc0
357 .quad 0x17d35ba46,0x07e908048 354 .long 0x78d9ccb7, 0x7e908048
358 .quad 0x1bf2e8b8a,0x18266e456 355 .long 0xbac2fd7b, 0x878a92a7
359 .quad 0x1a3e0968a,0x11ed1f9d8 356 .long 0xa60ce07b, 0x1b3d8f29
360 .quad 0x0ce7f39f4,0x0daece73e 357 .long 0xce7f39f4, 0xdaece73e
361 .quad 0x061d82e56,0x0f1d0f55e 358 .long 0x61d82e56, 0xf1d0f55e
362 .quad 0x0d270f1a2,0x0ab7aff2a 359 .long 0xd270f1a2, 0xab7aff2a
363 .quad 0x1c3f5f66c,0x0a87ab8a8 360 .long 0xc619809d, 0xa87ab8a8
364 .quad 0x12ed0daac,0x1248ea574 361 .long 0x2b3cac5d, 0x2162d385
365 .quad 0x065863b64,0x08462d800 362 .long 0x65863b64, 0x8462d800
366 .quad 0x11eef4f8e,0x083348832 363 .long 0x1b03397f, 0x83348832
367 .quad 0x1ee54f54c,0x071d111a8 364 .long 0xebb883bd, 0x71d111a8
368 .quad 0x0b3e32c28,0x12c743124 365 .long 0xb3e32c28, 0x299847d5
369 .quad 0x0064f7f26,0x0ffd852c6 366 .long 0x064f7f26, 0xffd852c6
370 .quad 0x0dd7e3b0c,0x0b9e02b86 367 .long 0xdd7e3b0c, 0xb9e02b86
371 .quad 0x0f285651c,0x0dcb17aa4 368 .long 0xf285651c, 0xdcb17aa4
372 .quad 0x010746f3c,0x018b33a4e 369 .long 0x10746f3c, 0x18b33a4e
373 .quad 0x1c24afea4,0x0f37c5aee 370 .long 0xc7a68855, 0xf37c5aee
374 .quad 0x0271d9844,0x1b331e26a 371 .long 0x271d9844, 0xb6dd949b
375 .quad 0x08e766a0c,0x06051d5a2 372 .long 0x8e766a0c, 0x6051d5a2
376 .quad 0x093a5f730,0x17d35ba46 373 .long 0x93a5f730, 0x78d9ccb7
377 .quad 0x06cb08e5c,0x11d5ca20e 374 .long 0x6cb08e5c, 0x18b0d4ff
378 .quad 0x06b749fb2,0x1bf2e8b8a 375 .long 0x6b749fb2, 0xbac2fd7b
379 .quad 0x1167f94f2,0x021f3d99c 376 .long 0x1393e203, 0x21f3d99c
380 .quad 0x0cec3662e,0x1a3e0968a 377 .long 0xcec3662e, 0xa60ce07b
381 .quad 0x19329634a,0x08f158014 378 .long 0x96c515bb, 0x8f158014
382 .quad 0x0e6fc4e6a,0x0ce7f39f4 379 .long 0xe6fc4e6a, 0xce7f39f4
383 .quad 0x08227bb8a,0x1a5e82106 380 .long 0x8227bb8a, 0xa00457f7
384 .quad 0x0b0cd4768,0x061d82e56 381 .long 0xb0cd4768, 0x61d82e56
385 .quad 0x13c2b89c4,0x188815ab2 382 .long 0x39c7ff35, 0x8d6d2c43
386 .quad 0x0d7a4825c,0x0d270f1a2 383 .long 0xd7a4825c, 0xd270f1a2
387 .quad 0x10f5ff2ba,0x105405f3e 384 .long 0x0ab3844b, 0x00ac29cf
388 .quad 0x00167d312,0x1c3f5f66c 385 .long 0x0167d312, 0xc619809d
389 .quad 0x0f6076544,0x0e9adf796 386 .long 0xf6076544, 0xe9adf796
390 .quad 0x026f6a60a,0x12ed0daac 387 .long 0x26f6a60a, 0x2b3cac5d
391 .quad 0x1a2adb74e,0x096638b34 388 .long 0xa741c1bf, 0x96638b34
392 .quad 0x19d34af3a,0x065863b64 389 .long 0x98d8d9cb, 0x65863b64
393 .quad 0x049c3cc9c,0x1e50585a0 390 .long 0x49c3cc9c, 0xe0e9f351
394 .quad 0x068bce87a,0x11eef4f8e 391 .long 0x68bce87a, 0x1b03397f
395 .quad 0x1524fa6c6,0x19f1c69dc 392 .long 0x57a3d037, 0x9af01f2d
396 .quad 0x16cba8aca,0x1ee54f54c 393 .long 0x6956fc3b, 0xebb883bd
397 .quad 0x042d98888,0x12913343e 394 .long 0x42d98888, 0x2cff42cf
398 .quad 0x1329d9f7e,0x0b3e32c28 395 .long 0x3771e98f, 0xb3e32c28
399 .quad 0x1b1c69528,0x088f25a3a 396 .long 0xb42ae3d9, 0x88f25a3a
400 .quad 0x02178513a,0x0064f7f26 397 .long 0x2178513a, 0x064f7f26
401 .quad 0x0e0ac139e,0x04e36f0b0 398 .long 0xe0ac139e, 0x4e36f0b0
402 .quad 0x0170076fa,0x0dd7e3b0c 399 .long 0x170076fa, 0xdd7e3b0c
403 .quad 0x141a1a2e2,0x0bd6f81f8 400 .long 0x444dd413, 0xbd6f81f8
404 .quad 0x16ad828b4,0x0f285651c 401 .long 0x6f345e45, 0xf285651c
405 .quad 0x041d17b64,0x19425cbba 402 .long 0x41d17b64, 0x91c9bd4b
406 .quad 0x1fae1cc66,0x010746f3c 403 .long 0xff0dba97, 0x10746f3c
407 .quad 0x1a75b4b00,0x18db37e8a 404 .long 0xa2b73df1, 0x885f087b
408 .quad 0x0f872e54c,0x1c24afea4 405 .long 0xf872e54c, 0xc7a68855
409 .quad 0x01e41e9fc,0x04c144932 406 .long 0x1e41e9fc, 0x4c144932
410 .quad 0x086d8e4d2,0x0271d9844 407 .long 0x86d8e4d2, 0x271d9844
411 .quad 0x160f7af7a,0x052148f02 408 .long 0x651bd98b, 0x52148f02
412 .quad 0x05bb8f1bc,0x08e766a0c 409 .long 0x5bb8f1bc, 0x8e766a0c
413 .quad 0x0a90fd27a,0x0a3c6f37a 410 .long 0xa90fd27a, 0xa3c6f37a
414 .quad 0x0b3af077a,0x093a5f730 411 .long 0xb3af077a, 0x93a5f730
415 .quad 0x04984d782,0x1d22c238e 412 .long 0x4984d782, 0xd7c0557f
416 .quad 0x0ca6ef3ac,0x06cb08e5c 413 .long 0xca6ef3ac, 0x6cb08e5c
417 .quad 0x0234e0b26,0x063ded06a 414 .long 0x234e0b26, 0x63ded06a
418 .quad 0x1d88abd4a,0x06b749fb2 415 .long 0xdd66cbbb, 0x6b749fb2
419 .quad 0x04597456a,0x04d56973c 416 .long 0x4597456a, 0x4d56973c
420 .quad 0x0e9e28eb4,0x1167f94f2 417 .long 0xe9e28eb4, 0x1393e203
421 .quad 0x07b3ff57a,0x19385bf2e 418 .long 0x7b3ff57a, 0x9669c9df
422 .quad 0x0c9c8b782,0x0cec3662e 419 .long 0xc9c8b782, 0xcec3662e
423 .quad 0x13a9cba9e,0x0e417f38a 420 .long 0x3f70cc6f, 0xe417f38a
424 .quad 0x093e106a4,0x19329634a 421 .long 0x93e106a4, 0x96c515bb
425 .quad 0x167001a9c,0x14e727980 422 .long 0x62ec6c6d, 0x4b9e0f71
426 .quad 0x1ddffc5d4,0x0e6fc4e6a 423 .long 0xd813b325, 0xe6fc4e6a
427 .quad 0x00df04680,0x0d104b8fc 424 .long 0x0df04680, 0xd104b8fc
428 .quad 0x02342001e,0x08227bb8a 425 .long 0x2342001e, 0x8227bb8a
429 .quad 0x00a2a8d7e,0x05b397730 426 .long 0x0a2a8d7e, 0x5b397730
430 .quad 0x168763fa6,0x0b0cd4768 427 .long 0x6d9a4957, 0xb0cd4768
431 .quad 0x1ed5a407a,0x0e78eb416 428 .long 0xe8b6368b, 0xe78eb416
432 .quad 0x0d2c3ed1a,0x13c2b89c4 429 .long 0xd2c3ed1a, 0x39c7ff35
433 .quad 0x0995a5724,0x1641378f0 430 .long 0x995a5724, 0x61ff0e01
434 .quad 0x19b1afbc4,0x0d7a4825c 431 .long 0x9ef68d35, 0xd7a4825c
435 .quad 0x109ffedc0,0x08d96551c 432 .long 0x0c139b31, 0x8d96551c
436 .quad 0x0f2271e60,0x10f5ff2ba 433 .long 0xf2271e60, 0x0ab3844b
437 .quad 0x00b0bf8ca,0x00bf80dd2 434 .long 0x0b0bf8ca, 0x0bf80dd2
438 .quad 0x123888b7a,0x00167d312 435 .long 0x2664fd8b, 0x0167d312
439 .quad 0x1e888f7dc,0x18dcddd1c 436 .long 0xed64812d, 0x8821abed
440 .quad 0x002ee03b2,0x0f6076544 437 .long 0x02ee03b2, 0xf6076544
441 .quad 0x183e8d8fe,0x06a45d2b2 438 .long 0x8604ae0f, 0x6a45d2b2
442 .quad 0x133d7a042,0x026f6a60a 439 .long 0x363bd6b3, 0x26f6a60a
443 .quad 0x116b0f50c,0x1dd3e10e8 440 .long 0x135c83fd, 0xd8d26619
444 .quad 0x05fabe670,0x1a2adb74e 441 .long 0x5fabe670, 0xa741c1bf
445 .quad 0x130004488,0x0de87806c 442 .long 0x35ec3279, 0xde87806c
446 .quad 0x000bcf5f6,0x19d34af3a 443 .long 0x00bcf5f6, 0x98d8d9cb
447 .quad 0x18f0c7078,0x014338754 444 .long 0x8ae00689, 0x14338754
448 .quad 0x017f27698,0x049c3cc9c 445 .long 0x17f27698, 0x49c3cc9c
449 .quad 0x058ca5f00,0x15e3e77ee 446 .long 0x58ca5f00, 0x5bd2011f
450 .quad 0x1af900c24,0x068bce87a 447 .long 0xaa7c7ad5, 0x68bce87a
451 .quad 0x0b5cfca28,0x0dd07448e 448 .long 0xb5cfca28, 0xdd07448e
452 .quad 0x0ded288f8,0x1524fa6c6 449 .long 0xded288f8, 0x57a3d037
453 .quad 0x059f229bc,0x1d8048348 450 .long 0x59f229bc, 0xdde8f5b9
454 .quad 0x06d390dec,0x16cba8aca 451 .long 0x6d390dec, 0x6956fc3b
455 .quad 0x037170390,0x0a3e3e02c 452 .long 0x37170390, 0xa3e3e02c
456 .quad 0x06353c1cc,0x042d98888 453 .long 0x6353c1cc, 0x42d98888
457 .quad 0x0c4584f5c,0x0d73c7bea 454 .long 0xc4584f5c, 0xd73c7bea
458 .quad 0x1f16a3418,0x1329d9f7e 455 .long 0xf48642e9, 0x3771e98f
459 .quad 0x0531377e2,0x185137662 456 .long 0x531377e2, 0x80ff0093
460 .quad 0x1d8d9ca7c,0x1b1c69528 457 .long 0xdd35bc8d, 0xb42ae3d9
461 .quad 0x0b25b29f2,0x18a08b5bc 458 .long 0xb25b29f2, 0x8fe4c34d
462 .quad 0x19fb2a8b0,0x02178513a 459 .long 0x9a5ede41, 0x2178513a
463 .quad 0x1a08fe6ac,0x1da758ae0 460 .long 0xa563905d, 0xdf99fc11
464 .quad 0x045cddf4e,0x0e0ac139e 461 .long 0x45cddf4e, 0xe0ac139e
465 .quad 0x1a91647f2,0x169cf9eb0 462 .long 0xacfa3103, 0x6c23e841
466 .quad 0x1a0f717c4,0x0170076fa 463 .long 0xa51b6135, 0x170076fa
diff --git a/arch/x86/crypto/des3_ede-asm_64.S b/arch/x86/crypto/des3_ede-asm_64.S
new file mode 100644
index 000000000000..038f6ae87c5e
--- /dev/null
+++ b/arch/x86/crypto/des3_ede-asm_64.S
@@ -0,0 +1,805 @@
1/*
2 * des3_ede-asm_64.S - x86-64 assembly implementation of 3DES cipher
3 *
4 * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 */
16
17#include <linux/linkage.h>
18
19.file "des3_ede-asm_64.S"
20.text
21
22#define s1 .L_s1
23#define s2 ((s1) + (64*8))
24#define s3 ((s2) + (64*8))
25#define s4 ((s3) + (64*8))
26#define s5 ((s4) + (64*8))
27#define s6 ((s5) + (64*8))
28#define s7 ((s6) + (64*8))
29#define s8 ((s7) + (64*8))
30
31/* register macros */
32#define CTX %rdi
33
34#define RL0 %r8
35#define RL1 %r9
36#define RL2 %r10
37
38#define RL0d %r8d
39#define RL1d %r9d
40#define RL2d %r10d
41
42#define RR0 %r11
43#define RR1 %r12
44#define RR2 %r13
45
46#define RR0d %r11d
47#define RR1d %r12d
48#define RR2d %r13d
49
50#define RW0 %rax
51#define RW1 %rbx
52#define RW2 %rcx
53
54#define RW0d %eax
55#define RW1d %ebx
56#define RW2d %ecx
57
58#define RW0bl %al
59#define RW1bl %bl
60#define RW2bl %cl
61
62#define RW0bh %ah
63#define RW1bh %bh
64#define RW2bh %ch
65
66#define RT0 %r15
67#define RT1 %rbp
68#define RT2 %r14
69#define RT3 %rdx
70
71#define RT0d %r15d
72#define RT1d %ebp
73#define RT2d %r14d
74#define RT3d %edx
75
76/***********************************************************************
77 * 1-way 3DES
78 ***********************************************************************/
79#define do_permutation(a, b, offset, mask) \
80 movl a, RT0d; \
81 shrl $(offset), RT0d; \
82 xorl b, RT0d; \
83 andl $(mask), RT0d; \
84 xorl RT0d, b; \
85 shll $(offset), RT0d; \
86 xorl RT0d, a;
87
88#define expand_to_64bits(val, mask) \
89 movl val##d, RT0d; \
90 rorl $4, RT0d; \
91 shlq $32, RT0; \
92 orq RT0, val; \
93 andq mask, val;
94
95#define compress_to_64bits(val) \
96 movq val, RT0; \
97 shrq $32, RT0; \
98 roll $4, RT0d; \
99 orl RT0d, val##d;
100
101#define initial_permutation(left, right) \
102 do_permutation(left##d, right##d, 4, 0x0f0f0f0f); \
103 do_permutation(left##d, right##d, 16, 0x0000ffff); \
104 do_permutation(right##d, left##d, 2, 0x33333333); \
105 do_permutation(right##d, left##d, 8, 0x00ff00ff); \
106 movabs $0x3f3f3f3f3f3f3f3f, RT3; \
107 movl left##d, RW0d; \
108 roll $1, right##d; \
109 xorl right##d, RW0d; \
110 andl $0xaaaaaaaa, RW0d; \
111 xorl RW0d, left##d; \
112 xorl RW0d, right##d; \
113 roll $1, left##d; \
114 expand_to_64bits(right, RT3); \
115 expand_to_64bits(left, RT3);
116
117#define final_permutation(left, right) \
118 compress_to_64bits(right); \
119 compress_to_64bits(left); \
120 movl right##d, RW0d; \
121 rorl $1, left##d; \
122 xorl left##d, RW0d; \
123 andl $0xaaaaaaaa, RW0d; \
124 xorl RW0d, right##d; \
125 xorl RW0d, left##d; \
126 rorl $1, right##d; \
127 do_permutation(right##d, left##d, 8, 0x00ff00ff); \
128 do_permutation(right##d, left##d, 2, 0x33333333); \
129 do_permutation(left##d, right##d, 16, 0x0000ffff); \
130 do_permutation(left##d, right##d, 4, 0x0f0f0f0f);
131
132#define round1(n, from, to, load_next_key) \
133 xorq from, RW0; \
134 \
135 movzbl RW0bl, RT0d; \
136 movzbl RW0bh, RT1d; \
137 shrq $16, RW0; \
138 movzbl RW0bl, RT2d; \
139 movzbl RW0bh, RT3d; \
140 shrq $16, RW0; \
141 movq s8(, RT0, 8), RT0; \
142 xorq s6(, RT1, 8), to; \
143 movzbl RW0bl, RL1d; \
144 movzbl RW0bh, RT1d; \
145 shrl $16, RW0d; \
146 xorq s4(, RT2, 8), RT0; \
147 xorq s2(, RT3, 8), to; \
148 movzbl RW0bl, RT2d; \
149 movzbl RW0bh, RT3d; \
150 xorq s7(, RL1, 8), RT0; \
151 xorq s5(, RT1, 8), to; \
152 xorq s3(, RT2, 8), RT0; \
153 load_next_key(n, RW0); \
154 xorq RT0, to; \
155 xorq s1(, RT3, 8), to; \
156
157#define load_next_key(n, RWx) \
158 movq (((n) + 1) * 8)(CTX), RWx;
159
160#define dummy2(a, b) /*_*/
161
162#define read_block(io, left, right) \
163 movl (io), left##d; \
164 movl 4(io), right##d; \
165 bswapl left##d; \
166 bswapl right##d;
167
168#define write_block(io, left, right) \
169 bswapl left##d; \
170 bswapl right##d; \
171 movl left##d, (io); \
172 movl right##d, 4(io);
173
174ENTRY(des3_ede_x86_64_crypt_blk)
175 /* input:
176 * %rdi: round keys, CTX
177 * %rsi: dst
178 * %rdx: src
179 */
180 pushq %rbp;
181 pushq %rbx;
182 pushq %r12;
183 pushq %r13;
184 pushq %r14;
185 pushq %r15;
186
187 read_block(%rdx, RL0, RR0);
188 initial_permutation(RL0, RR0);
189
190 movq (CTX), RW0;
191
192 round1(0, RR0, RL0, load_next_key);
193 round1(1, RL0, RR0, load_next_key);
194 round1(2, RR0, RL0, load_next_key);
195 round1(3, RL0, RR0, load_next_key);
196 round1(4, RR0, RL0, load_next_key);
197 round1(5, RL0, RR0, load_next_key);
198 round1(6, RR0, RL0, load_next_key);
199 round1(7, RL0, RR0, load_next_key);
200 round1(8, RR0, RL0, load_next_key);
201 round1(9, RL0, RR0, load_next_key);
202 round1(10, RR0, RL0, load_next_key);
203 round1(11, RL0, RR0, load_next_key);
204 round1(12, RR0, RL0, load_next_key);
205 round1(13, RL0, RR0, load_next_key);
206 round1(14, RR0, RL0, load_next_key);
207 round1(15, RL0, RR0, load_next_key);
208
209 round1(16+0, RL0, RR0, load_next_key);
210 round1(16+1, RR0, RL0, load_next_key);
211 round1(16+2, RL0, RR0, load_next_key);
212 round1(16+3, RR0, RL0, load_next_key);
213 round1(16+4, RL0, RR0, load_next_key);
214 round1(16+5, RR0, RL0, load_next_key);
215 round1(16+6, RL0, RR0, load_next_key);
216 round1(16+7, RR0, RL0, load_next_key);
217 round1(16+8, RL0, RR0, load_next_key);
218 round1(16+9, RR0, RL0, load_next_key);
219 round1(16+10, RL0, RR0, load_next_key);
220 round1(16+11, RR0, RL0, load_next_key);
221 round1(16+12, RL0, RR0, load_next_key);
222 round1(16+13, RR0, RL0, load_next_key);
223 round1(16+14, RL0, RR0, load_next_key);
224 round1(16+15, RR0, RL0, load_next_key);
225
226 round1(32+0, RR0, RL0, load_next_key);
227 round1(32+1, RL0, RR0, load_next_key);
228 round1(32+2, RR0, RL0, load_next_key);
229 round1(32+3, RL0, RR0, load_next_key);
230 round1(32+4, RR0, RL0, load_next_key);
231 round1(32+5, RL0, RR0, load_next_key);
232 round1(32+6, RR0, RL0, load_next_key);
233 round1(32+7, RL0, RR0, load_next_key);
234 round1(32+8, RR0, RL0, load_next_key);
235 round1(32+9, RL0, RR0, load_next_key);
236 round1(32+10, RR0, RL0, load_next_key);
237 round1(32+11, RL0, RR0, load_next_key);
238 round1(32+12, RR0, RL0, load_next_key);
239 round1(32+13, RL0, RR0, load_next_key);
240 round1(32+14, RR0, RL0, load_next_key);
241 round1(32+15, RL0, RR0, dummy2);
242
243 final_permutation(RR0, RL0);
244 write_block(%rsi, RR0, RL0);
245
246 popq %r15;
247 popq %r14;
248 popq %r13;
249 popq %r12;
250 popq %rbx;
251 popq %rbp;
252
253 ret;
254ENDPROC(des3_ede_x86_64_crypt_blk)
255
256/***********************************************************************
257 * 3-way 3DES
258 ***********************************************************************/
259#define expand_to_64bits(val, mask) \
260 movl val##d, RT0d; \
261 rorl $4, RT0d; \
262 shlq $32, RT0; \
263 orq RT0, val; \
264 andq mask, val;
265
266#define compress_to_64bits(val) \
267 movq val, RT0; \
268 shrq $32, RT0; \
269 roll $4, RT0d; \
270 orl RT0d, val##d;
271
272#define initial_permutation3(left, right) \
273 do_permutation(left##0d, right##0d, 4, 0x0f0f0f0f); \
274 do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
275 do_permutation(left##1d, right##1d, 4, 0x0f0f0f0f); \
276 do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
277 do_permutation(left##2d, right##2d, 4, 0x0f0f0f0f); \
278 do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
279 \
280 do_permutation(right##0d, left##0d, 2, 0x33333333); \
281 do_permutation(right##0d, left##0d, 8, 0x00ff00ff); \
282 do_permutation(right##1d, left##1d, 2, 0x33333333); \
283 do_permutation(right##1d, left##1d, 8, 0x00ff00ff); \
284 do_permutation(right##2d, left##2d, 2, 0x33333333); \
285 do_permutation(right##2d, left##2d, 8, 0x00ff00ff); \
286 \
287 movabs $0x3f3f3f3f3f3f3f3f, RT3; \
288 \
289 movl left##0d, RW0d; \
290 roll $1, right##0d; \
291 xorl right##0d, RW0d; \
292 andl $0xaaaaaaaa, RW0d; \
293 xorl RW0d, left##0d; \
294 xorl RW0d, right##0d; \
295 roll $1, left##0d; \
296 expand_to_64bits(right##0, RT3); \
297 expand_to_64bits(left##0, RT3); \
298 movl left##1d, RW1d; \
299 roll $1, right##1d; \
300 xorl right##1d, RW1d; \
301 andl $0xaaaaaaaa, RW1d; \
302 xorl RW1d, left##1d; \
303 xorl RW1d, right##1d; \
304 roll $1, left##1d; \
305 expand_to_64bits(right##1, RT3); \
306 expand_to_64bits(left##1, RT3); \
307 movl left##2d, RW2d; \
308 roll $1, right##2d; \
309 xorl right##2d, RW2d; \
310 andl $0xaaaaaaaa, RW2d; \
311 xorl RW2d, left##2d; \
312 xorl RW2d, right##2d; \
313 roll $1, left##2d; \
314 expand_to_64bits(right##2, RT3); \
315 expand_to_64bits(left##2, RT3);
316
317#define final_permutation3(left, right) \
318 compress_to_64bits(right##0); \
319 compress_to_64bits(left##0); \
320 movl right##0d, RW0d; \
321 rorl $1, left##0d; \
322 xorl left##0d, RW0d; \
323 andl $0xaaaaaaaa, RW0d; \
324 xorl RW0d, right##0d; \
325 xorl RW0d, left##0d; \
326 rorl $1, right##0d; \
327 compress_to_64bits(right##1); \
328 compress_to_64bits(left##1); \
329 movl right##1d, RW1d; \
330 rorl $1, left##1d; \
331 xorl left##1d, RW1d; \
332 andl $0xaaaaaaaa, RW1d; \
333 xorl RW1d, right##1d; \
334 xorl RW1d, left##1d; \
335 rorl $1, right##1d; \
336 compress_to_64bits(right##2); \
337 compress_to_64bits(left##2); \
338 movl right##2d, RW2d; \
339 rorl $1, left##2d; \
340 xorl left##2d, RW2d; \
341 andl $0xaaaaaaaa, RW2d; \
342 xorl RW2d, right##2d; \
343 xorl RW2d, left##2d; \
344 rorl $1, right##2d; \
345 \
346 do_permutation(right##0d, left##0d, 8, 0x00ff00ff); \
347 do_permutation(right##0d, left##0d, 2, 0x33333333); \
348 do_permutation(right##1d, left##1d, 8, 0x00ff00ff); \
349 do_permutation(right##1d, left##1d, 2, 0x33333333); \
350 do_permutation(right##2d, left##2d, 8, 0x00ff00ff); \
351 do_permutation(right##2d, left##2d, 2, 0x33333333); \
352 \
353 do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
354 do_permutation(left##0d, right##0d, 4, 0x0f0f0f0f); \
355 do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
356 do_permutation(left##1d, right##1d, 4, 0x0f0f0f0f); \
357 do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
358 do_permutation(left##2d, right##2d, 4, 0x0f0f0f0f);
359
360#define round3(n, from, to, load_next_key, do_movq) \
361 xorq from##0, RW0; \
362 movzbl RW0bl, RT3d; \
363 movzbl RW0bh, RT1d; \
364 shrq $16, RW0; \
365 xorq s8(, RT3, 8), to##0; \
366 xorq s6(, RT1, 8), to##0; \
367 movzbl RW0bl, RT3d; \
368 movzbl RW0bh, RT1d; \
369 shrq $16, RW0; \
370 xorq s4(, RT3, 8), to##0; \
371 xorq s2(, RT1, 8), to##0; \
372 movzbl RW0bl, RT3d; \
373 movzbl RW0bh, RT1d; \
374 shrl $16, RW0d; \
375 xorq s7(, RT3, 8), to##0; \
376 xorq s5(, RT1, 8), to##0; \
377 movzbl RW0bl, RT3d; \
378 movzbl RW0bh, RT1d; \
379 load_next_key(n, RW0); \
380 xorq s3(, RT3, 8), to##0; \
381 xorq s1(, RT1, 8), to##0; \
382 xorq from##1, RW1; \
383 movzbl RW1bl, RT3d; \
384 movzbl RW1bh, RT1d; \
385 shrq $16, RW1; \
386 xorq s8(, RT3, 8), to##1; \
387 xorq s6(, RT1, 8), to##1; \
388 movzbl RW1bl, RT3d; \
389 movzbl RW1bh, RT1d; \
390 shrq $16, RW1; \
391 xorq s4(, RT3, 8), to##1; \
392 xorq s2(, RT1, 8), to##1; \
393 movzbl RW1bl, RT3d; \
394 movzbl RW1bh, RT1d; \
395 shrl $16, RW1d; \
396 xorq s7(, RT3, 8), to##1; \
397 xorq s5(, RT1, 8), to##1; \
398 movzbl RW1bl, RT3d; \
399 movzbl RW1bh, RT1d; \
400 do_movq(RW0, RW1); \
401 xorq s3(, RT3, 8), to##1; \
402 xorq s1(, RT1, 8), to##1; \
403 xorq from##2, RW2; \
404 movzbl RW2bl, RT3d; \
405 movzbl RW2bh, RT1d; \
406 shrq $16, RW2; \
407 xorq s8(, RT3, 8), to##2; \
408 xorq s6(, RT1, 8), to##2; \
409 movzbl RW2bl, RT3d; \
410 movzbl RW2bh, RT1d; \
411 shrq $16, RW2; \
412 xorq s4(, RT3, 8), to##2; \
413 xorq s2(, RT1, 8), to##2; \
414 movzbl RW2bl, RT3d; \
415 movzbl RW2bh, RT1d; \
416 shrl $16, RW2d; \
417 xorq s7(, RT3, 8), to##2; \
418 xorq s5(, RT1, 8), to##2; \
419 movzbl RW2bl, RT3d; \
420 movzbl RW2bh, RT1d; \
421 do_movq(RW0, RW2); \
422 xorq s3(, RT3, 8), to##2; \
423 xorq s1(, RT1, 8), to##2;
424
425#define __movq(src, dst) \
426 movq src, dst;
427
428ENTRY(des3_ede_x86_64_crypt_blk_3way)
429 /* input:
430 * %rdi: ctx, round keys
431 * %rsi: dst (3 blocks)
432 * %rdx: src (3 blocks)
433 */
434
435 pushq %rbp;
436 pushq %rbx;
437 pushq %r12;
438 pushq %r13;
439 pushq %r14;
440 pushq %r15;
441
442 /* load input */
443 movl 0 * 4(%rdx), RL0d;
444 movl 1 * 4(%rdx), RR0d;
445 movl 2 * 4(%rdx), RL1d;
446 movl 3 * 4(%rdx), RR1d;
447 movl 4 * 4(%rdx), RL2d;
448 movl 5 * 4(%rdx), RR2d;
449
450 bswapl RL0d;
451 bswapl RR0d;
452 bswapl RL1d;
453 bswapl RR1d;
454 bswapl RL2d;
455 bswapl RR2d;
456
457 initial_permutation3(RL, RR);
458
459 movq 0(CTX), RW0;
460 movq RW0, RW1;
461 movq RW0, RW2;
462
463 round3(0, RR, RL, load_next_key, __movq);
464 round3(1, RL, RR, load_next_key, __movq);
465 round3(2, RR, RL, load_next_key, __movq);
466 round3(3, RL, RR, load_next_key, __movq);
467 round3(4, RR, RL, load_next_key, __movq);
468 round3(5, RL, RR, load_next_key, __movq);
469 round3(6, RR, RL, load_next_key, __movq);
470 round3(7, RL, RR, load_next_key, __movq);
471 round3(8, RR, RL, load_next_key, __movq);
472 round3(9, RL, RR, load_next_key, __movq);
473 round3(10, RR, RL, load_next_key, __movq);
474 round3(11, RL, RR, load_next_key, __movq);
475 round3(12, RR, RL, load_next_key, __movq);
476 round3(13, RL, RR, load_next_key, __movq);
477 round3(14, RR, RL, load_next_key, __movq);
478 round3(15, RL, RR, load_next_key, __movq);
479
480 round3(16+0, RL, RR, load_next_key, __movq);
481 round3(16+1, RR, RL, load_next_key, __movq);
482 round3(16+2, RL, RR, load_next_key, __movq);
483 round3(16+3, RR, RL, load_next_key, __movq);
484 round3(16+4, RL, RR, load_next_key, __movq);
485 round3(16+5, RR, RL, load_next_key, __movq);
486 round3(16+6, RL, RR, load_next_key, __movq);
487 round3(16+7, RR, RL, load_next_key, __movq);
488 round3(16+8, RL, RR, load_next_key, __movq);
489 round3(16+9, RR, RL, load_next_key, __movq);
490 round3(16+10, RL, RR, load_next_key, __movq);
491 round3(16+11, RR, RL, load_next_key, __movq);
492 round3(16+12, RL, RR, load_next_key, __movq);
493 round3(16+13, RR, RL, load_next_key, __movq);
494 round3(16+14, RL, RR, load_next_key, __movq);
495 round3(16+15, RR, RL, load_next_key, __movq);
496
497 round3(32+0, RR, RL, load_next_key, __movq);
498 round3(32+1, RL, RR, load_next_key, __movq);
499 round3(32+2, RR, RL, load_next_key, __movq);
500 round3(32+3, RL, RR, load_next_key, __movq);
501 round3(32+4, RR, RL, load_next_key, __movq);
502 round3(32+5, RL, RR, load_next_key, __movq);
503 round3(32+6, RR, RL, load_next_key, __movq);
504 round3(32+7, RL, RR, load_next_key, __movq);
505 round3(32+8, RR, RL, load_next_key, __movq);
506 round3(32+9, RL, RR, load_next_key, __movq);
507 round3(32+10, RR, RL, load_next_key, __movq);
508 round3(32+11, RL, RR, load_next_key, __movq);
509 round3(32+12, RR, RL, load_next_key, __movq);
510 round3(32+13, RL, RR, load_next_key, __movq);
511 round3(32+14, RR, RL, load_next_key, __movq);
512 round3(32+15, RL, RR, dummy2, dummy2);
513
514 final_permutation3(RR, RL);
515
516 bswapl RR0d;
517 bswapl RL0d;
518 bswapl RR1d;
519 bswapl RL1d;
520 bswapl RR2d;
521 bswapl RL2d;
522
523 movl RR0d, 0 * 4(%rsi);
524 movl RL0d, 1 * 4(%rsi);
525 movl RR1d, 2 * 4(%rsi);
526 movl RL1d, 3 * 4(%rsi);
527 movl RR2d, 4 * 4(%rsi);
528 movl RL2d, 5 * 4(%rsi);
529
530 popq %r15;
531 popq %r14;
532 popq %r13;
533 popq %r12;
534 popq %rbx;
535 popq %rbp;
536
537 ret;
538ENDPROC(des3_ede_x86_64_crypt_blk_3way)
539
540.data
541.align 16
542.L_s1:
543 .quad 0x0010100001010400, 0x0000000000000000
544 .quad 0x0000100000010000, 0x0010100001010404
545 .quad 0x0010100001010004, 0x0000100000010404
546 .quad 0x0000000000000004, 0x0000100000010000
547 .quad 0x0000000000000400, 0x0010100001010400
548 .quad 0x0010100001010404, 0x0000000000000400
549 .quad 0x0010000001000404, 0x0010100001010004
550 .quad 0x0010000001000000, 0x0000000000000004
551 .quad 0x0000000000000404, 0x0010000001000400
552 .quad 0x0010000001000400, 0x0000100000010400
553 .quad 0x0000100000010400, 0x0010100001010000
554 .quad 0x0010100001010000, 0x0010000001000404
555 .quad 0x0000100000010004, 0x0010000001000004
556 .quad 0x0010000001000004, 0x0000100000010004
557 .quad 0x0000000000000000, 0x0000000000000404
558 .quad 0x0000100000010404, 0x0010000001000000
559 .quad 0x0000100000010000, 0x0010100001010404
560 .quad 0x0000000000000004, 0x0010100001010000
561 .quad 0x0010100001010400, 0x0010000001000000
562 .quad 0x0010000001000000, 0x0000000000000400
563 .quad 0x0010100001010004, 0x0000100000010000
564 .quad 0x0000100000010400, 0x0010000001000004
565 .quad 0x0000000000000400, 0x0000000000000004
566 .quad 0x0010000001000404, 0x0000100000010404
567 .quad 0x0010100001010404, 0x0000100000010004
568 .quad 0x0010100001010000, 0x0010000001000404
569 .quad 0x0010000001000004, 0x0000000000000404
570 .quad 0x0000100000010404, 0x0010100001010400
571 .quad 0x0000000000000404, 0x0010000001000400
572 .quad 0x0010000001000400, 0x0000000000000000
573 .quad 0x0000100000010004, 0x0000100000010400
574 .quad 0x0000000000000000, 0x0010100001010004
575.L_s2:
576 .quad 0x0801080200100020, 0x0800080000000000
577 .quad 0x0000080000000000, 0x0001080200100020
578 .quad 0x0001000000100000, 0x0000000200000020
579 .quad 0x0801000200100020, 0x0800080200000020
580 .quad 0x0800000200000020, 0x0801080200100020
581 .quad 0x0801080000100000, 0x0800000000000000
582 .quad 0x0800080000000000, 0x0001000000100000
583 .quad 0x0000000200000020, 0x0801000200100020
584 .quad 0x0001080000100000, 0x0001000200100020
585 .quad 0x0800080200000020, 0x0000000000000000
586 .quad 0x0800000000000000, 0x0000080000000000
587 .quad 0x0001080200100020, 0x0801000000100000
588 .quad 0x0001000200100020, 0x0800000200000020
589 .quad 0x0000000000000000, 0x0001080000100000
590 .quad 0x0000080200000020, 0x0801080000100000
591 .quad 0x0801000000100000, 0x0000080200000020
592 .quad 0x0000000000000000, 0x0001080200100020
593 .quad 0x0801000200100020, 0x0001000000100000
594 .quad 0x0800080200000020, 0x0801000000100000
595 .quad 0x0801080000100000, 0x0000080000000000
596 .quad 0x0801000000100000, 0x0800080000000000
597 .quad 0x0000000200000020, 0x0801080200100020
598 .quad 0x0001080200100020, 0x0000000200000020
599 .quad 0x0000080000000000, 0x0800000000000000
600 .quad 0x0000080200000020, 0x0801080000100000
601 .quad 0x0001000000100000, 0x0800000200000020
602 .quad 0x0001000200100020, 0x0800080200000020
603 .quad 0x0800000200000020, 0x0001000200100020
604 .quad 0x0001080000100000, 0x0000000000000000
605 .quad 0x0800080000000000, 0x0000080200000020
606 .quad 0x0800000000000000, 0x0801000200100020
607 .quad 0x0801080200100020, 0x0001080000100000
608.L_s3:
609 .quad 0x0000002000000208, 0x0000202008020200
610 .quad 0x0000000000000000, 0x0000200008020008
611 .quad 0x0000002008000200, 0x0000000000000000
612 .quad 0x0000202000020208, 0x0000002008000200
613 .quad 0x0000200000020008, 0x0000000008000008
614 .quad 0x0000000008000008, 0x0000200000020000
615 .quad 0x0000202008020208, 0x0000200000020008
616 .quad 0x0000200008020000, 0x0000002000000208
617 .quad 0x0000000008000000, 0x0000000000000008
618 .quad 0x0000202008020200, 0x0000002000000200
619 .quad 0x0000202000020200, 0x0000200008020000
620 .quad 0x0000200008020008, 0x0000202000020208
621 .quad 0x0000002008000208, 0x0000202000020200
622 .quad 0x0000200000020000, 0x0000002008000208
623 .quad 0x0000000000000008, 0x0000202008020208
624 .quad 0x0000002000000200, 0x0000000008000000
625 .quad 0x0000202008020200, 0x0000000008000000
626 .quad 0x0000200000020008, 0x0000002000000208
627 .quad 0x0000200000020000, 0x0000202008020200
628 .quad 0x0000002008000200, 0x0000000000000000
629 .quad 0x0000002000000200, 0x0000200000020008
630 .quad 0x0000202008020208, 0x0000002008000200
631 .quad 0x0000000008000008, 0x0000002000000200
632 .quad 0x0000000000000000, 0x0000200008020008
633 .quad 0x0000002008000208, 0x0000200000020000
634 .quad 0x0000000008000000, 0x0000202008020208
635 .quad 0x0000000000000008, 0x0000202000020208
636 .quad 0x0000202000020200, 0x0000000008000008
637 .quad 0x0000200008020000, 0x0000002008000208
638 .quad 0x0000002000000208, 0x0000200008020000
639 .quad 0x0000202000020208, 0x0000000000000008
640 .quad 0x0000200008020008, 0x0000202000020200
641.L_s4:
642 .quad 0x1008020000002001, 0x1000020800002001
643 .quad 0x1000020800002001, 0x0000000800000000
644 .quad 0x0008020800002000, 0x1008000800000001
645 .quad 0x1008000000000001, 0x1000020000002001
646 .quad 0x0000000000000000, 0x0008020000002000
647 .quad 0x0008020000002000, 0x1008020800002001
648 .quad 0x1000000800000001, 0x0000000000000000
649 .quad 0x0008000800000000, 0x1008000000000001
650 .quad 0x1000000000000001, 0x0000020000002000
651 .quad 0x0008000000000000, 0x1008020000002001
652 .quad 0x0000000800000000, 0x0008000000000000
653 .quad 0x1000020000002001, 0x0000020800002000
654 .quad 0x1008000800000001, 0x1000000000000001
655 .quad 0x0000020800002000, 0x0008000800000000
656 .quad 0x0000020000002000, 0x0008020800002000
657 .quad 0x1008020800002001, 0x1000000800000001
658 .quad 0x0008000800000000, 0x1008000000000001
659 .quad 0x0008020000002000, 0x1008020800002001
660 .quad 0x1000000800000001, 0x0000000000000000
661 .quad 0x0000000000000000, 0x0008020000002000
662 .quad 0x0000020800002000, 0x0008000800000000
663 .quad 0x1008000800000001, 0x1000000000000001
664 .quad 0x1008020000002001, 0x1000020800002001
665 .quad 0x1000020800002001, 0x0000000800000000
666 .quad 0x1008020800002001, 0x1000000800000001
667 .quad 0x1000000000000001, 0x0000020000002000
668 .quad 0x1008000000000001, 0x1000020000002001
669 .quad 0x0008020800002000, 0x1008000800000001
670 .quad 0x1000020000002001, 0x0000020800002000
671 .quad 0x0008000000000000, 0x1008020000002001
672 .quad 0x0000000800000000, 0x0008000000000000
673 .quad 0x0000020000002000, 0x0008020800002000
674.L_s5:
675 .quad 0x0000001000000100, 0x0020001002080100
676 .quad 0x0020000002080000, 0x0420001002000100
677 .quad 0x0000000000080000, 0x0000001000000100
678 .quad 0x0400000000000000, 0x0020000002080000
679 .quad 0x0400001000080100, 0x0000000000080000
680 .quad 0x0020001002000100, 0x0400001000080100
681 .quad 0x0420001002000100, 0x0420000002080000
682 .quad 0x0000001000080100, 0x0400000000000000
683 .quad 0x0020000002000000, 0x0400000000080000
684 .quad 0x0400000000080000, 0x0000000000000000
685 .quad 0x0400001000000100, 0x0420001002080100
686 .quad 0x0420001002080100, 0x0020001002000100
687 .quad 0x0420000002080000, 0x0400001000000100
688 .quad 0x0000000000000000, 0x0420000002000000
689 .quad 0x0020001002080100, 0x0020000002000000
690 .quad 0x0420000002000000, 0x0000001000080100
691 .quad 0x0000000000080000, 0x0420001002000100
692 .quad 0x0000001000000100, 0x0020000002000000
693 .quad 0x0400000000000000, 0x0020000002080000
694 .quad 0x0420001002000100, 0x0400001000080100
695 .quad 0x0020001002000100, 0x0400000000000000
696 .quad 0x0420000002080000, 0x0020001002080100
697 .quad 0x0400001000080100, 0x0000001000000100
698 .quad 0x0020000002000000, 0x0420000002080000
699 .quad 0x0420001002080100, 0x0000001000080100
700 .quad 0x0420000002000000, 0x0420001002080100
701 .quad 0x0020000002080000, 0x0000000000000000
702 .quad 0x0400000000080000, 0x0420000002000000
703 .quad 0x0000001000080100, 0x0020001002000100
704 .quad 0x0400001000000100, 0x0000000000080000
705 .quad 0x0000000000000000, 0x0400000000080000
706 .quad 0x0020001002080100, 0x0400001000000100
707.L_s6:
708 .quad 0x0200000120000010, 0x0204000020000000
709 .quad 0x0000040000000000, 0x0204040120000010
710 .quad 0x0204000020000000, 0x0000000100000010
711 .quad 0x0204040120000010, 0x0004000000000000
712 .quad 0x0200040020000000, 0x0004040100000010
713 .quad 0x0004000000000000, 0x0200000120000010
714 .quad 0x0004000100000010, 0x0200040020000000
715 .quad 0x0200000020000000, 0x0000040100000010
716 .quad 0x0000000000000000, 0x0004000100000010
717 .quad 0x0200040120000010, 0x0000040000000000
718 .quad 0x0004040000000000, 0x0200040120000010
719 .quad 0x0000000100000010, 0x0204000120000010
720 .quad 0x0204000120000010, 0x0000000000000000
721 .quad 0x0004040100000010, 0x0204040020000000
722 .quad 0x0000040100000010, 0x0004040000000000
723 .quad 0x0204040020000000, 0x0200000020000000
724 .quad 0x0200040020000000, 0x0000000100000010
725 .quad 0x0204000120000010, 0x0004040000000000
726 .quad 0x0204040120000010, 0x0004000000000000
727 .quad 0x0000040100000010, 0x0200000120000010
728 .quad 0x0004000000000000, 0x0200040020000000
729 .quad 0x0200000020000000, 0x0000040100000010
730 .quad 0x0200000120000010, 0x0204040120000010
731 .quad 0x0004040000000000, 0x0204000020000000
732 .quad 0x0004040100000010, 0x0204040020000000
733 .quad 0x0000000000000000, 0x0204000120000010
734 .quad 0x0000000100000010, 0x0000040000000000
735 .quad 0x0204000020000000, 0x0004040100000010
736 .quad 0x0000040000000000, 0x0004000100000010
737 .quad 0x0200040120000010, 0x0000000000000000
738 .quad 0x0204040020000000, 0x0200000020000000
739 .quad 0x0004000100000010, 0x0200040120000010
740.L_s7:
741 .quad 0x0002000000200000, 0x2002000004200002
742 .quad 0x2000000004000802, 0x0000000000000000
743 .quad 0x0000000000000800, 0x2000000004000802
744 .quad 0x2002000000200802, 0x0002000004200800
745 .quad 0x2002000004200802, 0x0002000000200000
746 .quad 0x0000000000000000, 0x2000000004000002
747 .quad 0x2000000000000002, 0x0000000004000000
748 .quad 0x2002000004200002, 0x2000000000000802
749 .quad 0x0000000004000800, 0x2002000000200802
750 .quad 0x2002000000200002, 0x0000000004000800
751 .quad 0x2000000004000002, 0x0002000004200000
752 .quad 0x0002000004200800, 0x2002000000200002
753 .quad 0x0002000004200000, 0x0000000000000800
754 .quad 0x2000000000000802, 0x2002000004200802
755 .quad 0x0002000000200800, 0x2000000000000002
756 .quad 0x0000000004000000, 0x0002000000200800
757 .quad 0x0000000004000000, 0x0002000000200800
758 .quad 0x0002000000200000, 0x2000000004000802
759 .quad 0x2000000004000802, 0x2002000004200002
760 .quad 0x2002000004200002, 0x2000000000000002
761 .quad 0x2002000000200002, 0x0000000004000000
762 .quad 0x0000000004000800, 0x0002000000200000
763 .quad 0x0002000004200800, 0x2000000000000802
764 .quad 0x2002000000200802, 0x0002000004200800
765 .quad 0x2000000000000802, 0x2000000004000002
766 .quad 0x2002000004200802, 0x0002000004200000
767 .quad 0x0002000000200800, 0x0000000000000000
768 .quad 0x2000000000000002, 0x2002000004200802
769 .quad 0x0000000000000000, 0x2002000000200802
770 .quad 0x0002000004200000, 0x0000000000000800
771 .quad 0x2000000004000002, 0x0000000004000800
772 .quad 0x0000000000000800, 0x2002000000200002
773.L_s8:
774 .quad 0x0100010410001000, 0x0000010000001000
775 .quad 0x0000000000040000, 0x0100010410041000
776 .quad 0x0100000010000000, 0x0100010410001000
777 .quad 0x0000000400000000, 0x0100000010000000
778 .quad 0x0000000400040000, 0x0100000010040000
779 .quad 0x0100010410041000, 0x0000010000041000
780 .quad 0x0100010010041000, 0x0000010400041000
781 .quad 0x0000010000001000, 0x0000000400000000
782 .quad 0x0100000010040000, 0x0100000410000000
783 .quad 0x0100010010001000, 0x0000010400001000
784 .quad 0x0000010000041000, 0x0000000400040000
785 .quad 0x0100000410040000, 0x0100010010041000
786 .quad 0x0000010400001000, 0x0000000000000000
787 .quad 0x0000000000000000, 0x0100000410040000
788 .quad 0x0100000410000000, 0x0100010010001000
789 .quad 0x0000010400041000, 0x0000000000040000
790 .quad 0x0000010400041000, 0x0000000000040000
791 .quad 0x0100010010041000, 0x0000010000001000
792 .quad 0x0000000400000000, 0x0100000410040000
793 .quad 0x0000010000001000, 0x0000010400041000
794 .quad 0x0100010010001000, 0x0000000400000000
795 .quad 0x0100000410000000, 0x0100000010040000
796 .quad 0x0100000410040000, 0x0100000010000000
797 .quad 0x0000000000040000, 0x0100010410001000
798 .quad 0x0000000000000000, 0x0100010410041000
799 .quad 0x0000000400040000, 0x0100000410000000
800 .quad 0x0100000010040000, 0x0100010010001000
801 .quad 0x0100010410001000, 0x0000000000000000
802 .quad 0x0100010410041000, 0x0000010000041000
803 .quad 0x0000010000041000, 0x0000010400001000
804 .quad 0x0000010400001000, 0x0000000400040000
805 .quad 0x0100000010000000, 0x0100010010041000
diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c
new file mode 100644
index 000000000000..0e9c0668fe4e
--- /dev/null
+++ b/arch/x86/crypto/des3_ede_glue.c
@@ -0,0 +1,509 @@
1/*
2 * Glue Code for assembler optimized version of 3DES
3 *
4 * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
5 *
6 * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
7 * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
8 * CTR part based on code (crypto/ctr.c) by:
9 * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 */
22
23#include <asm/processor.h>
24#include <crypto/des.h>
25#include <linux/crypto.h>
26#include <linux/init.h>
27#include <linux/module.h>
28#include <linux/types.h>
29#include <crypto/algapi.h>
30
31struct des3_ede_x86_ctx {
32 u32 enc_expkey[DES3_EDE_EXPKEY_WORDS];
33 u32 dec_expkey[DES3_EDE_EXPKEY_WORDS];
34};
35
36/* regular block cipher functions */
37asmlinkage void des3_ede_x86_64_crypt_blk(const u32 *expkey, u8 *dst,
38 const u8 *src);
39
40/* 3-way parallel cipher functions */
41asmlinkage void des3_ede_x86_64_crypt_blk_3way(const u32 *expkey, u8 *dst,
42 const u8 *src);
43
44static inline void des3_ede_enc_blk(struct des3_ede_x86_ctx *ctx, u8 *dst,
45 const u8 *src)
46{
47 u32 *enc_ctx = ctx->enc_expkey;
48
49 des3_ede_x86_64_crypt_blk(enc_ctx, dst, src);
50}
51
52static inline void des3_ede_dec_blk(struct des3_ede_x86_ctx *ctx, u8 *dst,
53 const u8 *src)
54{
55 u32 *dec_ctx = ctx->dec_expkey;
56
57 des3_ede_x86_64_crypt_blk(dec_ctx, dst, src);
58}
59
60static inline void des3_ede_enc_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst,
61 const u8 *src)
62{
63 u32 *enc_ctx = ctx->enc_expkey;
64
65 des3_ede_x86_64_crypt_blk_3way(enc_ctx, dst, src);
66}
67
68static inline void des3_ede_dec_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst,
69 const u8 *src)
70{
71 u32 *dec_ctx = ctx->dec_expkey;
72
73 des3_ede_x86_64_crypt_blk_3way(dec_ctx, dst, src);
74}
75
76static void des3_ede_x86_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
77{
78 des3_ede_enc_blk(crypto_tfm_ctx(tfm), dst, src);
79}
80
81static void des3_ede_x86_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
82{
83 des3_ede_dec_blk(crypto_tfm_ctx(tfm), dst, src);
84}
85
86static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
87 const u32 *expkey)
88{
89 unsigned int bsize = DES3_EDE_BLOCK_SIZE;
90 unsigned int nbytes;
91 int err;
92
93 err = blkcipher_walk_virt(desc, walk);
94
95 while ((nbytes = walk->nbytes)) {
96 u8 *wsrc = walk->src.virt.addr;
97 u8 *wdst = walk->dst.virt.addr;
98
99 /* Process three block batch */
100 if (nbytes >= bsize * 3) {
101 do {
102 des3_ede_x86_64_crypt_blk_3way(expkey, wdst,
103 wsrc);
104
105 wsrc += bsize * 3;
106 wdst += bsize * 3;
107 nbytes -= bsize * 3;
108 } while (nbytes >= bsize * 3);
109
110 if (nbytes < bsize)
111 goto done;
112 }
113
114 /* Handle leftovers */
115 do {
116 des3_ede_x86_64_crypt_blk(expkey, wdst, wsrc);
117
118 wsrc += bsize;
119 wdst += bsize;
120 nbytes -= bsize;
121 } while (nbytes >= bsize);
122
123done:
124 err = blkcipher_walk_done(desc, walk, nbytes);
125 }
126
127 return err;
128}
129
130static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
131 struct scatterlist *src, unsigned int nbytes)
132{
133 struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
134 struct blkcipher_walk walk;
135
136 blkcipher_walk_init(&walk, dst, src, nbytes);
137 return ecb_crypt(desc, &walk, ctx->enc_expkey);
138}
139
140static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
141 struct scatterlist *src, unsigned int nbytes)
142{
143 struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
144 struct blkcipher_walk walk;
145
146 blkcipher_walk_init(&walk, dst, src, nbytes);
147 return ecb_crypt(desc, &walk, ctx->dec_expkey);
148}
149
150static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
151 struct blkcipher_walk *walk)
152{
153 struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
154 unsigned int bsize = DES3_EDE_BLOCK_SIZE;
155 unsigned int nbytes = walk->nbytes;
156 u64 *src = (u64 *)walk->src.virt.addr;
157 u64 *dst = (u64 *)walk->dst.virt.addr;
158 u64 *iv = (u64 *)walk->iv;
159
160 do {
161 *dst = *src ^ *iv;
162 des3_ede_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
163 iv = dst;
164
165 src += 1;
166 dst += 1;
167 nbytes -= bsize;
168 } while (nbytes >= bsize);
169
170 *(u64 *)walk->iv = *iv;
171 return nbytes;
172}
173
174static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
175 struct scatterlist *src, unsigned int nbytes)
176{
177 struct blkcipher_walk walk;
178 int err;
179
180 blkcipher_walk_init(&walk, dst, src, nbytes);
181 err = blkcipher_walk_virt(desc, &walk);
182
183 while ((nbytes = walk.nbytes)) {
184 nbytes = __cbc_encrypt(desc, &walk);
185 err = blkcipher_walk_done(desc, &walk, nbytes);
186 }
187
188 return err;
189}
190
191static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
192 struct blkcipher_walk *walk)
193{
194 struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
195 unsigned int bsize = DES3_EDE_BLOCK_SIZE;
196 unsigned int nbytes = walk->nbytes;
197 u64 *src = (u64 *)walk->src.virt.addr;
198 u64 *dst = (u64 *)walk->dst.virt.addr;
199 u64 ivs[3 - 1];
200 u64 last_iv;
201
202 /* Start of the last block. */
203 src += nbytes / bsize - 1;
204 dst += nbytes / bsize - 1;
205
206 last_iv = *src;
207
208 /* Process three block batch */
209 if (nbytes >= bsize * 3) {
210 do {
211 nbytes -= bsize * 3 - bsize;
212 src -= 3 - 1;
213 dst -= 3 - 1;
214
215 ivs[0] = src[0];
216 ivs[1] = src[1];
217
218 des3_ede_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src);
219
220 dst[1] ^= ivs[0];
221 dst[2] ^= ivs[1];
222
223 nbytes -= bsize;
224 if (nbytes < bsize)
225 goto done;
226
227 *dst ^= *(src - 1);
228 src -= 1;
229 dst -= 1;
230 } while (nbytes >= bsize * 3);
231 }
232
233 /* Handle leftovers */
234 for (;;) {
235 des3_ede_dec_blk(ctx, (u8 *)dst, (u8 *)src);
236
237 nbytes -= bsize;
238 if (nbytes < bsize)
239 break;
240
241 *dst ^= *(src - 1);
242 src -= 1;
243 dst -= 1;
244 }
245
246done:
247 *dst ^= *(u64 *)walk->iv;
248 *(u64 *)walk->iv = last_iv;
249
250 return nbytes;
251}
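/*
 * Editor's sketch (not part of the patch): why __cbc_decrypt() above walks
 * from the last block toward the first.  Decrypting in place back-to-front
 * lets each plaintext overwrite its own ciphertext while the previous
 * ciphertext (needed for the chaining XOR) is still intact; the first block
 * is finally XORed with the walk IV, and the saved last ciphertext becomes
 * the IV for the next chunk.  The 3-way batching above is an optimization on
 * top of this basic walk.  decrypt_block() is a trivial stand-in, not the
 * real cipher; assumes nblocks >= 1.
 */
#include <stdint.h>

static void decrypt_block(uint64_t *blk)	/* placeholder "cipher" */
{
	*blk ^= 0x0123456789abcdefULL;
}

static void cbc_decrypt_inplace(uint64_t *buf, unsigned int nblocks,
				uint64_t *iv)
{
	uint64_t last_cipher = buf[nblocks - 1];
	unsigned int i;

	for (i = nblocks; i-- > 0; ) {
		decrypt_block(&buf[i]);
		/* buf[i - 1] has not been touched yet, so it is still ciphertext */
		buf[i] ^= (i == 0) ? *iv : buf[i - 1];
	}
	*iv = last_cipher;		/* chain into the next chunk */
}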
252
253static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
254 struct scatterlist *src, unsigned int nbytes)
255{
256 struct blkcipher_walk walk;
257 int err;
258
259 blkcipher_walk_init(&walk, dst, src, nbytes);
260 err = blkcipher_walk_virt(desc, &walk);
261
262 while ((nbytes = walk.nbytes)) {
263 nbytes = __cbc_decrypt(desc, &walk);
264 err = blkcipher_walk_done(desc, &walk, nbytes);
265 }
266
267 return err;
268}
269
270static void ctr_crypt_final(struct des3_ede_x86_ctx *ctx,
271 struct blkcipher_walk *walk)
272{
273 u8 *ctrblk = walk->iv;
274 u8 keystream[DES3_EDE_BLOCK_SIZE];
275 u8 *src = walk->src.virt.addr;
276 u8 *dst = walk->dst.virt.addr;
277 unsigned int nbytes = walk->nbytes;
278
279 des3_ede_enc_blk(ctx, keystream, ctrblk);
280 crypto_xor(keystream, src, nbytes);
281 memcpy(dst, keystream, nbytes);
282
283 crypto_inc(ctrblk, DES3_EDE_BLOCK_SIZE);
284}
285
286static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
287 struct blkcipher_walk *walk)
288{
289 struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
290 unsigned int bsize = DES3_EDE_BLOCK_SIZE;
291 unsigned int nbytes = walk->nbytes;
292 __be64 *src = (__be64 *)walk->src.virt.addr;
293 __be64 *dst = (__be64 *)walk->dst.virt.addr;
294 u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
295 __be64 ctrblocks[3];
296
297 /* Process three block batch */
298 if (nbytes >= bsize * 3) {
299 do {
300 /* create ctrblks for parallel encrypt */
301 ctrblocks[0] = cpu_to_be64(ctrblk++);
302 ctrblocks[1] = cpu_to_be64(ctrblk++);
303 ctrblocks[2] = cpu_to_be64(ctrblk++);
304
305 des3_ede_enc_blk_3way(ctx, (u8 *)ctrblocks,
306 (u8 *)ctrblocks);
307
308 dst[0] = src[0] ^ ctrblocks[0];
309 dst[1] = src[1] ^ ctrblocks[1];
310 dst[2] = src[2] ^ ctrblocks[2];
311
312 src += 3;
313 dst += 3;
314 } while ((nbytes -= bsize * 3) >= bsize * 3);
315
316 if (nbytes < bsize)
317 goto done;
318 }
319
320 /* Handle leftovers */
321 do {
322 ctrblocks[0] = cpu_to_be64(ctrblk++);
323
324 des3_ede_enc_blk(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks);
325
326 dst[0] = src[0] ^ ctrblocks[0];
327
328 src += 1;
329 dst += 1;
330 } while ((nbytes -= bsize) >= bsize);
331
332done:
333 *(__be64 *)walk->iv = cpu_to_be64(ctrblk);
334 return nbytes;
335}
336
337static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
338 struct scatterlist *src, unsigned int nbytes)
339{
340 struct blkcipher_walk walk;
341 int err;
342
343 blkcipher_walk_init(&walk, dst, src, nbytes);
344 err = blkcipher_walk_virt_block(desc, &walk, DES3_EDE_BLOCK_SIZE);
345
346 while ((nbytes = walk.nbytes) >= DES3_EDE_BLOCK_SIZE) {
347 nbytes = __ctr_crypt(desc, &walk);
348 err = blkcipher_walk_done(desc, &walk, nbytes);
349 }
350
351 if (walk.nbytes) {
352 ctr_crypt_final(crypto_blkcipher_ctx(desc->tfm), &walk);
353 err = blkcipher_walk_done(desc, &walk, 0);
354 }
355
356 return err;
357}
358
359static int des3_ede_x86_setkey(struct crypto_tfm *tfm, const u8 *key,
360 unsigned int keylen)
361{
362 struct des3_ede_x86_ctx *ctx = crypto_tfm_ctx(tfm);
363 u32 i, j, tmp;
364 int err;
365
366 /* Generate encryption context using generic implementation. */
367 err = __des3_ede_setkey(ctx->enc_expkey, &tfm->crt_flags, key, keylen);
368 if (err < 0)
369 return err;
370
371 /* Fix encryption context for this implementation and form decryption
372 * context. */
373 j = DES3_EDE_EXPKEY_WORDS - 2;
374 for (i = 0; i < DES3_EDE_EXPKEY_WORDS; i += 2, j -= 2) {
375 tmp = ror32(ctx->enc_expkey[i + 1], 4);
376 ctx->enc_expkey[i + 1] = tmp;
377
378 ctx->dec_expkey[j + 0] = ctx->enc_expkey[i + 0];
379 ctx->dec_expkey[j + 1] = tmp;
380 }
381
382 return 0;
383}
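/*
 * Editor's sketch (not part of the patch), restating the transform above in
 * isolation: starting from the generic expanded key (96 u32 words, i.e. 48
 * round-key pairs), the assembler implementation expects the odd word of
 * each pair rotated right by 4, and the decryption schedule is simply those
 * pairs in reverse round order.  ror32() is open-coded so the snippet
 * stands alone; EXPKEY_WORDS mirrors DES3_EDE_EXPKEY_WORDS.
 */
#include <stdint.h>

#define EXPKEY_WORDS 96

static uint32_t ror32(uint32_t v, unsigned int n)
{
	return (v >> n) | (v << (32 - n));
}

static void make_asm_keys(uint32_t *enc, uint32_t *dec)
{
	unsigned int i, j;

	for (i = 0, j = EXPKEY_WORDS - 2; i < EXPKEY_WORDS; i += 2, j -= 2) {
		enc[i + 1] = ror32(enc[i + 1], 4);	/* asm-format pair */
		dec[j + 0] = enc[i + 0];		/* reverse round order */
		dec[j + 1] = enc[i + 1];
	}
}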
384
385static struct crypto_alg des3_ede_algs[4] = { {
386 .cra_name = "des3_ede",
387 .cra_driver_name = "des3_ede-asm",
388 .cra_priority = 200,
389 .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
390 .cra_blocksize = DES3_EDE_BLOCK_SIZE,
391 .cra_ctxsize = sizeof(struct des3_ede_x86_ctx),
392 .cra_alignmask = 0,
393 .cra_module = THIS_MODULE,
394 .cra_u = {
395 .cipher = {
396 .cia_min_keysize = DES3_EDE_KEY_SIZE,
397 .cia_max_keysize = DES3_EDE_KEY_SIZE,
398 .cia_setkey = des3_ede_x86_setkey,
399 .cia_encrypt = des3_ede_x86_encrypt,
400 .cia_decrypt = des3_ede_x86_decrypt,
401 }
402 }
403}, {
404 .cra_name = "ecb(des3_ede)",
405 .cra_driver_name = "ecb-des3_ede-asm",
406 .cra_priority = 300,
407 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
408 .cra_blocksize = DES3_EDE_BLOCK_SIZE,
409 .cra_ctxsize = sizeof(struct des3_ede_x86_ctx),
410 .cra_alignmask = 0,
411 .cra_type = &crypto_blkcipher_type,
412 .cra_module = THIS_MODULE,
413 .cra_u = {
414 .blkcipher = {
415 .min_keysize = DES3_EDE_KEY_SIZE,
416 .max_keysize = DES3_EDE_KEY_SIZE,
417 .setkey = des3_ede_x86_setkey,
418 .encrypt = ecb_encrypt,
419 .decrypt = ecb_decrypt,
420 },
421 },
422}, {
423 .cra_name = "cbc(des3_ede)",
424 .cra_driver_name = "cbc-des3_ede-asm",
425 .cra_priority = 300,
426 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
427 .cra_blocksize = DES3_EDE_BLOCK_SIZE,
428 .cra_ctxsize = sizeof(struct des3_ede_x86_ctx),
429 .cra_alignmask = 0,
430 .cra_type = &crypto_blkcipher_type,
431 .cra_module = THIS_MODULE,
432 .cra_u = {
433 .blkcipher = {
434 .min_keysize = DES3_EDE_KEY_SIZE,
435 .max_keysize = DES3_EDE_KEY_SIZE,
436 .ivsize = DES3_EDE_BLOCK_SIZE,
437 .setkey = des3_ede_x86_setkey,
438 .encrypt = cbc_encrypt,
439 .decrypt = cbc_decrypt,
440 },
441 },
442}, {
443 .cra_name = "ctr(des3_ede)",
444 .cra_driver_name = "ctr-des3_ede-asm",
445 .cra_priority = 300,
446 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
447 .cra_blocksize = 1,
448 .cra_ctxsize = sizeof(struct des3_ede_x86_ctx),
449 .cra_alignmask = 0,
450 .cra_type = &crypto_blkcipher_type,
451 .cra_module = THIS_MODULE,
452 .cra_u = {
453 .blkcipher = {
454 .min_keysize = DES3_EDE_KEY_SIZE,
455 .max_keysize = DES3_EDE_KEY_SIZE,
456 .ivsize = DES3_EDE_BLOCK_SIZE,
457 .setkey = des3_ede_x86_setkey,
458 .encrypt = ctr_crypt,
459 .decrypt = ctr_crypt,
460 },
461 },
462} };
463
464static bool is_blacklisted_cpu(void)
465{
466 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
467 return false;
468
469 if (boot_cpu_data.x86 == 0x0f) {
470 /*
471 * On Pentium 4, des3_ede-x86_64 is slower than the generic C
472 * implementation because it uses 64-bit rotates (which are really
473 * slow on P4). Therefore, blacklist P4s.
474 */
475 return true;
476 }
477
478 return false;
479}
480
481static int force;
482module_param(force, int, 0);
483MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
484
485static int __init des3_ede_x86_init(void)
486{
487 if (!force && is_blacklisted_cpu()) {
488 pr_info("des3_ede-x86_64: performance on this CPU would be suboptimal: disabling des3_ede-x86_64.\n");
489 return -ENODEV;
490 }
491
492 return crypto_register_algs(des3_ede_algs, ARRAY_SIZE(des3_ede_algs));
493}
494
495static void __exit des3_ede_x86_fini(void)
496{
497 crypto_unregister_algs(des3_ede_algs, ARRAY_SIZE(des3_ede_algs));
498}
499
500module_init(des3_ede_x86_init);
501module_exit(des3_ede_x86_fini);
502
503MODULE_LICENSE("GPL");
504MODULE_DESCRIPTION("Triple DES EDE Cipher Algorithm, asm optimized");
505MODULE_ALIAS("des3_ede");
506MODULE_ALIAS("des3_ede-asm");
507MODULE_ALIAS("des");
508MODULE_ALIAS("des-asm");
509MODULE_AUTHOR("Jussi Kivilinna <jussi.kivilinna@iki.fi>");
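Editor's illustration (not part of the patch): a minimal sketch of driving the cipher registered above through the kernel crypto API from a test module. "des3_ede" resolves to the highest-priority provider, which after this patch may be the des3_ede-asm implementation on x86-64; error handling is trimmed for brevity and the key/block values are arbitrary.

	#include <linux/module.h>
	#include <linux/crypto.h>
	#include <linux/err.h>
	#include <crypto/des.h>

	static int __init des3_asm_demo_init(void)
	{
		static const u8 key[DES3_EDE_KEY_SIZE] = {	/* 24 bytes, arbitrary */
			0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef,
			0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10,
			0x89, 0xab, 0xcd, 0xef, 0x01, 0x23, 0x45, 0x67,
		};
		u8 block[DES3_EDE_BLOCK_SIZE] = "3DESBLK";	/* one 8-byte block */
		struct crypto_cipher *tfm;
		int err;

		tfm = crypto_alloc_cipher("des3_ede", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		err = crypto_cipher_setkey(tfm, key, sizeof(key));
		if (!err) {
			crypto_cipher_encrypt_one(tfm, block, block);	/* in place */
			crypto_cipher_decrypt_one(tfm, block, block);	/* round trip */
		}

		crypto_free_cipher(tfm);
		return err;
	}
	module_init(des3_asm_demo_init);
	MODULE_LICENSE("GPL");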
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 19b0ebafcd3e..79752f2bdec5 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -99,7 +99,7 @@ static inline void native_apic_mem_write(u32 reg, u32 v)
99{ 99{
100 volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg); 100 volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg);
101 101
102 alternative_io("movl %0, %1", "xchgl %0, %1", X86_FEATURE_11AP, 102 alternative_io("movl %0, %1", "xchgl %0, %1", X86_BUG_11AP,
103 ASM_OUTPUT2("=r" (v), "=m" (*addr)), 103 ASM_OUTPUT2("=r" (v), "=m" (*addr)),
104 ASM_OUTPUT2("0" (v), "m" (*addr))); 104 ASM_OUTPUT2("0" (v), "m" (*addr)));
105} 105}
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 5c7198cca5ed..0f4460b5636d 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -99,7 +99,7 @@
99#if defined(CONFIG_X86_PPRO_FENCE) 99#if defined(CONFIG_X86_PPRO_FENCE)
100 100
101/* 101/*
102 * For either of these options x86 doesn't have a strong TSO memory 102 * For this option x86 doesn't have a strong TSO memory
103 * model and we should fall back to full barriers. 103 * model and we should fall back to full barriers.
104 */ 104 */
105 105
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index d47786acb016..99c105d78b7e 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -4,6 +4,8 @@
4#include <linux/compiler.h> 4#include <linux/compiler.h>
5#include <asm/alternative.h> /* Provides LOCK_PREFIX */ 5#include <asm/alternative.h> /* Provides LOCK_PREFIX */
6 6
7#define __HAVE_ARCH_CMPXCHG 1
8
7/* 9/*
8 * Non-existant functions to indicate usage errors at link time 10 * Non-existant functions to indicate usage errors at link time
9 * (or compile-time if the compiler implements __compiletime_error(). 11 * (or compile-time if the compiler implements __compiletime_error().
9 * (or compile-time if the compiler implements __compiletime_error(). 11 * (or compile-time if the compiler implements __compiletime_error().
@@ -143,7 +145,6 @@ extern void __add_wrong_size(void)
143# include <asm/cmpxchg_64.h> 145# include <asm/cmpxchg_64.h>
144#endif 146#endif
145 147
146#ifdef __HAVE_ARCH_CMPXCHG
147#define cmpxchg(ptr, old, new) \ 148#define cmpxchg(ptr, old, new) \
148 __cmpxchg(ptr, old, new, sizeof(*(ptr))) 149 __cmpxchg(ptr, old, new, sizeof(*(ptr)))
149 150
@@ -152,7 +153,6 @@ extern void __add_wrong_size(void)
152 153
153#define cmpxchg_local(ptr, old, new) \ 154#define cmpxchg_local(ptr, old, new) \
154 __cmpxchg_local(ptr, old, new, sizeof(*(ptr))) 155 __cmpxchg_local(ptr, old, new, sizeof(*(ptr)))
155#endif
156 156
157/* 157/*
158 * xadd() adds "inc" to "*ptr" and atomically returns the previous 158 * xadd() adds "inc" to "*ptr" and atomically returns the previous
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index f8bf2eecab86..f7e142926481 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -34,8 +34,6 @@ static inline void set_64bit(volatile u64 *ptr, u64 value)
34 : "memory"); 34 : "memory");
35} 35}
36 36
37#define __HAVE_ARCH_CMPXCHG 1
38
39#ifdef CONFIG_X86_CMPXCHG64 37#ifdef CONFIG_X86_CMPXCHG64
40#define cmpxchg64(ptr, o, n) \ 38#define cmpxchg64(ptr, o, n) \
41 ((__typeof__(*(ptr)))__cmpxchg64((ptr), (unsigned long long)(o), \ 39 ((__typeof__(*(ptr)))__cmpxchg64((ptr), (unsigned long long)(o), \
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
index 614be87f1a9b..1af94697aae5 100644
--- a/arch/x86/include/asm/cmpxchg_64.h
+++ b/arch/x86/include/asm/cmpxchg_64.h
@@ -6,8 +6,6 @@ static inline void set_64bit(volatile u64 *ptr, u64 val)
6 *ptr = val; 6 *ptr = val;
7} 7}
8 8
9#define __HAVE_ARCH_CMPXCHG 1
10
11#define cmpxchg64(ptr, o, n) \ 9#define cmpxchg64(ptr, o, n) \
12({ \ 10({ \
13 BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ 11 BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index e265ff95d16d..bb9b258d60e7 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -8,7 +8,7 @@
8#include <asm/required-features.h> 8#include <asm/required-features.h>
9#endif 9#endif
10 10
11#define NCAPINTS 10 /* N 32-bit words worth of info */ 11#define NCAPINTS 11 /* N 32-bit words worth of info */
12#define NBUGINTS 1 /* N 32-bit bug flags */ 12#define NBUGINTS 1 /* N 32-bit bug flags */
13 13
14/* 14/*
@@ -18,213 +18,218 @@
18 */ 18 */
19 19
20/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */ 20/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
21#define X86_FEATURE_FPU (0*32+ 0) /* Onboard FPU */ 21#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
22#define X86_FEATURE_VME (0*32+ 1) /* Virtual Mode Extensions */ 22#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
23#define X86_FEATURE_DE (0*32+ 2) /* Debugging Extensions */ 23#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */
24#define X86_FEATURE_PSE (0*32+ 3) /* Page Size Extensions */ 24#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */
25#define X86_FEATURE_TSC (0*32+ 4) /* Time Stamp Counter */ 25#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */
26#define X86_FEATURE_MSR (0*32+ 5) /* Model-Specific Registers */ 26#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */
27#define X86_FEATURE_PAE (0*32+ 6) /* Physical Address Extensions */ 27#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */
28#define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Exception */ 28#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */
29#define X86_FEATURE_CX8 (0*32+ 8) /* CMPXCHG8 instruction */ 29#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */
30#define X86_FEATURE_APIC (0*32+ 9) /* Onboard APIC */ 30#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */
31#define X86_FEATURE_SEP (0*32+11) /* SYSENTER/SYSEXIT */ 31#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */
32#define X86_FEATURE_MTRR (0*32+12) /* Memory Type Range Registers */ 32#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */
33#define X86_FEATURE_PGE (0*32+13) /* Page Global Enable */ 33#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */
34#define X86_FEATURE_MCA (0*32+14) /* Machine Check Architecture */ 34#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */
35#define X86_FEATURE_CMOV (0*32+15) /* CMOV instructions */ 35#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */
36 /* (plus FCMOVcc, FCOMI with FPU) */ 36 /* (plus FCMOVcc, FCOMI with FPU) */
37#define X86_FEATURE_PAT (0*32+16) /* Page Attribute Table */ 37#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */
38#define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */ 38#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */
39#define X86_FEATURE_PN (0*32+18) /* Processor serial number */ 39#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */
40#define X86_FEATURE_CLFLUSH (0*32+19) /* CLFLUSH instruction */ 40#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */
41#define X86_FEATURE_DS (0*32+21) /* "dts" Debug Store */ 41#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */
42#define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */ 42#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */
43#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */ 43#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */
44#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ 44#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
45#define X86_FEATURE_XMM (0*32+25) /* "sse" */ 45#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */
46#define X86_FEATURE_XMM2 (0*32+26) /* "sse2" */ 46#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */
47#define X86_FEATURE_SELFSNOOP (0*32+27) /* "ss" CPU self snoop */ 47#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */
48#define X86_FEATURE_HT (0*32+28) /* Hyper-Threading */ 48#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */
49#define X86_FEATURE_ACC (0*32+29) /* "tm" Automatic clock control */ 49#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */
50#define X86_FEATURE_IA64 (0*32+30) /* IA-64 processor */ 50#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */
51#define X86_FEATURE_PBE (0*32+31) /* Pending Break Enable */ 51#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */
52 52
53/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ 53/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
54/* Don't duplicate feature flags which are redundant with Intel! */ 54/* Don't duplicate feature flags which are redundant with Intel! */
55#define X86_FEATURE_SYSCALL (1*32+11) /* SYSCALL/SYSRET */ 55#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */
56#define X86_FEATURE_MP (1*32+19) /* MP Capable. */ 56#define X86_FEATURE_MP ( 1*32+19) /* MP Capable. */
57#define X86_FEATURE_NX (1*32+20) /* Execute Disable */ 57#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */
58#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ 58#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */
59#define X86_FEATURE_FXSR_OPT (1*32+25) /* FXSAVE/FXRSTOR optimizations */ 59#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
60#define X86_FEATURE_GBPAGES (1*32+26) /* "pdpe1gb" GB pages */ 60#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */
61#define X86_FEATURE_RDTSCP (1*32+27) /* RDTSCP */ 61#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */
62#define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */ 62#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */
63#define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */ 63#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */
64#define X86_FEATURE_3DNOW (1*32+31) /* 3DNow! */ 64#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! */
65 65
66/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ 66/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
67#define X86_FEATURE_RECOVERY (2*32+ 0) /* CPU in recovery mode */ 67#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */
68#define X86_FEATURE_LONGRUN (2*32+ 1) /* Longrun power control */ 68#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */
69#define X86_FEATURE_LRTI (2*32+ 3) /* LongRun table interface */ 69#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */
70 70
71/* Other features, Linux-defined mapping, word 3 */ 71/* Other features, Linux-defined mapping, word 3 */
72/* This range is used for feature bits which conflict or are synthesized */ 72/* This range is used for feature bits which conflict or are synthesized */
73#define X86_FEATURE_CXMMX (3*32+ 0) /* Cyrix MMX extensions */ 73#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */
74#define X86_FEATURE_K6_MTRR (3*32+ 1) /* AMD K6 nonstandard MTRRs */ 74#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
75#define X86_FEATURE_CYRIX_ARR (3*32+ 2) /* Cyrix ARRs (= MTRRs) */ 75#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
76#define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */ 76#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
77/* cpu types for specific tunings: */ 77/* cpu types for specific tunings: */
78#define X86_FEATURE_K8 (3*32+ 4) /* "" Opteron, Athlon64 */ 78#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
79#define X86_FEATURE_K7 (3*32+ 5) /* "" Athlon */ 79#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */
80#define X86_FEATURE_P3 (3*32+ 6) /* "" P3 */ 80#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
81#define X86_FEATURE_P4 (3*32+ 7) /* "" P4 */ 81#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
82#define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */ 82#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
83#define X86_FEATURE_UP (3*32+ 9) /* smp kernel running on up */ 83#define X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */
84#define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* "" FXSAVE leaks FOP/FIP/FOP */ 84/* free, was #define X86_FEATURE_FXSAVE_LEAK ( 3*32+10) * "" FXSAVE leaks FOP/FIP/FOP */
85#define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */ 85#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
86#define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */ 86#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */
87#define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */ 87#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */
88#define X86_FEATURE_SYSCALL32 (3*32+14) /* "" syscall in ia32 userspace */ 88#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */
89#define X86_FEATURE_SYSENTER32 (3*32+15) /* "" sysenter in ia32 userspace */ 89#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */
90#define X86_FEATURE_REP_GOOD (3*32+16) /* rep microcode works well */ 90#define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */
91#define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* "" Mfence synchronizes RDTSC */ 91#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
92#define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* "" Lfence synchronizes RDTSC */ 92#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
93#define X86_FEATURE_11AP (3*32+19) /* "" Bad local APIC aka 11AP */ 93/* free, was #define X86_FEATURE_11AP ( 3*32+19) * "" Bad local APIC aka 11AP */
94#define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */ 94#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
95#define X86_FEATURE_ALWAYS (3*32+21) /* "" Always-present feature */ 95#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
96#define X86_FEATURE_XTOPOLOGY (3*32+22) /* cpu topology enum extensions */ 96#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */
97#define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */ 97#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
98#define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */ 98#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */
99#define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */ 99/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */
100#define X86_FEATURE_EXTD_APICID (3*32+26) /* has extended APICID (8 bits) */ 100#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */
101#define X86_FEATURE_AMD_DCM (3*32+27) /* multi-node processor */ 101#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */
102#define X86_FEATURE_APERFMPERF (3*32+28) /* APERFMPERF */ 102#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
103#define X86_FEATURE_EAGER_FPU (3*32+29) /* "eagerfpu" Non lazy FPU restore */ 103#define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
104#define X86_FEATURE_NONSTOP_TSC_S3 (3*32+30) /* TSC doesn't stop in S3 state */ 104#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
105 105
106/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ 106/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
107#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ 107#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
108#define X86_FEATURE_PCLMULQDQ (4*32+ 1) /* PCLMULQDQ instruction */ 108#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */
109#define X86_FEATURE_DTES64 (4*32+ 2) /* 64-bit Debug Store */ 109#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */
110#define X86_FEATURE_MWAIT (4*32+ 3) /* "monitor" Monitor/Mwait support */ 110#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" Monitor/Mwait support */
111#define X86_FEATURE_DSCPL (4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */ 111#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
112#define X86_FEATURE_VMX (4*32+ 5) /* Hardware virtualization */ 112#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */
113#define X86_FEATURE_SMX (4*32+ 6) /* Safer mode */ 113#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */
114#define X86_FEATURE_EST (4*32+ 7) /* Enhanced SpeedStep */ 114#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */
115#define X86_FEATURE_TM2 (4*32+ 8) /* Thermal Monitor 2 */ 115#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */
116#define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */ 116#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */
117#define X86_FEATURE_CID (4*32+10) /* Context ID */ 117#define X86_FEATURE_CID ( 4*32+10) /* Context ID */
118#define X86_FEATURE_FMA (4*32+12) /* Fused multiply-add */ 118#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */
119#define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */ 119#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */
120#define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */ 120#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */
121#define X86_FEATURE_PDCM (4*32+15) /* Performance Capabilities */ 121#define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */
122#define X86_FEATURE_PCID (4*32+17) /* Process Context Identifiers */ 122#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */
123#define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */ 123#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */
124#define X86_FEATURE_XMM4_1 (4*32+19) /* "sse4_1" SSE-4.1 */ 124#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */
125#define X86_FEATURE_XMM4_2 (4*32+20) /* "sse4_2" SSE-4.2 */ 125#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */
126#define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */ 126#define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */
127#define X86_FEATURE_MOVBE (4*32+22) /* MOVBE instruction */ 127#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */
128#define X86_FEATURE_POPCNT (4*32+23) /* POPCNT instruction */ 128#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */
129#define X86_FEATURE_TSC_DEADLINE_TIMER (4*32+24) /* Tsc deadline timer */ 129#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* Tsc deadline timer */
130#define X86_FEATURE_AES (4*32+25) /* AES instructions */ 130#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */
131#define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ 131#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
132#define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */ 132#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */
133#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ 133#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */
134#define X86_FEATURE_F16C (4*32+29) /* 16-bit fp conversions */ 134#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit fp conversions */
135#define X86_FEATURE_RDRAND (4*32+30) /* The RDRAND instruction */ 135#define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */
136#define X86_FEATURE_HYPERVISOR (4*32+31) /* Running on a hypervisor */ 136#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */
137 137
138/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ 138/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
139#define X86_FEATURE_XSTORE (5*32+ 2) /* "rng" RNG present (xstore) */ 139#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */
140#define X86_FEATURE_XSTORE_EN (5*32+ 3) /* "rng_en" RNG enabled */ 140#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */
141#define X86_FEATURE_XCRYPT (5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */ 141#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
142#define X86_FEATURE_XCRYPT_EN (5*32+ 7) /* "ace_en" on-CPU crypto enabled */ 142#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
143#define X86_FEATURE_ACE2 (5*32+ 8) /* Advanced Cryptography Engine v2 */ 143#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
144#define X86_FEATURE_ACE2_EN (5*32+ 9) /* ACE v2 enabled */ 144#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */
145#define X86_FEATURE_PHE (5*32+10) /* PadLock Hash Engine */ 145#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */
146#define X86_FEATURE_PHE_EN (5*32+11) /* PHE enabled */ 146#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */
147#define X86_FEATURE_PMM (5*32+12) /* PadLock Montgomery Multiplier */ 147#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */
148#define X86_FEATURE_PMM_EN (5*32+13) /* PMM enabled */ 148#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */
149 149
150/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */ 150/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
151#define X86_FEATURE_LAHF_LM (6*32+ 0) /* LAHF/SAHF in long mode */ 151#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */
152#define X86_FEATURE_CMP_LEGACY (6*32+ 1) /* If yes HyperThreading not valid */ 152#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */
153#define X86_FEATURE_SVM (6*32+ 2) /* Secure virtual machine */ 153#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */
154#define X86_FEATURE_EXTAPIC (6*32+ 3) /* Extended APIC space */ 154#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */
155#define X86_FEATURE_CR8_LEGACY (6*32+ 4) /* CR8 in 32-bit mode */ 155#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */
156#define X86_FEATURE_ABM (6*32+ 5) /* Advanced bit manipulation */ 156#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */
157#define X86_FEATURE_SSE4A (6*32+ 6) /* SSE-4A */ 157#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */
158#define X86_FEATURE_MISALIGNSSE (6*32+ 7) /* Misaligned SSE mode */ 158#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
159#define X86_FEATURE_3DNOWPREFETCH (6*32+ 8) /* 3DNow prefetch instructions */ 159#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
160#define X86_FEATURE_OSVW (6*32+ 9) /* OS Visible Workaround */ 160#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */
161#define X86_FEATURE_IBS (6*32+10) /* Instruction Based Sampling */ 161#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */
162#define X86_FEATURE_XOP (6*32+11) /* extended AVX instructions */ 162#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */
163#define X86_FEATURE_SKINIT (6*32+12) /* SKINIT/STGI instructions */ 163#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */
164#define X86_FEATURE_WDT (6*32+13) /* Watchdog timer */ 164#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */
165#define X86_FEATURE_LWP (6*32+15) /* Light Weight Profiling */ 165#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */
166#define X86_FEATURE_FMA4 (6*32+16) /* 4 operands MAC instructions */ 166#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */
167#define X86_FEATURE_TCE (6*32+17) /* translation cache extension */ 167#define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */
168#define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */ 168#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */
169#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */ 169#define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */
170#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */ 170#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */
171#define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */ 171#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
172#define X86_FEATURE_PERFCTR_NB (6*32+24) /* NB performance counter extensions */ 172#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
173#define X86_FEATURE_PERFCTR_L2 (6*32+28) /* L2 performance counter extensions */ 173#define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */
174 174
175/* 175/*
176 * Auxiliary flags: Linux defined - For features scattered in various 176 * Auxiliary flags: Linux defined - For features scattered in various
177 * CPUID levels like 0x6, 0xA etc, word 7 177 * CPUID levels like 0x6, 0xA etc, word 7
178 */ 178 */
179#define X86_FEATURE_IDA (7*32+ 0) /* Intel Dynamic Acceleration */ 179#define X86_FEATURE_IDA ( 7*32+ 0) /* Intel Dynamic Acceleration */
180#define X86_FEATURE_ARAT (7*32+ 1) /* Always Running APIC Timer */ 180#define X86_FEATURE_ARAT ( 7*32+ 1) /* Always Running APIC Timer */
181#define X86_FEATURE_CPB (7*32+ 2) /* AMD Core Performance Boost */ 181#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
182#define X86_FEATURE_EPB (7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ 182#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
183#define X86_FEATURE_XSAVEOPT (7*32+ 4) /* Optimized Xsave */ 183#define X86_FEATURE_PLN ( 7*32+ 5) /* Intel Power Limit Notification */
184#define X86_FEATURE_PLN (7*32+ 5) /* Intel Power Limit Notification */ 184#define X86_FEATURE_PTS ( 7*32+ 6) /* Intel Package Thermal Status */
185#define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */ 185#define X86_FEATURE_DTHERM ( 7*32+ 7) /* Digital Thermal Sensor */
186#define X86_FEATURE_DTHERM (7*32+ 7) /* Digital Thermal Sensor */ 186#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
187#define X86_FEATURE_HW_PSTATE (7*32+ 8) /* AMD HW-PState */ 187#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
188#define X86_FEATURE_PROC_FEEDBACK (7*32+ 9) /* AMD ProcFeedbackInterface */
189 188
190/* Virtualization flags: Linux defined, word 8 */ 189/* Virtualization flags: Linux defined, word 8 */
191#define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ 190#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
192#define X86_FEATURE_VNMI (8*32+ 1) /* Intel Virtual NMI */ 191#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
193#define X86_FEATURE_FLEXPRIORITY (8*32+ 2) /* Intel FlexPriority */ 192#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
194#define X86_FEATURE_EPT (8*32+ 3) /* Intel Extended Page Table */ 193#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */
195#define X86_FEATURE_VPID (8*32+ 4) /* Intel Virtual Processor ID */ 194#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */
196#define X86_FEATURE_NPT (8*32+ 5) /* AMD Nested Page Table support */ 195#define X86_FEATURE_NPT ( 8*32+ 5) /* AMD Nested Page Table support */
197#define X86_FEATURE_LBRV (8*32+ 6) /* AMD LBR Virtualization support */ 196#define X86_FEATURE_LBRV ( 8*32+ 6) /* AMD LBR Virtualization support */
198#define X86_FEATURE_SVML (8*32+ 7) /* "svm_lock" AMD SVM locking MSR */ 197#define X86_FEATURE_SVML ( 8*32+ 7) /* "svm_lock" AMD SVM locking MSR */
199#define X86_FEATURE_NRIPS (8*32+ 8) /* "nrip_save" AMD SVM next_rip save */ 198#define X86_FEATURE_NRIPS ( 8*32+ 8) /* "nrip_save" AMD SVM next_rip save */
200#define X86_FEATURE_TSCRATEMSR (8*32+ 9) /* "tsc_scale" AMD TSC scaling support */ 199#define X86_FEATURE_TSCRATEMSR ( 8*32+ 9) /* "tsc_scale" AMD TSC scaling support */
201#define X86_FEATURE_VMCBCLEAN (8*32+10) /* "vmcb_clean" AMD VMCB clean bits support */ 200#define X86_FEATURE_VMCBCLEAN ( 8*32+10) /* "vmcb_clean" AMD VMCB clean bits support */
202#define X86_FEATURE_FLUSHBYASID (8*32+11) /* AMD flush-by-ASID support */ 201#define X86_FEATURE_FLUSHBYASID ( 8*32+11) /* AMD flush-by-ASID support */
203#define X86_FEATURE_DECODEASSISTS (8*32+12) /* AMD Decode Assists support */ 202#define X86_FEATURE_DECODEASSISTS ( 8*32+12) /* AMD Decode Assists support */
204#define X86_FEATURE_PAUSEFILTER (8*32+13) /* AMD filtered pause intercept */ 203#define X86_FEATURE_PAUSEFILTER ( 8*32+13) /* AMD filtered pause intercept */
205#define X86_FEATURE_PFTHRESHOLD (8*32+14) /* AMD pause filter threshold */ 204#define X86_FEATURE_PFTHRESHOLD ( 8*32+14) /* AMD pause filter threshold */
206 205
207 206
208/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ 207/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
209#define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ 208#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
210#define X86_FEATURE_TSC_ADJUST (9*32+ 1) /* TSC adjustment MSR 0x3b */ 209#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */
211#define X86_FEATURE_BMI1 (9*32+ 3) /* 1st group bit manipulation extensions */ 210#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */
212#define X86_FEATURE_HLE (9*32+ 4) /* Hardware Lock Elision */ 211#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */
213#define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */ 212#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */
214#define X86_FEATURE_SMEP (9*32+ 7) /* Supervisor Mode Execution Protection */ 213#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */
215#define X86_FEATURE_BMI2 (9*32+ 8) /* 2nd group bit manipulation extensions */ 214#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */
216#define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */ 215#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
217#define X86_FEATURE_INVPCID (9*32+10) /* Invalidate Processor Context ID */ 216#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
218#define X86_FEATURE_RTM (9*32+11) /* Restricted Transactional Memory */ 217#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
219#define X86_FEATURE_MPX (9*32+14) /* Memory Protection Extension */ 218#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
220#define X86_FEATURE_AVX512F (9*32+16) /* AVX-512 Foundation */ 219#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
221#define X86_FEATURE_RDSEED (9*32+18) /* The RDSEED instruction */ 220#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
222#define X86_FEATURE_ADX (9*32+19) /* The ADCX and ADOX instructions */ 221#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */
223#define X86_FEATURE_SMAP (9*32+20) /* Supervisor Mode Access Prevention */ 222#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
224#define X86_FEATURE_CLFLUSHOPT (9*32+23) /* CLFLUSHOPT instruction */ 223#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
225#define X86_FEATURE_AVX512PF (9*32+26) /* AVX-512 Prefetch */ 224#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
226#define X86_FEATURE_AVX512ER (9*32+27) /* AVX-512 Exponential and Reciprocal */ 225#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
227#define X86_FEATURE_AVX512CD (9*32+28) /* AVX-512 Conflict Detection */ 226#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
227
228/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
229#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */
230#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */
231#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */
232#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */
228 233
229/* 234/*
230 * BUG word(s) 235 * BUG word(s)
@@ -234,8 +239,11 @@
234#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */ 239#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */
235#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */ 240#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */
236#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */ 241#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */
237#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* AMD Erratum 383 */ 242#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
238#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* AMD Erratum 400 */ 243#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
244#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */
245#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
246#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
239 247
240#if defined(__KERNEL__) && !defined(__ASSEMBLY__) 248#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
241 249
@@ -245,6 +253,12 @@
245extern const char * const x86_cap_flags[NCAPINTS*32]; 253extern const char * const x86_cap_flags[NCAPINTS*32];
246extern const char * const x86_power_flags[32]; 254extern const char * const x86_power_flags[32];
247 255
256/*
257 * In order to save room, we index into this array by doing
258 * X86_BUG_<name> - NCAPINTS*32.
259 */
260extern const char * const x86_bug_flags[NBUGINTS*32];
261
248#define test_cpu_cap(c, bit) \ 262#define test_cpu_cap(c, bit) \
249 test_bit(bit, (unsigned long *)((c)->x86_capability)) 263 test_bit(bit, (unsigned long *)((c)->x86_capability))
250 264
@@ -301,7 +315,6 @@ extern const char * const x86_power_flags[32];
301#define cpu_has_avx boot_cpu_has(X86_FEATURE_AVX) 315#define cpu_has_avx boot_cpu_has(X86_FEATURE_AVX)
302#define cpu_has_avx2 boot_cpu_has(X86_FEATURE_AVX2) 316#define cpu_has_avx2 boot_cpu_has(X86_FEATURE_AVX2)
303#define cpu_has_ht boot_cpu_has(X86_FEATURE_HT) 317#define cpu_has_ht boot_cpu_has(X86_FEATURE_HT)
304#define cpu_has_mp boot_cpu_has(X86_FEATURE_MP)
305#define cpu_has_nx boot_cpu_has(X86_FEATURE_NX) 318#define cpu_has_nx boot_cpu_has(X86_FEATURE_NX)
306#define cpu_has_k6_mtrr boot_cpu_has(X86_FEATURE_K6_MTRR) 319#define cpu_has_k6_mtrr boot_cpu_has(X86_FEATURE_K6_MTRR)
307#define cpu_has_cyrix_arr boot_cpu_has(X86_FEATURE_CYRIX_ARR) 320#define cpu_has_cyrix_arr boot_cpu_has(X86_FEATURE_CYRIX_ARR)
@@ -328,6 +341,7 @@ extern const char * const x86_power_flags[32];
328#define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) 341#define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC)
329#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) 342#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE)
330#define cpu_has_xsaveopt boot_cpu_has(X86_FEATURE_XSAVEOPT) 343#define cpu_has_xsaveopt boot_cpu_has(X86_FEATURE_XSAVEOPT)
344#define cpu_has_xsaves boot_cpu_has(X86_FEATURE_XSAVES)
331#define cpu_has_osxsave boot_cpu_has(X86_FEATURE_OSXSAVE) 345#define cpu_has_osxsave boot_cpu_has(X86_FEATURE_OSXSAVE)
332#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) 346#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
333#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) 347#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ)
@@ -347,9 +361,6 @@ extern const char * const x86_power_flags[32];
347#undef cpu_has_pae 361#undef cpu_has_pae
348#define cpu_has_pae ___BUG___ 362#define cpu_has_pae ___BUG___
349 363
350#undef cpu_has_mp
351#define cpu_has_mp 1
352
353#undef cpu_has_k6_mtrr 364#undef cpu_has_k6_mtrr
354#define cpu_has_k6_mtrr 0 365#define cpu_has_k6_mtrr 0
355 366
@@ -539,20 +550,20 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
539#define static_cpu_has_safe(bit) boot_cpu_has(bit) 550#define static_cpu_has_safe(bit) boot_cpu_has(bit)
540#endif 551#endif
541 552
542#define cpu_has_bug(c, bit) cpu_has(c, (bit)) 553#define cpu_has_bug(c, bit) cpu_has(c, (bit))
543#define set_cpu_bug(c, bit) set_cpu_cap(c, (bit)) 554#define set_cpu_bug(c, bit) set_cpu_cap(c, (bit))
544#define clear_cpu_bug(c, bit) clear_cpu_cap(c, (bit)); 555#define clear_cpu_bug(c, bit) clear_cpu_cap(c, (bit))
545 556
546#define static_cpu_has_bug(bit) static_cpu_has((bit)) 557#define static_cpu_has_bug(bit) static_cpu_has((bit))
547#define boot_cpu_has_bug(bit) cpu_has_bug(&boot_cpu_data, (bit)) 558#define static_cpu_has_bug_safe(bit) static_cpu_has_safe((bit))
559#define boot_cpu_has_bug(bit) cpu_has_bug(&boot_cpu_data, (bit))
548 560
549#define MAX_CPU_FEATURES (NCAPINTS * 32) 561#define MAX_CPU_FEATURES (NCAPINTS * 32)
550#define cpu_have_feature boot_cpu_has 562#define cpu_have_feature boot_cpu_has
551 563
552#define CPU_FEATURE_TYPEFMT "x86,ven%04Xfam%04Xmod%04X" 564#define CPU_FEATURE_TYPEFMT "x86,ven%04Xfam%04Xmod%04X"
553#define CPU_FEATURE_TYPEVAL boot_cpu_data.x86_vendor, boot_cpu_data.x86, \ 565#define CPU_FEATURE_TYPEVAL boot_cpu_data.x86_vendor, boot_cpu_data.x86, \
554 boot_cpu_data.x86_model 566 boot_cpu_data.x86_model
555 567
556#endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */ 568#endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */
557
558#endif /* _ASM_X86_CPUFEATURE_H */ 569#endif /* _ASM_X86_CPUFEATURE_H */
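The hunk above adds x86_bug_flags[] and documents that its entries are indexed by X86_BUG_<name> - NCAPINTS*32 to save room. A minimal standalone C sketch of that offset arithmetic follows; the NCAPINTS/NBUGINTS values and the "fxsave_leak" string are placeholders for illustration, not the real kernel constants.

#include <stdio.h>

/* Placeholder word counts; the real values live in cpufeature.h. */
#define NCAPINTS	11			/* assumed number of capability words */
#define NBUGINTS	1			/* assumed number of bug words */

/* Bug bits are numbered after all capability bits, as in the header above. */
#define X86_BUG(x)	(NCAPINTS*32 + (x))
#define X86_BUG_FXSAVE_LEAK	X86_BUG(6)

/* Name table indexed by X86_BUG_<name> - NCAPINTS*32. */
static const char * const x86_bug_flags[NBUGINTS*32] = {
	[X86_BUG_FXSAVE_LEAK - NCAPINTS*32] = "fxsave_leak",
};

int main(void)
{
	/* Translate the global bit number back into the bug-flag table index. */
	int idx = X86_BUG_FXSAVE_LEAK - NCAPINTS*32;

	printf("bug bit %d -> table index %d -> \"%s\"\n",
	       X86_BUG_FXSAVE_LEAK, idx, x86_bug_flags[idx]);
	return 0;
}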
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 1eb5f6433ad8..044a2fd3c5fe 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -104,6 +104,8 @@ extern void __init runtime_code_page_mkexec(void);
104extern void __init efi_runtime_mkexec(void); 104extern void __init efi_runtime_mkexec(void);
105extern void __init efi_dump_pagetable(void); 105extern void __init efi_dump_pagetable(void);
106extern void __init efi_apply_memmap_quirks(void); 106extern void __init efi_apply_memmap_quirks(void);
107extern int __init efi_reuse_config(u64 tables, int nr_tables);
108extern void efi_delete_dummy_variable(void);
107 109
108struct efi_setup_data { 110struct efi_setup_data {
109 u64 fw_vendor; 111 u64 fw_vendor;
@@ -156,6 +158,33 @@ static inline efi_status_t efi_thunk_set_virtual_address_map(
156 return EFI_SUCCESS; 158 return EFI_SUCCESS;
157} 159}
158#endif /* CONFIG_EFI_MIXED */ 160#endif /* CONFIG_EFI_MIXED */
161
162
163/* arch specific definitions used by the stub code */
164
165struct efi_config {
166 u64 image_handle;
167 u64 table;
168 u64 allocate_pool;
169 u64 allocate_pages;
170 u64 get_memory_map;
171 u64 free_pool;
172 u64 free_pages;
173 u64 locate_handle;
174 u64 handle_protocol;
175 u64 exit_boot_services;
176 u64 text_output;
177 efi_status_t (*call)(unsigned long, ...);
178 bool is64;
179} __packed;
180
181extern struct efi_config *efi_early;
182
183#define efi_call_early(f, ...) \
184 efi_early->call(efi_early->f, __VA_ARGS__);
185
186extern bool efi_reboot_required(void);
187
159#else 188#else
160/* 189/*
161 * IF EFI is not configured, have the EFI calls return -ENOSYS. 190 * IF EFI is not configured, have the EFI calls return -ENOSYS.
@@ -168,6 +197,10 @@ static inline efi_status_t efi_thunk_set_virtual_address_map(
168#define efi_call5(_f, _a1, _a2, _a3, _a4, _a5) (-ENOSYS) 197#define efi_call5(_f, _a1, _a2, _a3, _a4, _a5) (-ENOSYS)
169#define efi_call6(_f, _a1, _a2, _a3, _a4, _a5, _a6) (-ENOSYS) 198#define efi_call6(_f, _a1, _a2, _a3, _a4, _a5, _a6) (-ENOSYS)
170static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {} 199static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {}
200static inline bool efi_reboot_required(void)
201{
202 return false;
203}
171#endif /* CONFIG_EFI */ 204#endif /* CONFIG_EFI */
172 205
173#endif /* _ASM_X86_EFI_H */ 206#endif /* _ASM_X86_EFI_H */
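The efi_call_early() macro added above selects a firmware service by struct member name and routes it through a single call trampoline. A rough userspace sketch of that dispatch pattern follows, with made-up entry-point values and a stub trampoline standing in for the real mixed-mode thunk.

#include <stdio.h>
#include <stdarg.h>

typedef unsigned long efi_status_t;

struct demo_efi_config {
	unsigned long allocate_pool;	/* raw firmware entry point (fake) */
	unsigned long free_pool;	/* raw firmware entry point (fake) */
	efi_status_t (*call)(unsigned long fn, ...);
};

/* Stand-in trampoline: the real one also handles the 32/64-bit switch. */
static efi_status_t demo_call(unsigned long fn, ...)
{
	va_list ap;
	unsigned long arg;

	va_start(ap, fn);
	arg = va_arg(ap, unsigned long);
	va_end(ap);

	printf("calling firmware service %#lx with arg %#lx\n", fn, arg);
	return 0;
}

static struct demo_efi_config demo_early = {
	.allocate_pool	= 0x1000,	/* fake address for illustration */
	.free_pool	= 0x2000,
	.call		= demo_call,
};
static struct demo_efi_config *efi_early = &demo_early;

/* Same shape as the macro above: member name picks the service to call. */
#define efi_call_early(f, ...) \
	efi_early->call(efi_early->f, __VA_ARGS__)

int main(void)
{
	efi_call_early(allocate_pool, 64UL);	/* dispatches via the table */
	return 0;
}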
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 115e3689cd53..e3b85422cf12 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -293,7 +293,7 @@ static inline int restore_fpu_checking(struct task_struct *tsk)
293 /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception 293 /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
294 is pending. Clear the x87 state here by setting it to fixed 294 is pending. Clear the x87 state here by setting it to fixed
295 values. "m" is a random variable that should be in L1 */ 295 values. "m" is a random variable that should be in L1 */
296 if (unlikely(static_cpu_has_safe(X86_FEATURE_FXSAVE_LEAK))) { 296 if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) {
297 asm volatile( 297 asm volatile(
298 "fnclex\n\t" 298 "fnclex\n\t"
299 "emms\n\t" 299 "emms\n\t"
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 0525a8bdf65d..e1f7fecaa7d6 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -68,6 +68,8 @@ struct dyn_arch_ftrace {
68 68
69int ftrace_int3_handler(struct pt_regs *regs); 69int ftrace_int3_handler(struct pt_regs *regs);
70 70
71#define FTRACE_GRAPH_TRAMP_ADDR FTRACE_GRAPH_ADDR
72
71#endif /* CONFIG_DYNAMIC_FTRACE */ 73#endif /* CONFIG_DYNAMIC_FTRACE */
72#endif /* __ASSEMBLY__ */ 74#endif /* __ASSEMBLY__ */
73#endif /* CONFIG_FUNCTION_TRACER */ 75#endif /* CONFIG_FUNCTION_TRACER */
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index bba3cf88e624..0a8b519226b8 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -129,7 +129,7 @@ static inline notrace unsigned long arch_local_irq_save(void)
129 129
130#define PARAVIRT_ADJUST_EXCEPTION_FRAME /* */ 130#define PARAVIRT_ADJUST_EXCEPTION_FRAME /* */
131 131
132#define INTERRUPT_RETURN iretq 132#define INTERRUPT_RETURN jmp native_iret
133#define USERGS_SYSRET64 \ 133#define USERGS_SYSRET64 \
134 swapgs; \ 134 swapgs; \
135 sysretq; 135 sysretq;
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index a04fe4eb237d..eb181178fe0b 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -37,6 +37,7 @@ struct x86_instruction_info {
37 u8 modrm_reg; /* index of register used */ 37 u8 modrm_reg; /* index of register used */
38 u8 modrm_rm; /* rm part of modrm */ 38 u8 modrm_rm; /* rm part of modrm */
39 u64 src_val; /* value of source operand */ 39 u64 src_val; /* value of source operand */
40 u64 dst_val; /* value of destination operand */
40 u8 src_bytes; /* size of source operand */ 41 u8 src_bytes; /* size of source operand */
41 u8 dst_bytes; /* size of destination operand */ 42 u8 dst_bytes; /* size of destination operand */
42 u8 ad_bytes; /* size of src/dst address */ 43 u8 ad_bytes; /* size of src/dst address */
@@ -194,6 +195,7 @@ struct x86_emulate_ops {
194 int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); 195 int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value);
195 int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); 196 int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data);
196 int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); 197 int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata);
198 int (*check_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc);
197 int (*read_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc, u64 *pdata); 199 int (*read_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc, u64 *pdata);
198 void (*halt)(struct x86_emulate_ctxt *ctxt); 200 void (*halt)(struct x86_emulate_ctxt *ctxt);
199 void (*wbinvd)(struct x86_emulate_ctxt *ctxt); 201 void (*wbinvd)(struct x86_emulate_ctxt *ctxt);
@@ -231,7 +233,7 @@ struct operand {
231 union { 233 union {
232 unsigned long val; 234 unsigned long val;
233 u64 val64; 235 u64 val64;
234 char valptr[sizeof(unsigned long) + 2]; 236 char valptr[sizeof(sse128_t)];
235 sse128_t vec_val; 237 sse128_t vec_val;
236 u64 mm_val; 238 u64 mm_val;
237 void *data; 239 void *data;
@@ -240,8 +242,8 @@ struct operand {
240 242
241struct fetch_cache { 243struct fetch_cache {
242 u8 data[15]; 244 u8 data[15];
243 unsigned long start; 245 u8 *ptr;
244 unsigned long end; 246 u8 *end;
245}; 247};
246 248
247struct read_cache { 249struct read_cache {
@@ -286,30 +288,36 @@ struct x86_emulate_ctxt {
286 u8 opcode_len; 288 u8 opcode_len;
287 u8 b; 289 u8 b;
288 u8 intercept; 290 u8 intercept;
289 u8 lock_prefix;
290 u8 rep_prefix;
291 u8 op_bytes; 291 u8 op_bytes;
292 u8 ad_bytes; 292 u8 ad_bytes;
293 u8 rex_prefix;
294 struct operand src; 293 struct operand src;
295 struct operand src2; 294 struct operand src2;
296 struct operand dst; 295 struct operand dst;
297 bool has_seg_override;
298 u8 seg_override;
299 u64 d;
300 int (*execute)(struct x86_emulate_ctxt *ctxt); 296 int (*execute)(struct x86_emulate_ctxt *ctxt);
301 int (*check_perm)(struct x86_emulate_ctxt *ctxt); 297 int (*check_perm)(struct x86_emulate_ctxt *ctxt);
298 /*
299 * The following six fields are cleared together,
300 * the rest are initialized unconditionally in x86_decode_insn
301 * or elsewhere
302 */
303 bool rip_relative;
304 u8 rex_prefix;
305 u8 lock_prefix;
306 u8 rep_prefix;
307 /* bitmaps of registers in _regs[] that can be read */
308 u32 regs_valid;
309 /* bitmaps of registers in _regs[] that have been written */
310 u32 regs_dirty;
302 /* modrm */ 311 /* modrm */
303 u8 modrm; 312 u8 modrm;
304 u8 modrm_mod; 313 u8 modrm_mod;
305 u8 modrm_reg; 314 u8 modrm_reg;
306 u8 modrm_rm; 315 u8 modrm_rm;
307 u8 modrm_seg; 316 u8 modrm_seg;
308 bool rip_relative; 317 u8 seg_override;
318 u64 d;
309 unsigned long _eip; 319 unsigned long _eip;
310 struct operand memop; 320 struct operand memop;
311 u32 regs_valid; /* bitmaps of registers in _regs[] that can be read */
312 u32 regs_dirty; /* bitmaps of registers in _regs[] that have been written */
313 /* Fields above regs are cleared together. */ 321 /* Fields above regs are cleared together. */
314 unsigned long _regs[NR_VCPU_REGS]; 322 unsigned long _regs[NR_VCPU_REGS];
315 struct operand *memopp; 323 struct operand *memopp;
@@ -407,6 +415,7 @@ bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt);
407#define EMULATION_OK 0 415#define EMULATION_OK 0
408#define EMULATION_RESTART 1 416#define EMULATION_RESTART 1
409#define EMULATION_INTERCEPTED 2 417#define EMULATION_INTERCEPTED 2
418void init_decode_cache(struct x86_emulate_ctxt *ctxt);
410int x86_emulate_insn(struct x86_emulate_ctxt *ctxt); 419int x86_emulate_insn(struct x86_emulate_ctxt *ctxt);
411int emulator_task_switch(struct x86_emulate_ctxt *ctxt, 420int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
412 u16 tss_selector, int idt_index, int reason, 421 u16 tss_selector, int idt_index, int reason,
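The reordered emulator context fields above are grouped so the per-instruction state can be reset in one pass (the new comment notes they are "cleared together"). A simplified, self-contained sketch of that layout trick follows, using stand-in field names rather than the real x86_emulate_ctxt.

#include <stdio.h>
#include <string.h>
#include <stddef.h>

struct demo_ctxt {
	int persistent;			/* initialized once, elsewhere */
	/* ---- fields below are cleared together per instruction ---- */
	int rip_relative;
	unsigned char rex_prefix;
	unsigned char lock_prefix;
	unsigned char rep_prefix;
	unsigned int regs_valid;
	unsigned int regs_dirty;
	/* ---- end of the cleared region ---- */
};

static void demo_init_decode(struct demo_ctxt *c)
{
	/* Zero only the per-instruction group, leaving 'persistent' alone. */
	memset(&c->rip_relative, 0,
	       offsetof(struct demo_ctxt, regs_dirty) + sizeof(c->regs_dirty) -
	       offsetof(struct demo_ctxt, rip_relative));
}

int main(void)
{
	struct demo_ctxt c = { .persistent = 42, .rep_prefix = 3, .regs_dirty = 7 };

	demo_init_decode(&c);
	printf("persistent=%d rep_prefix=%d regs_dirty=%u\n",
	       c.persistent, c.rep_prefix, c.regs_dirty);
	return 0;
}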
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 49205d01b9ad..572460175ba5 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -152,14 +152,16 @@ enum {
152 152
153#define DR6_BD (1 << 13) 153#define DR6_BD (1 << 13)
154#define DR6_BS (1 << 14) 154#define DR6_BS (1 << 14)
155#define DR6_FIXED_1 0xffff0ff0 155#define DR6_RTM (1 << 16)
156#define DR6_VOLATILE 0x0000e00f 156#define DR6_FIXED_1 0xfffe0ff0
157#define DR6_INIT 0xffff0ff0
158#define DR6_VOLATILE 0x0001e00f
157 159
158#define DR7_BP_EN_MASK 0x000000ff 160#define DR7_BP_EN_MASK 0x000000ff
159#define DR7_GE (1 << 9) 161#define DR7_GE (1 << 9)
160#define DR7_GD (1 << 13) 162#define DR7_GD (1 << 13)
161#define DR7_FIXED_1 0x00000400 163#define DR7_FIXED_1 0x00000400
162#define DR7_VOLATILE 0xffff23ff 164#define DR7_VOLATILE 0xffff2bff
163 165
164/* apic attention bits */ 166/* apic attention bits */
165#define KVM_APIC_CHECK_VAPIC 0 167#define KVM_APIC_CHECK_VAPIC 0
@@ -448,7 +450,7 @@ struct kvm_vcpu_arch {
448 u64 tsc_offset_adjustment; 450 u64 tsc_offset_adjustment;
449 u64 this_tsc_nsec; 451 u64 this_tsc_nsec;
450 u64 this_tsc_write; 452 u64 this_tsc_write;
451 u8 this_tsc_generation; 453 u64 this_tsc_generation;
452 bool tsc_catchup; 454 bool tsc_catchup;
453 bool tsc_always_catchup; 455 bool tsc_always_catchup;
454 s8 virtual_tsc_shift; 456 s8 virtual_tsc_shift;
@@ -591,7 +593,7 @@ struct kvm_arch {
591 u64 cur_tsc_nsec; 593 u64 cur_tsc_nsec;
592 u64 cur_tsc_write; 594 u64 cur_tsc_write;
593 u64 cur_tsc_offset; 595 u64 cur_tsc_offset;
594 u8 cur_tsc_generation; 596 u64 cur_tsc_generation;
595 int nr_vcpus_matched_tsc; 597 int nr_vcpus_matched_tsc;
596 598
597 spinlock_t pvclock_gtod_sync_lock; 599 spinlock_t pvclock_gtod_sync_lock;
@@ -717,7 +719,7 @@ struct kvm_x86_ops {
717 int (*handle_exit)(struct kvm_vcpu *vcpu); 719 int (*handle_exit)(struct kvm_vcpu *vcpu);
718 void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); 720 void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
719 void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); 721 void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
720 u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); 722 u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu);
721 void (*patch_hypercall)(struct kvm_vcpu *vcpu, 723 void (*patch_hypercall)(struct kvm_vcpu *vcpu,
722 unsigned char *hypercall_addr); 724 unsigned char *hypercall_addr);
723 void (*set_irq)(struct kvm_vcpu *vcpu); 725 void (*set_irq)(struct kvm_vcpu *vcpu);
@@ -1070,6 +1072,7 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu);
1070bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr); 1072bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr);
1071int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data); 1073int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
1072int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info); 1074int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
1075int kvm_pmu_check_pmc(struct kvm_vcpu *vcpu, unsigned pmc);
1073int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data); 1076int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
1074void kvm_handle_pmu_event(struct kvm_vcpu *vcpu); 1077void kvm_handle_pmu_event(struct kvm_vcpu *vcpu);
1075void kvm_deliver_pmi(struct kvm_vcpu *vcpu); 1078void kvm_deliver_pmi(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/mc146818rtc.h b/arch/x86/include/asm/mc146818rtc.h
index a55c7efcc4ed..0f555cc31984 100644
--- a/arch/x86/include/asm/mc146818rtc.h
+++ b/arch/x86/include/asm/mc146818rtc.h
@@ -13,7 +13,7 @@
13#define RTC_ALWAYS_BCD 1 /* RTC operates in binary mode */ 13#define RTC_ALWAYS_BCD 1 /* RTC operates in binary mode */
14#endif 14#endif
15 15
16#if defined(CONFIG_X86_32) && defined(__HAVE_ARCH_CMPXCHG) 16#if defined(CONFIG_X86_32)
17/* 17/*
18 * This lock provides nmi access to the CMOS/RTC registers. It has some 18 * This lock provides nmi access to the CMOS/RTC registers. It has some
19 * special properties. It is owned by a CPU and stores the index register 19 * special properties. It is owned by a CPU and stores the index register
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index be12c534fd59..166af2a8e865 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -3,6 +3,10 @@
3 3
4#include <asm/desc.h> 4#include <asm/desc.h>
5#include <linux/atomic.h> 5#include <linux/atomic.h>
6#include <linux/mm_types.h>
7
8#include <trace/events/tlb.h>
9
6#include <asm/pgalloc.h> 10#include <asm/pgalloc.h>
7#include <asm/tlbflush.h> 11#include <asm/tlbflush.h>
8#include <asm/paravirt.h> 12#include <asm/paravirt.h>
@@ -44,6 +48,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
44 48
45 /* Re-load page tables */ 49 /* Re-load page tables */
46 load_cr3(next->pgd); 50 load_cr3(next->pgd);
51 trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
47 52
48 /* Stop flush ipis for the previous mm */ 53 /* Stop flush ipis for the previous mm */
49 cpumask_clear_cpu(cpu, mm_cpumask(prev)); 54 cpumask_clear_cpu(cpu, mm_cpumask(prev));
@@ -71,6 +76,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
71 * to make sure to use no freed page tables. 76 * to make sure to use no freed page tables.
72 */ 77 */
73 load_cr3(next->pgd); 78 load_cr3(next->pgd);
79 trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
74 load_LDT_nolock(&next->context); 80 load_LDT_nolock(&next->context);
75 } 81 }
76 } 82 }
diff --git a/arch/x86/include/asm/mutex_32.h b/arch/x86/include/asm/mutex_32.h
index 0208c3c2cbc6..85e6cda45a02 100644
--- a/arch/x86/include/asm/mutex_32.h
+++ b/arch/x86/include/asm/mutex_32.h
@@ -100,23 +100,11 @@ do { \
100static inline int __mutex_fastpath_trylock(atomic_t *count, 100static inline int __mutex_fastpath_trylock(atomic_t *count,
101 int (*fail_fn)(atomic_t *)) 101 int (*fail_fn)(atomic_t *))
102{ 102{
103 /* 103 /* cmpxchg because it never induces a false contention state. */
104 * We have two variants here. The cmpxchg based one is the best one
105 * because it never induce a false contention state. It is included
106 * here because architectures using the inc/dec algorithms over the
107 * xchg ones are much more likely to support cmpxchg natively.
108 *
109 * If not we fall back to the spinlock based variant - that is
110 * just as efficient (and simpler) as a 'destructive' probing of
111 * the mutex state would be.
112 */
113#ifdef __HAVE_ARCH_CMPXCHG
114 if (likely(atomic_cmpxchg(count, 1, 0) == 1)) 104 if (likely(atomic_cmpxchg(count, 1, 0) == 1))
115 return 1; 105 return 1;
106
116 return 0; 107 return 0;
117#else
118 return fail_fn(count);
119#endif
120} 108}
121 109
122#endif /* _ASM_X86_MUTEX_32_H */ 110#endif /* _ASM_X86_MUTEX_32_H */
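The surviving fastpath above keeps the cmpxchg variant precisely because a failed attempt leaves the count untouched and records no false contention. A minimal userspace sketch of the same trylock shape follows, using the GCC/Clang atomic builtin in place of the kernel's atomic_cmpxchg().

#include <stdio.h>

static int mutex_count = 1;	/* 1 = unlocked, matching the kernel fastpath */

static int fastpath_trylock(int *count)
{
	int expected = 1;

	/* Succeed only if we observe "unlocked" and atomically claim it. */
	if (__atomic_compare_exchange_n(count, &expected, 0, 0,
					__ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
		return 1;

	return 0;	/* already held; the count was not disturbed */
}

int main(void)
{
	printf("first trylock:  %d\n", fastpath_trylock(&mutex_count));	/* 1 */
	printf("second trylock: %d\n", fastpath_trylock(&mutex_count));	/* 0 */
	return 0;
}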
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index 1da25a5f96f9..a1410db38a1a 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -43,7 +43,7 @@ static inline void __mwait(unsigned long eax, unsigned long ecx)
43static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) 43static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
44{ 44{
45 if (!current_set_polling_and_test()) { 45 if (!current_set_polling_and_test()) {
46 if (static_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) { 46 if (static_cpu_has_bug(X86_BUG_CLFLUSH_MONITOR)) {
47 mb(); 47 mb();
48 clflush((void *)&current_thread_info()->flags); 48 clflush((void *)&current_thread_info()->flags);
49 mb(); 49 mb();
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 851bcdc5db04..fd472181a1d0 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -52,10 +52,9 @@
52 * Compared to the generic __my_cpu_offset version, the following 52 * Compared to the generic __my_cpu_offset version, the following
53 * saves one instruction and avoids clobbering a temp register. 53 * saves one instruction and avoids clobbering a temp register.
54 */ 54 */
55#define raw_cpu_ptr(ptr) \ 55#define arch_raw_cpu_ptr(ptr) \
56({ \ 56({ \
57 unsigned long tcp_ptr__; \ 57 unsigned long tcp_ptr__; \
58 __verify_pcpu_ptr(ptr); \
59 asm volatile("add " __percpu_arg(1) ", %0" \ 58 asm volatile("add " __percpu_arg(1) ", %0" \
60 : "=r" (tcp_ptr__) \ 59 : "=r" (tcp_ptr__) \
61 : "m" (this_cpu_off), "0" (ptr)); \ 60 : "m" (this_cpu_off), "0" (ptr)); \
diff --git a/arch/x86/include/asm/pmc_atom.h b/arch/x86/include/asm/pmc_atom.h
new file mode 100644
index 000000000000..fc7a17c05d35
--- /dev/null
+++ b/arch/x86/include/asm/pmc_atom.h
@@ -0,0 +1,107 @@
1/*
2 * Intel Atom SOC Power Management Controller Header File
3 * Copyright (c) 2014, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 */
15
16#ifndef PMC_ATOM_H
17#define PMC_ATOM_H
18
19/* ValleyView Power Control Unit PCI Device ID */
20#define PCI_DEVICE_ID_VLV_PMC 0x0F1C
21
22/* PMC Memory mapped IO registers */
23#define PMC_BASE_ADDR_OFFSET 0x44
24#define PMC_BASE_ADDR_MASK 0xFFFFFE00
25#define PMC_MMIO_REG_LEN 0x100
26#define PMC_REG_BIT_WIDTH 32
27
28/* BIOS uses FUNC_DIS to disable specific function */
29#define PMC_FUNC_DIS 0x34
30#define PMC_FUNC_DIS_2 0x38
31
32/* S0ix wake event control */
33#define PMC_S0IX_WAKE_EN 0x3C
34
35#define BIT_LPC_CLOCK_RUN BIT(4)
36#define BIT_SHARED_IRQ_GPSC BIT(5)
37#define BIT_ORED_DEDICATED_IRQ_GPSS BIT(18)
38#define BIT_ORED_DEDICATED_IRQ_GPSC BIT(19)
39#define BIT_SHARED_IRQ_GPSS BIT(20)
40
41#define PMC_WAKE_EN_SETTING ~(BIT_LPC_CLOCK_RUN | \
42 BIT_SHARED_IRQ_GPSC | \
43 BIT_ORED_DEDICATED_IRQ_GPSS | \
44 BIT_ORED_DEDICATED_IRQ_GPSC | \
45 BIT_SHARED_IRQ_GPSS)
46
 47/* The timers accumulate time spent in sleep state */
48#define PMC_S0IR_TMR 0x80
49#define PMC_S0I1_TMR 0x84
50#define PMC_S0I2_TMR 0x88
51#define PMC_S0I3_TMR 0x8C
52#define PMC_S0_TMR 0x90
 53/* Sleep state counter is in units of 32us */
54#define PMC_TMR_SHIFT 5
55
56/* These registers reflect D3 status of functions */
57#define PMC_D3_STS_0 0xA0
58
59#define BIT_LPSS1_F0_DMA BIT(0)
60#define BIT_LPSS1_F1_PWM1 BIT(1)
61#define BIT_LPSS1_F2_PWM2 BIT(2)
62#define BIT_LPSS1_F3_HSUART1 BIT(3)
63#define BIT_LPSS1_F4_HSUART2 BIT(4)
64#define BIT_LPSS1_F5_SPI BIT(5)
65#define BIT_LPSS1_F6_XXX BIT(6)
66#define BIT_LPSS1_F7_XXX BIT(7)
67#define BIT_SCC_EMMC BIT(8)
68#define BIT_SCC_SDIO BIT(9)
69#define BIT_SCC_SDCARD BIT(10)
70#define BIT_SCC_MIPI BIT(11)
71#define BIT_HDA BIT(12)
72#define BIT_LPE BIT(13)
73#define BIT_OTG BIT(14)
74#define BIT_USH BIT(15)
75#define BIT_GBE BIT(16)
76#define BIT_SATA BIT(17)
77#define BIT_USB_EHCI BIT(18)
78#define BIT_SEC BIT(19)
79#define BIT_PCIE_PORT0 BIT(20)
80#define BIT_PCIE_PORT1 BIT(21)
81#define BIT_PCIE_PORT2 BIT(22)
82#define BIT_PCIE_PORT3 BIT(23)
83#define BIT_LPSS2_F0_DMA BIT(24)
84#define BIT_LPSS2_F1_I2C1 BIT(25)
85#define BIT_LPSS2_F2_I2C2 BIT(26)
86#define BIT_LPSS2_F3_I2C3 BIT(27)
87#define BIT_LPSS2_F4_I2C4 BIT(28)
88#define BIT_LPSS2_F5_I2C5 BIT(29)
89#define BIT_LPSS2_F6_I2C6 BIT(30)
90#define BIT_LPSS2_F7_I2C7 BIT(31)
91
92#define PMC_D3_STS_1 0xA4
93#define BIT_SMB BIT(0)
94#define BIT_OTG_SS_PHY BIT(1)
95#define BIT_USH_SS_PHY BIT(2)
96#define BIT_DFX BIT(3)
97
98/* PMC I/O Registers */
99#define ACPI_BASE_ADDR_OFFSET 0x40
100#define ACPI_BASE_ADDR_MASK 0xFFFFFE00
101#define ACPI_MMIO_REG_LEN 0x100
102
103#define PM1_CNT 0x4
104#define SLEEP_TYPE_MASK 0xFFFFECFF
105#define SLEEP_TYPE_S5 0x1C00
106#define SLEEP_ENABLE 0x2000
107#endif /* PMC_ATOM_H */
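The sleep-state timers above tick in 32 us units, so a raw register value is converted to microseconds by shifting left by PMC_TMR_SHIFT. A small standalone sketch of that conversion follows, with a made-up register value in place of a real MMIO read.

#include <stdio.h>
#include <stdint.h>

#define PMC_TMR_SHIFT	5	/* one counter tick = 32 us = 2^5 us */

int main(void)
{
	uint32_t raw = 123456;				/* pretend readl() of PMC_S0I3_TMR */
	uint64_t usecs = (uint64_t)raw << PMC_TMR_SHIFT;	/* ticks -> microseconds */

	printf("S0i3 residency: %llu us (~%llu ms)\n",
	       (unsigned long long)usecs, (unsigned long long)(usecs / 1000));
	return 0;
}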
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index a4ea02351f4d..ee30b9f0b91c 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -72,7 +72,6 @@ extern u16 __read_mostly tlb_lld_4k[NR_INFO];
72extern u16 __read_mostly tlb_lld_2m[NR_INFO]; 72extern u16 __read_mostly tlb_lld_2m[NR_INFO];
73extern u16 __read_mostly tlb_lld_4m[NR_INFO]; 73extern u16 __read_mostly tlb_lld_4m[NR_INFO];
74extern u16 __read_mostly tlb_lld_1g[NR_INFO]; 74extern u16 __read_mostly tlb_lld_1g[NR_INFO];
75extern s8 __read_mostly tlb_flushall_shift;
76 75
77/* 76/*
78 * CPU type and hardware bug flags. Kept separately for each CPU. 77 * CPU type and hardware bug flags. Kept separately for each CPU.
@@ -696,6 +695,8 @@ static inline void cpu_relax(void)
696 rep_nop(); 695 rep_nop();
697} 696}
698 697
698#define cpu_relax_lowlatency() cpu_relax()
699
699/* Stop speculative execution and prefetching of modified code. */ 700/* Stop speculative execution and prefetching of modified code. */
700static inline void sync_core(void) 701static inline void sync_core(void)
701{ 702{
diff --git a/arch/x86/include/asm/qrwlock.h b/arch/x86/include/asm/qrwlock.h
index 70f46f07f94e..ae0e241e228b 100644
--- a/arch/x86/include/asm/qrwlock.h
+++ b/arch/x86/include/asm/qrwlock.h
@@ -3,7 +3,7 @@
3 3
4#include <asm-generic/qrwlock_types.h> 4#include <asm-generic/qrwlock_types.h>
5 5
6#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE) 6#ifndef CONFIG_X86_PPRO_FENCE
7#define queue_write_unlock queue_write_unlock 7#define queue_write_unlock queue_write_unlock
8static inline void queue_write_unlock(struct qrwlock *lock) 8static inline void queue_write_unlock(struct qrwlock *lock)
9{ 9{
diff --git a/arch/x86/include/asm/vga.h b/arch/x86/include/asm/vga.h
index 44282fbf7bf9..c4b9dc2f67c5 100644
--- a/arch/x86/include/asm/vga.h
+++ b/arch/x86/include/asm/vga.h
@@ -17,10 +17,4 @@
17#define vga_readb(x) (*(x)) 17#define vga_readb(x) (*(x))
18#define vga_writeb(x, y) (*(y) = (x)) 18#define vga_writeb(x, y) (*(y) = (x))
19 19
20#ifdef CONFIG_FB_EFI
21#define __ARCH_HAS_VGA_DEFAULT_DEVICE
22extern struct pci_dev *vga_default_device(void);
23extern void vga_set_default_device(struct pci_dev *pdev);
24#endif
25
26#endif /* _ASM_X86_VGA_H */ 20#endif /* _ASM_X86_VGA_H */
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 7004d21e6219..bcbfade26d8d 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -51,6 +51,9 @@
51#define CPU_BASED_MONITOR_EXITING 0x20000000 51#define CPU_BASED_MONITOR_EXITING 0x20000000
52#define CPU_BASED_PAUSE_EXITING 0x40000000 52#define CPU_BASED_PAUSE_EXITING 0x40000000
53#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000 53#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000
54
55#define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x0401e172
56
54/* 57/*
55 * Definitions of Secondary Processor-Based VM-Execution Controls. 58 * Definitions of Secondary Processor-Based VM-Execution Controls.
56 */ 59 */
@@ -76,7 +79,7 @@
76 79
77#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016 80#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016
78 81
79#define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000002 82#define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000004
80#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 83#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200
81#define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL 0x00001000 84#define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL 0x00001000
82#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 85#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000
@@ -89,7 +92,7 @@
89 92
90#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff 93#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff
91 94
92#define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000002 95#define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000004
93#define VM_ENTRY_IA32E_MODE 0x00000200 96#define VM_ENTRY_IA32E_MODE 0x00000200
94#define VM_ENTRY_SMM 0x00000400 97#define VM_ENTRY_SMM 0x00000400
95#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 98#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800
diff --git a/arch/x86/include/uapi/asm/Kbuild b/arch/x86/include/uapi/asm/Kbuild
index 09409c44f9a5..3dec769cadf7 100644
--- a/arch/x86/include/uapi/asm/Kbuild
+++ b/arch/x86/include/uapi/asm/Kbuild
@@ -22,6 +22,7 @@ header-y += ipcbuf.h
22header-y += ist.h 22header-y += ist.h
23header-y += kvm.h 23header-y += kvm.h
24header-y += kvm_para.h 24header-y += kvm_para.h
25header-y += kvm_perf.h
25header-y += ldt.h 26header-y += ldt.h
26header-y += mce.h 27header-y += mce.h
27header-y += mman.h 28header-y += mman.h
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index d3a87780c70b..d7dcef58aefa 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -23,7 +23,10 @@
23#define GP_VECTOR 13 23#define GP_VECTOR 13
24#define PF_VECTOR 14 24#define PF_VECTOR 14
25#define MF_VECTOR 16 25#define MF_VECTOR 16
26#define AC_VECTOR 17
26#define MC_VECTOR 18 27#define MC_VECTOR 18
28#define XM_VECTOR 19
29#define VE_VECTOR 20
27 30
28/* Select x86 specific features in <linux/kvm.h> */ 31/* Select x86 specific features in <linux/kvm.h> */
29#define __KVM_HAVE_PIT 32#define __KVM_HAVE_PIT
diff --git a/arch/x86/include/uapi/asm/kvm_perf.h b/arch/x86/include/uapi/asm/kvm_perf.h
new file mode 100644
index 000000000000..3bb964f88aa1
--- /dev/null
+++ b/arch/x86/include/uapi/asm/kvm_perf.h
@@ -0,0 +1,16 @@
1#ifndef _ASM_X86_KVM_PERF_H
2#define _ASM_X86_KVM_PERF_H
3
4#include <asm/svm.h>
5#include <asm/vmx.h>
6#include <asm/kvm.h>
7
8#define DECODE_STR_LEN 20
9
10#define VCPU_ID "vcpu_id"
11
12#define KVM_ENTRY_TRACE "kvm:kvm_entry"
13#define KVM_EXIT_TRACE "kvm:kvm_exit"
14#define KVM_EXIT_REASON "exit_reason"
15
16#endif /* _ASM_X86_KVM_PERF_H */
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index fcf2b3ae1bf0..eac9e92fe181 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -297,6 +297,8 @@
297#define MSR_IA32_TSC_ADJUST 0x0000003b 297#define MSR_IA32_TSC_ADJUST 0x0000003b
298#define MSR_IA32_BNDCFGS 0x00000d90 298#define MSR_IA32_BNDCFGS 0x00000d90
299 299
300#define MSR_IA32_XSS 0x00000da0
301
300#define FEATURE_CONTROL_LOCKED (1<<0) 302#define FEATURE_CONTROL_LOCKED (1<<0)
301#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1) 303#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1)
302#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2) 304#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2)
@@ -558,6 +560,7 @@
558 560
559/* VMX_BASIC bits and bitmasks */ 561/* VMX_BASIC bits and bitmasks */
560#define VMX_BASIC_VMCS_SIZE_SHIFT 32 562#define VMX_BASIC_VMCS_SIZE_SHIFT 32
563#define VMX_BASIC_TRUE_CTLS (1ULL << 55)
561#define VMX_BASIC_64 0x0001000000000000LLU 564#define VMX_BASIC_64 0x0001000000000000LLU
562#define VMX_BASIC_MEM_TYPE_SHIFT 50 565#define VMX_BASIC_MEM_TYPE_SHIFT 50
563#define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU 566#define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 047f9ff2e36c..bde3993624f1 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -106,6 +106,7 @@ obj-$(CONFIG_EFI) += sysfb_efi.o
106obj-$(CONFIG_PERF_EVENTS) += perf_regs.o 106obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
107obj-$(CONFIG_TRACING) += tracepoint.o 107obj-$(CONFIG_TRACING) += tracepoint.o
108obj-$(CONFIG_IOSF_MBI) += iosf_mbi.o 108obj-$(CONFIG_IOSF_MBI) += iosf_mbi.o
109obj-$(CONFIG_PMC_ATOM) += pmc_atom.o
109 110
110### 111###
111# 64 bit specific files 112# 64 bit specific files
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 86281ffb96d6..a531f6564ed0 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -74,10 +74,6 @@ int acpi_fix_pin2_polarity __initdata;
74static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; 74static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
75#endif 75#endif
76 76
77#ifndef __HAVE_ARCH_CMPXCHG
78#warning ACPI uses CMPXCHG, i486 and later hardware
79#endif
80
81/* -------------------------------------------------------------------------- 77/* --------------------------------------------------------------------------
82 Boot-time Configuration 78 Boot-time Configuration
83 -------------------------------------------------------------------------- */ 79 -------------------------------------------------------------------------- */
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index ce8b8ff0e0ef..60e5497681f5 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -8,6 +8,7 @@
8#include <asm/processor.h> 8#include <asm/processor.h>
9#include <asm/apic.h> 9#include <asm/apic.h>
10#include <asm/cpu.h> 10#include <asm/cpu.h>
11#include <asm/smp.h>
11#include <asm/pci-direct.h> 12#include <asm/pci-direct.h>
12 13
13#ifdef CONFIG_X86_64 14#ifdef CONFIG_X86_64
@@ -50,7 +51,6 @@ static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val)
50 return wrmsr_safe_regs(gprs); 51 return wrmsr_safe_regs(gprs);
51} 52}
52 53
53#ifdef CONFIG_X86_32
54/* 54/*
55 * B step AMD K6 before B 9730xxxx have hardware bugs that can cause 55 * B step AMD K6 before B 9730xxxx have hardware bugs that can cause
56 * misexecution of code under Linux. Owners of such processors should 56 * misexecution of code under Linux. Owners of such processors should
@@ -70,6 +70,7 @@ __asm__(".globl vide\n\t.align 4\nvide: ret");
70 70
71static void init_amd_k5(struct cpuinfo_x86 *c) 71static void init_amd_k5(struct cpuinfo_x86 *c)
72{ 72{
73#ifdef CONFIG_X86_32
73/* 74/*
74 * General Systems BIOSen alias the cpu frequency registers 75 * General Systems BIOSen alias the cpu frequency registers
 74 * of the Elan at 0x000df000. Unfortunately, one of the Linux 75 * of the Elan at 0x000df000. Unfortunately, one of the Linux
@@ -83,11 +84,12 @@ static void init_amd_k5(struct cpuinfo_x86 *c)
83 if (inl(CBAR) & CBAR_ENB) 84 if (inl(CBAR) & CBAR_ENB)
84 outl(0 | CBAR_KEY, CBAR); 85 outl(0 | CBAR_KEY, CBAR);
85 } 86 }
87#endif
86} 88}
87 89
88
89static void init_amd_k6(struct cpuinfo_x86 *c) 90static void init_amd_k6(struct cpuinfo_x86 *c)
90{ 91{
92#ifdef CONFIG_X86_32
91 u32 l, h; 93 u32 l, h;
92 int mbytes = get_num_physpages() >> (20-PAGE_SHIFT); 94 int mbytes = get_num_physpages() >> (20-PAGE_SHIFT);
93 95
@@ -176,10 +178,44 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
176 /* placeholder for any needed mods */ 178 /* placeholder for any needed mods */
177 return; 179 return;
178 } 180 }
181#endif
179} 182}
180 183
181static void amd_k7_smp_check(struct cpuinfo_x86 *c) 184static void init_amd_k7(struct cpuinfo_x86 *c)
182{ 185{
186#ifdef CONFIG_X86_32
187 u32 l, h;
188
189 /*
190 * Bit 15 of Athlon specific MSR 15, needs to be 0
191 * to enable SSE on Palomino/Morgan/Barton CPU's.
192 * If the BIOS didn't enable it already, enable it here.
193 */
194 if (c->x86_model >= 6 && c->x86_model <= 10) {
195 if (!cpu_has(c, X86_FEATURE_XMM)) {
196 printk(KERN_INFO "Enabling disabled K7/SSE Support.\n");
197 msr_clear_bit(MSR_K7_HWCR, 15);
198 set_cpu_cap(c, X86_FEATURE_XMM);
199 }
200 }
201
202 /*
203 * It's been determined by AMD that Athlons since model 8 stepping 1
204 * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx
205 * As per AMD technical note 27212 0.2
206 */
207 if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
208 rdmsr(MSR_K7_CLK_CTL, l, h);
209 if ((l & 0xfff00000) != 0x20000000) {
210 printk(KERN_INFO
211 "CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
212 l, ((l & 0x000fffff)|0x20000000));
213 wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h);
214 }
215 }
216
217 set_cpu_cap(c, X86_FEATURE_K7);
218
183 /* calling is from identify_secondary_cpu() ? */ 219 /* calling is from identify_secondary_cpu() ? */
184 if (!c->cpu_index) 220 if (!c->cpu_index)
185 return; 221 return;
@@ -207,7 +243,7 @@ static void amd_k7_smp_check(struct cpuinfo_x86 *c)
207 if (((c->x86_model == 6) && (c->x86_mask >= 2)) || 243 if (((c->x86_model == 6) && (c->x86_mask >= 2)) ||
208 ((c->x86_model == 7) && (c->x86_mask >= 1)) || 244 ((c->x86_model == 7) && (c->x86_mask >= 1)) ||
209 (c->x86_model > 7)) 245 (c->x86_model > 7))
210 if (cpu_has_mp) 246 if (cpu_has(c, X86_FEATURE_MP))
211 return; 247 return;
212 248
213 /* If we get here, not a certified SMP capable AMD system. */ 249 /* If we get here, not a certified SMP capable AMD system. */
@@ -219,45 +255,8 @@ static void amd_k7_smp_check(struct cpuinfo_x86 *c)
219 WARN_ONCE(1, "WARNING: This combination of AMD" 255 WARN_ONCE(1, "WARNING: This combination of AMD"
220 " processors is not suitable for SMP.\n"); 256 " processors is not suitable for SMP.\n");
221 add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_NOW_UNRELIABLE); 257 add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_NOW_UNRELIABLE);
222}
223
224static void init_amd_k7(struct cpuinfo_x86 *c)
225{
226 u32 l, h;
227
228 /*
229 * Bit 15 of Athlon specific MSR 15, needs to be 0
230 * to enable SSE on Palomino/Morgan/Barton CPU's.
231 * If the BIOS didn't enable it already, enable it here.
232 */
233 if (c->x86_model >= 6 && c->x86_model <= 10) {
234 if (!cpu_has(c, X86_FEATURE_XMM)) {
235 printk(KERN_INFO "Enabling disabled K7/SSE Support.\n");
236 msr_clear_bit(MSR_K7_HWCR, 15);
237 set_cpu_cap(c, X86_FEATURE_XMM);
238 }
239 }
240
241 /*
242 * It's been determined by AMD that Athlons since model 8 stepping 1
243 * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx
244 * As per AMD technical note 27212 0.2
245 */
246 if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
247 rdmsr(MSR_K7_CLK_CTL, l, h);
248 if ((l & 0xfff00000) != 0x20000000) {
249 printk(KERN_INFO
250 "CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
251 l, ((l & 0x000fffff)|0x20000000));
252 wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h);
253 }
254 }
255
256 set_cpu_cap(c, X86_FEATURE_K7);
257
258 amd_k7_smp_check(c);
259}
260#endif 258#endif
259}
261 260
262#ifdef CONFIG_NUMA 261#ifdef CONFIG_NUMA
263/* 262/*
@@ -446,6 +445,26 @@ static void early_init_amd_mc(struct cpuinfo_x86 *c)
446 445
447static void bsp_init_amd(struct cpuinfo_x86 *c) 446static void bsp_init_amd(struct cpuinfo_x86 *c)
448{ 447{
448
449#ifdef CONFIG_X86_64
450 if (c->x86 >= 0xf) {
451 unsigned long long tseg;
452
453 /*
454 * Split up direct mapping around the TSEG SMM area.
455 * Don't do it for gbpages because there seems very little
456 * benefit in doing so.
457 */
458 if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
459 unsigned long pfn = tseg >> PAGE_SHIFT;
460
461 printk(KERN_DEBUG "tseg: %010llx\n", tseg);
462 if (pfn_range_is_mapped(pfn, pfn + 1))
463 set_memory_4k((unsigned long)__va(tseg), 1);
464 }
465 }
466#endif
467
449 if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { 468 if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
450 469
451 if (c->x86 > 0x10 || 470 if (c->x86 > 0x10 ||
@@ -515,101 +534,74 @@ static const int amd_erratum_383[];
515static const int amd_erratum_400[]; 534static const int amd_erratum_400[];
516static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum); 535static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum);
517 536
518static void init_amd(struct cpuinfo_x86 *c) 537static void init_amd_k8(struct cpuinfo_x86 *c)
519{ 538{
520 u32 dummy; 539 u32 level;
521 unsigned long long value; 540 u64 value;
522 541
523#ifdef CONFIG_SMP 542 /* On C+ stepping K8 rep microcode works well for copy/memset */
524 /* 543 level = cpuid_eax(1);
525 * Disable TLB flush filter by setting HWCR.FFDIS on K8 544 if ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
526 * bit 6 of msr C001_0015 545 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
527 *
528 * Errata 63 for SH-B3 steppings
529 * Errata 122 for all steppings (F+ have it disabled by default)
530 */
531 if (c->x86 == 0xf)
532 msr_set_bit(MSR_K7_HWCR, 6);
533#endif
534
535 early_init_amd(c);
536 546
537 /* 547 /*
538 * Bit 31 in normal CPUID used for nonstandard 3DNow ID; 548 * Some BIOSes incorrectly force this feature, but only K8 revision D
539 * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway 549 * (model = 0x14) and later actually support it.
550 * (AMD Erratum #110, docId: 25759).
540 */ 551 */
541 clear_cpu_cap(c, 0*32+31); 552 if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) {
542 553 clear_cpu_cap(c, X86_FEATURE_LAHF_LM);
543#ifdef CONFIG_X86_64 554 if (!rdmsrl_amd_safe(0xc001100d, &value)) {
544 /* On C+ stepping K8 rep microcode works well for copy/memset */ 555 value &= ~BIT_64(32);
545 if (c->x86 == 0xf) { 556 wrmsrl_amd_safe(0xc001100d, value);
546 u32 level;
547
548 level = cpuid_eax(1);
549 if ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
550 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
551
552 /*
553 * Some BIOSes incorrectly force this feature, but only K8
554 * revision D (model = 0x14) and later actually support it.
555 * (AMD Erratum #110, docId: 25759).
556 */
557 if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) {
558 clear_cpu_cap(c, X86_FEATURE_LAHF_LM);
559 if (!rdmsrl_amd_safe(0xc001100d, &value)) {
560 value &= ~(1ULL << 32);
561 wrmsrl_amd_safe(0xc001100d, value);
562 }
563 } 557 }
564
565 } 558 }
566 if (c->x86 >= 0x10)
567 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
568 559
569 /* get apicid instead of initial apic id from cpuid */ 560 if (!c->x86_model_id[0])
570 c->apicid = hard_smp_processor_id(); 561 strcpy(c->x86_model_id, "Hammer");
571#else 562}
563
564static void init_amd_gh(struct cpuinfo_x86 *c)
565{
566#ifdef CONFIG_X86_64
567 /* do this for boot cpu */
568 if (c == &boot_cpu_data)
569 check_enable_amd_mmconf_dmi();
570
571 fam10h_check_enable_mmcfg();
572#endif
572 573
573 /* 574 /*
574 * FIXME: We should handle the K5 here. Set up the write 575 * Disable GART TLB Walk Errors on Fam10h. We do this here because this
575 * range and also turn on MSR 83 bits 4 and 31 (write alloc, 576 * is always needed when GART is enabled, even in a kernel which has no
576 * no bus pipeline) 577 * MCE support built in. BIOS should disable GartTlbWlk Errors already.
578 * If it doesn't, we do it here as suggested by the BKDG.
579 *
580 * Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=33012
577 */ 581 */
582 msr_set_bit(MSR_AMD64_MCx_MASK(4), 10);
578 583
579 switch (c->x86) { 584 /*
580 case 4: 585 * On family 10h BIOS may not have properly enabled WC+ support, causing
581 init_amd_k5(c); 586 * it to be converted to CD memtype. This may result in performance
582 break; 587 * degradation for certain nested-paging guests. Prevent this conversion
583 case 5: 588 * by clearing bit 24 in MSR_AMD64_BU_CFG2.
584 init_amd_k6(c); 589 *
585 break; 590 * NOTE: we want to use the _safe accessors so as not to #GP kvm
586 case 6: /* An Athlon/Duron */ 591 * guests on older kvm hosts.
587 init_amd_k7(c); 592 */
588 break; 593 msr_clear_bit(MSR_AMD64_BU_CFG2, 24);
589 }
590 594
591 /* K6s reports MCEs but don't actually have all the MSRs */ 595 if (cpu_has_amd_erratum(c, amd_erratum_383))
592 if (c->x86 < 6) 596 set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH);
593 clear_cpu_cap(c, X86_FEATURE_MCE); 597}
594#endif
595 598
596 /* Enable workaround for FXSAVE leak */ 599static void init_amd_bd(struct cpuinfo_x86 *c)
597 if (c->x86 >= 6) 600{
598 set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); 601 u64 value;
599
600 if (!c->x86_model_id[0]) {
601 switch (c->x86) {
602 case 0xf:
603 /* Should distinguish Models here, but this is only
604 a fallback anyways. */
605 strcpy(c->x86_model_id, "Hammer");
606 break;
607 }
608 }
609 602
610 /* re-enable TopologyExtensions if switched off by BIOS */ 603 /* re-enable TopologyExtensions if switched off by BIOS */
611 if ((c->x86 == 0x15) && 604 if ((c->x86_model >= 0x10) && (c->x86_model <= 0x1f) &&
612 (c->x86_model >= 0x10) && (c->x86_model <= 0x1f) &&
613 !cpu_has(c, X86_FEATURE_TOPOEXT)) { 605 !cpu_has(c, X86_FEATURE_TOPOEXT)) {
614 606
615 if (msr_set_bit(0xc0011005, 54) > 0) { 607 if (msr_set_bit(0xc0011005, 54) > 0) {
@@ -625,14 +617,60 @@ static void init_amd(struct cpuinfo_x86 *c)
625 * The way access filter has a performance penalty on some workloads. 617 * The way access filter has a performance penalty on some workloads.
626 * Disable it on the affected CPUs. 618 * Disable it on the affected CPUs.
627 */ 619 */
628 if ((c->x86 == 0x15) && 620 if ((c->x86_model >= 0x02) && (c->x86_model < 0x20)) {
629 (c->x86_model >= 0x02) && (c->x86_model < 0x20)) {
630
631 if (!rdmsrl_safe(0xc0011021, &value) && !(value & 0x1E)) { 621 if (!rdmsrl_safe(0xc0011021, &value) && !(value & 0x1E)) {
632 value |= 0x1E; 622 value |= 0x1E;
633 wrmsrl_safe(0xc0011021, value); 623 wrmsrl_safe(0xc0011021, value);
634 } 624 }
635 } 625 }
626}
627
628static void init_amd(struct cpuinfo_x86 *c)
629{
630 u32 dummy;
631
632#ifdef CONFIG_SMP
633 /*
634 * Disable TLB flush filter by setting HWCR.FFDIS on K8
635 * bit 6 of msr C001_0015
636 *
637 * Errata 63 for SH-B3 steppings
638 * Errata 122 for all steppings (F+ have it disabled by default)
639 */
640 if (c->x86 == 0xf)
641 msr_set_bit(MSR_K7_HWCR, 6);
642#endif
643
644 early_init_amd(c);
645
646 /*
647 * Bit 31 in normal CPUID used for nonstandard 3DNow ID;
648 * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway
649 */
650 clear_cpu_cap(c, 0*32+31);
651
652 if (c->x86 >= 0x10)
653 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
654
655 /* get apicid instead of initial apic id from cpuid */
656 c->apicid = hard_smp_processor_id();
657
658 /* K6s reports MCEs but don't actually have all the MSRs */
659 if (c->x86 < 6)
660 clear_cpu_cap(c, X86_FEATURE_MCE);
661
662 switch (c->x86) {
663 case 4: init_amd_k5(c); break;
664 case 5: init_amd_k6(c); break;
665 case 6: init_amd_k7(c); break;
666 case 0xf: init_amd_k8(c); break;
667 case 0x10: init_amd_gh(c); break;
668 case 0x15: init_amd_bd(c); break;
669 }
670
671 /* Enable workaround for FXSAVE leak */
672 if (c->x86 >= 6)
673 set_cpu_bug(c, X86_BUG_FXSAVE_LEAK);
636 674
637 cpu_detect_cache_sizes(c); 675 cpu_detect_cache_sizes(c);
638 676
@@ -656,33 +694,6 @@ static void init_amd(struct cpuinfo_x86 *c)
656 set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); 694 set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
657 } 695 }
658 696
659#ifdef CONFIG_X86_64
660 if (c->x86 == 0x10) {
661 /* do this for boot cpu */
662 if (c == &boot_cpu_data)
663 check_enable_amd_mmconf_dmi();
664
665 fam10h_check_enable_mmcfg();
666 }
667
668 if (c == &boot_cpu_data && c->x86 >= 0xf) {
669 unsigned long long tseg;
670
671 /*
672 * Split up direct mapping around the TSEG SMM area.
673 * Don't do it for gbpages because there seems very little
674 * benefit in doing so.
675 */
676 if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
677 unsigned long pfn = tseg >> PAGE_SHIFT;
678
679 printk(KERN_DEBUG "tseg: %010llx\n", tseg);
680 if (pfn_range_is_mapped(pfn, pfn + 1))
681 set_memory_4k((unsigned long)__va(tseg), 1);
682 }
683 }
684#endif
685
686 /* 697 /*
687 * Family 0x12 and above processors have APIC timer 698 * Family 0x12 and above processors have APIC timer
688 * running in deep C states. 699 * running in deep C states.
@@ -690,34 +701,6 @@ static void init_amd(struct cpuinfo_x86 *c)
690 if (c->x86 > 0x11) 701 if (c->x86 > 0x11)
691 set_cpu_cap(c, X86_FEATURE_ARAT); 702 set_cpu_cap(c, X86_FEATURE_ARAT);
692 703
693 if (c->x86 == 0x10) {
694 /*
695 * Disable GART TLB Walk Errors on Fam10h. We do this here
696 * because this is always needed when GART is enabled, even in a
697 * kernel which has no MCE support built in.
698 * BIOS should disable GartTlbWlk Errors already. If
699 * it doesn't, do it here as suggested by the BKDG.
700 *
701 * Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=33012
702 */
703 msr_set_bit(MSR_AMD64_MCx_MASK(4), 10);
704
705 /*
706 * On family 10h BIOS may not have properly enabled WC+ support,
707 * causing it to be converted to CD memtype. This may result in
708 * performance degradation for certain nested-paging guests.
709 * Prevent this conversion by clearing bit 24 in
710 * MSR_AMD64_BU_CFG2.
711 *
712 * NOTE: we want to use the _safe accessors so as not to #GP kvm
713 * guests on older kvm hosts.
714 */
715 msr_clear_bit(MSR_AMD64_BU_CFG2, 24);
716
717 if (cpu_has_amd_erratum(c, amd_erratum_383))
718 set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH);
719 }
720
721 if (cpu_has_amd_erratum(c, amd_erratum_400)) 704 if (cpu_has_amd_erratum(c, amd_erratum_400))
722 set_cpu_bug(c, X86_BUG_AMD_APIC_C1E); 705 set_cpu_bug(c, X86_BUG_AMD_APIC_C1E);
723 706
@@ -741,11 +724,6 @@ static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
741} 724}
742#endif 725#endif
743 726
744static void cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c)
745{
746 tlb_flushall_shift = 6;
747}
748
749static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c) 727static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c)
750{ 728{
751 u32 ebx, eax, ecx, edx; 729 u32 ebx, eax, ecx, edx;
@@ -793,8 +771,6 @@ static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c)
793 tlb_lli_2m[ENTRIES] = eax & mask; 771 tlb_lli_2m[ENTRIES] = eax & mask;
794 772
795 tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1; 773 tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1;
796
797 cpu_set_tlb_flushall_shift(c);
798} 774}
799 775
800static const struct cpu_dev amd_cpu_dev = { 776static const struct cpu_dev amd_cpu_dev = {
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index ef1b93f18ed1..333fd5209336 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -481,26 +481,17 @@ u16 __read_mostly tlb_lld_2m[NR_INFO];
481u16 __read_mostly tlb_lld_4m[NR_INFO]; 481u16 __read_mostly tlb_lld_4m[NR_INFO];
482u16 __read_mostly tlb_lld_1g[NR_INFO]; 482u16 __read_mostly tlb_lld_1g[NR_INFO];
483 483
484/*
485 * tlb_flushall_shift shows the balance point in replacing cr3 write
486 * with multiple 'invlpg'. It will do this replacement when
487 * flush_tlb_lines <= active_lines/2^tlb_flushall_shift.
488 * If tlb_flushall_shift is -1, means the replacement will be disabled.
489 */
490s8 __read_mostly tlb_flushall_shift = -1;
491
492void cpu_detect_tlb(struct cpuinfo_x86 *c) 484void cpu_detect_tlb(struct cpuinfo_x86 *c)
493{ 485{
494 if (this_cpu->c_detect_tlb) 486 if (this_cpu->c_detect_tlb)
495 this_cpu->c_detect_tlb(c); 487 this_cpu->c_detect_tlb(c);
496 488
497 printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" 489 printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n"
498 "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d, 1GB %d\n" 490 "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d, 1GB %d\n",
499 "tlb_flushall_shift: %d\n",
500 tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], 491 tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES],
501 tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES], 492 tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES],
502 tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES], 493 tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES],
503 tlb_lld_1g[ENTRIES], tlb_flushall_shift); 494 tlb_lld_1g[ENTRIES]);
504} 495}
505 496
506void detect_ht(struct cpuinfo_x86 *c) 497void detect_ht(struct cpuinfo_x86 *c)
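
For context on what is being deleted here: the old tlb_flushall_shift heuristic preferred individual invlpg instructions over a full CR3 flush whenever flush_tlb_lines <= active_lines / 2^tlb_flushall_shift. With the shift of 6 that both the AMD and Intel paths used by default and, say, a 512-entry last-level dTLB, the cutover point was 8 pages. A throwaway sketch of the retired test, kept only to document the old behaviour:

	/* Retired heuristic, for reference only (removed by this series). */
	static int old_prefer_invlpg(unsigned int flush_pages,
				     unsigned int dtlb_entries, int shift)
	{
		if (shift < 0)	/* -1 meant "always flush everything" */
			return 0;
		return flush_pages <= (dtlb_entries >> shift);
	}
	/* old_prefer_invlpg(8, 512, 6) == 1, old_prefer_invlpg(9, 512, 6) == 0 */
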
@@ -634,6 +625,15 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
634 c->x86_capability[9] = ebx; 625 c->x86_capability[9] = ebx;
635 } 626 }
636 627
628 /* Extended state features: level 0x0000000d */
629 if (c->cpuid_level >= 0x0000000d) {
630 u32 eax, ebx, ecx, edx;
631
632 cpuid_count(0x0000000d, 1, &eax, &ebx, &ecx, &edx);
633
634 c->x86_capability[10] = eax;
635 }
636
637 /* AMD-defined flags: level 0x80000001 */ 637 /* AMD-defined flags: level 0x80000001 */
638 xlvl = cpuid_eax(0x80000000); 638 xlvl = cpuid_eax(0x80000000);
639 c->extended_cpuid_level = xlvl; 639 c->extended_cpuid_level = xlvl;
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index a80029035bf2..74e804ddc5c7 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -253,7 +253,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
253 */ 253 */
254 if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 && 254 if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
255 (c->x86_mask < 0x6 || c->x86_mask == 0xb)) 255 (c->x86_mask < 0x6 || c->x86_mask == 0xb))
256 set_cpu_cap(c, X86_FEATURE_11AP); 256 set_cpu_bug(c, X86_BUG_11AP);
257 257
258 258
259#ifdef CONFIG_X86_INTEL_USERCOPY 259#ifdef CONFIG_X86_INTEL_USERCOPY
@@ -370,6 +370,17 @@ static void init_intel(struct cpuinfo_x86 *c)
370 */ 370 */
371 detect_extended_topology(c); 371 detect_extended_topology(c);
372 372
373 if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
374 /*
375 * let's use the legacy cpuid vector 0x1 and 0x4 for topology
376 * detection.
377 */
378 c->x86_max_cores = intel_num_cpu_cores(c);
379#ifdef CONFIG_X86_32
380 detect_ht(c);
381#endif
382 }
383
373 l2 = init_intel_cacheinfo(c); 384 l2 = init_intel_cacheinfo(c);
374 if (c->cpuid_level > 9) { 385 if (c->cpuid_level > 9) {
375 unsigned eax = cpuid_eax(10); 386 unsigned eax = cpuid_eax(10);
@@ -391,7 +402,7 @@ static void init_intel(struct cpuinfo_x86 *c)
391 402
392 if (c->x86 == 6 && cpu_has_clflush && 403 if (c->x86 == 6 && cpu_has_clflush &&
393 (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47)) 404 (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47))
394 set_cpu_cap(c, X86_FEATURE_CLFLUSH_MONITOR); 405 set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR);
395 406
396#ifdef CONFIG_X86_64 407#ifdef CONFIG_X86_64
397 if (c->x86 == 15) 408 if (c->x86 == 15)
@@ -438,17 +449,6 @@ static void init_intel(struct cpuinfo_x86 *c)
438 set_cpu_cap(c, X86_FEATURE_P3); 449 set_cpu_cap(c, X86_FEATURE_P3);
439#endif 450#endif
440 451
441 if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
442 /*
443 * let's use the legacy cpuid vector 0x1 and 0x4 for topology
444 * detection.
445 */
446 c->x86_max_cores = intel_num_cpu_cores(c);
447#ifdef CONFIG_X86_32
448 detect_ht(c);
449#endif
450 }
451
452 /* Work around errata */ 452 /* Work around errata */
453 srat_detect_node(c); 453 srat_detect_node(c);
454 454
@@ -634,31 +634,6 @@ static void intel_tlb_lookup(const unsigned char desc)
634 } 634 }
635} 635}
636 636
637static void intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c)
638{
639 switch ((c->x86 << 8) + c->x86_model) {
640 case 0x60f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
641 case 0x616: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
642 case 0x617: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
643 case 0x61d: /* six-core 45 nm xeon "Dunnington" */
644 tlb_flushall_shift = -1;
645 break;
646 case 0x63a: /* Ivybridge */
647 tlb_flushall_shift = 2;
648 break;
649 case 0x61a: /* 45 nm nehalem, "Bloomfield" */
650 case 0x61e: /* 45 nm nehalem, "Lynnfield" */
651 case 0x625: /* 32 nm nehalem, "Clarkdale" */
652 case 0x62c: /* 32 nm nehalem, "Gulftown" */
653 case 0x62e: /* 45 nm nehalem-ex, "Beckton" */
654 case 0x62f: /* 32 nm Xeon E7 */
655 case 0x62a: /* SandyBridge */
656 case 0x62d: /* SandyBridge, "Romely-EP" */
657 default:
658 tlb_flushall_shift = 6;
659 }
660}
661
662static void intel_detect_tlb(struct cpuinfo_x86 *c) 637static void intel_detect_tlb(struct cpuinfo_x86 *c)
663{ 638{
664 int i, j, n; 639 int i, j, n;
@@ -683,7 +658,6 @@ static void intel_detect_tlb(struct cpuinfo_x86 *c)
683 for (j = 1 ; j < 16 ; j++) 658 for (j = 1 ; j < 16 ; j++)
684 intel_tlb_lookup(desc[j]); 659 intel_tlb_lookup(desc[j]);
685 } 660 }
686 intel_tlb_flushall_shift_set(c);
687} 661}
688 662
689static const struct cpu_dev intel_cpu_dev = { 663static const struct cpu_dev intel_cpu_dev = {
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index a952e9c85b6f..9c8f7394c612 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -730,6 +730,18 @@ unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
730#endif 730#endif
731 } 731 }
732 732
733#ifdef CONFIG_X86_HT
734 /*
735 * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in
736 * turns means that the only possibility is SMT (as indicated in
737 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
738 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
739 * c->phys_proc_id.
740 */
741 if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
742 per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
743#endif
744
733 c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); 745 c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
734 746
735 return l2; 747 return l2;
diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh
index 2bf616505499..e2b22df964cd 100644
--- a/arch/x86/kernel/cpu/mkcapflags.sh
+++ b/arch/x86/kernel/cpu/mkcapflags.sh
@@ -1,23 +1,25 @@
1#!/bin/sh 1#!/bin/sh
2# 2#
3# Generate the x86_cap_flags[] array from include/asm/cpufeature.h 3# Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeature.h
4# 4#
5 5
6IN=$1 6IN=$1
7OUT=$2 7OUT=$2
8 8
9TABS="$(printf '\t\t\t\t\t')" 9function dump_array()
10trap 'rm "$OUT"' EXIT 10{
11 ARRAY=$1
12 SIZE=$2
13 PFX=$3
14 POSTFIX=$4
11 15
12( 16 PFX_SZ=$(echo $PFX | wc -c)
13 echo "#ifndef _ASM_X86_CPUFEATURE_H" 17 TABS="$(printf '\t\t\t\t\t')"
14 echo "#include <asm/cpufeature.h>" 18
15 echo "#endif" 19 echo "const char * const $ARRAY[$SIZE] = {"
16 echo ""
17 echo "const char * const x86_cap_flags[NCAPINTS*32] = {"
18 20
19 # Iterate through any input lines starting with #define X86_FEATURE_ 21 # Iterate through any input lines starting with #define $PFX
20 sed -n -e 's/\t/ /g' -e 's/^ *# *define *X86_FEATURE_//p' $IN | 22 sed -n -e 's/\t/ /g' -e "s/^ *# *define *$PFX//p" $IN |
21 while read i 23 while read i
22 do 24 do
23 # Name is everything up to the first whitespace 25 # Name is everything up to the first whitespace
@@ -31,11 +33,32 @@ trap 'rm "$OUT"' EXIT
31 # Name is uppercase, VALUE is all lowercase 33 # Name is uppercase, VALUE is all lowercase
32 VALUE="$(echo "$VALUE" | tr A-Z a-z)" 34 VALUE="$(echo "$VALUE" | tr A-Z a-z)"
33 35
34 TABCOUNT=$(( ( 5*8 - 14 - $(echo "$NAME" | wc -c) ) / 8 )) 36 if [ -n "$POSTFIX" ]; then
35 printf "\t[%s]%.*s = %s,\n" \ 37 T=$(( $PFX_SZ + $(echo $POSTFIX | wc -c) + 2 ))
36 "X86_FEATURE_$NAME" "$TABCOUNT" "$TABS" "$VALUE" 38 TABS="$(printf '\t\t\t\t\t\t')"
39 TABCOUNT=$(( ( 6*8 - ($T + 1) - $(echo "$NAME" | wc -c) ) / 8 ))
40 printf "\t[%s - %s]%.*s = %s,\n" "$PFX$NAME" "$POSTFIX" "$TABCOUNT" "$TABS" "$VALUE"
41 else
42 TABCOUNT=$(( ( 5*8 - ($PFX_SZ + 1) - $(echo "$NAME" | wc -c) ) / 8 ))
43 printf "\t[%s]%.*s = %s,\n" "$PFX$NAME" "$TABCOUNT" "$TABS" "$VALUE"
44 fi
37 done 45 done
38 echo "};" 46 echo "};"
47}
48
49trap 'rm "$OUT"' EXIT
50
51(
52 echo "#ifndef _ASM_X86_CPUFEATURE_H"
53 echo "#include <asm/cpufeature.h>"
54 echo "#endif"
55 echo ""
56
57 dump_array "x86_cap_flags" "NCAPINTS*32" "X86_FEATURE_" ""
58 echo ""
59
60 dump_array "x86_bug_flags" "NBUGINTS*32" "X86_BUG_" "NCAPINTS*32"
61
39) > $OUT 62) > $OUT
40 63
41trap - EXIT 64trap - EXIT
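
To make the generator change concrete: dump_array() invoked with PFX=X86_BUG_ and POSTFIX=NCAPINTS*32 emits entries of the form sketched below. The example uses only bug defines that appear elsewhere in this series; the exact entries and alignment naturally depend on the contents of cpufeature.h.

	const char * const x86_bug_flags[NBUGINTS*32] = {
		[X86_BUG_11AP - NCAPINTS*32]		= "11ap",
		[X86_BUG_FXSAVE_LEAK - NCAPINTS*32]	= "fxsave_leak",
		[X86_BUG_CLFLUSH_MONITOR - NCAPINTS*32]	= "clflush_monitor",
	};
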
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 2bdfbff8a4f6..2879ecdaac43 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -118,6 +118,9 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
118 continue; 118 continue;
119 if (event->attr.config1 & ~er->valid_mask) 119 if (event->attr.config1 & ~er->valid_mask)
120 return -EINVAL; 120 return -EINVAL;
 121	/* Check if the extra MSRs can be safely accessed */
122 if (!er->extra_msr_access)
123 return -ENXIO;
121 124
122 reg->idx = er->idx; 125 reg->idx = er->idx;
123 reg->config = event->attr.config1; 126 reg->config = event->attr.config1;
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 3b2f9bdd974b..8ade93111e03 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -295,14 +295,16 @@ struct extra_reg {
295 u64 config_mask; 295 u64 config_mask;
296 u64 valid_mask; 296 u64 valid_mask;
297 int idx; /* per_xxx->regs[] reg index */ 297 int idx; /* per_xxx->regs[] reg index */
298 bool extra_msr_access;
298}; 299};
299 300
300#define EVENT_EXTRA_REG(e, ms, m, vm, i) { \ 301#define EVENT_EXTRA_REG(e, ms, m, vm, i) { \
301 .event = (e), \ 302 .event = (e), \
302 .msr = (ms), \ 303 .msr = (ms), \
303 .config_mask = (m), \ 304 .config_mask = (m), \
304 .valid_mask = (vm), \ 305 .valid_mask = (vm), \
305 .idx = EXTRA_REG_##i, \ 306 .idx = EXTRA_REG_##i, \
307 .extra_msr_access = true, \
306 } 308 }
307 309
308#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \ 310#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \
diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/kernel/cpu/perf_event_amd_uncore.c
index 3bbdf4cd38b9..30790d798e6b 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_uncore.c
@@ -294,31 +294,41 @@ static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
294 cpu_to_node(cpu)); 294 cpu_to_node(cpu));
295} 295}
296 296
297static void amd_uncore_cpu_up_prepare(unsigned int cpu) 297static int amd_uncore_cpu_up_prepare(unsigned int cpu)
298{ 298{
299 struct amd_uncore *uncore; 299 struct amd_uncore *uncore_nb = NULL, *uncore_l2;
300 300
301 if (amd_uncore_nb) { 301 if (amd_uncore_nb) {
302 uncore = amd_uncore_alloc(cpu); 302 uncore_nb = amd_uncore_alloc(cpu);
303 uncore->cpu = cpu; 303 if (!uncore_nb)
304 uncore->num_counters = NUM_COUNTERS_NB; 304 goto fail;
305 uncore->rdpmc_base = RDPMC_BASE_NB; 305 uncore_nb->cpu = cpu;
306 uncore->msr_base = MSR_F15H_NB_PERF_CTL; 306 uncore_nb->num_counters = NUM_COUNTERS_NB;
307 uncore->active_mask = &amd_nb_active_mask; 307 uncore_nb->rdpmc_base = RDPMC_BASE_NB;
308 uncore->pmu = &amd_nb_pmu; 308 uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL;
309 *per_cpu_ptr(amd_uncore_nb, cpu) = uncore; 309 uncore_nb->active_mask = &amd_nb_active_mask;
310 uncore_nb->pmu = &amd_nb_pmu;
311 *per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb;
310 } 312 }
311 313
312 if (amd_uncore_l2) { 314 if (amd_uncore_l2) {
313 uncore = amd_uncore_alloc(cpu); 315 uncore_l2 = amd_uncore_alloc(cpu);
314 uncore->cpu = cpu; 316 if (!uncore_l2)
315 uncore->num_counters = NUM_COUNTERS_L2; 317 goto fail;
316 uncore->rdpmc_base = RDPMC_BASE_L2; 318 uncore_l2->cpu = cpu;
317 uncore->msr_base = MSR_F16H_L2I_PERF_CTL; 319 uncore_l2->num_counters = NUM_COUNTERS_L2;
318 uncore->active_mask = &amd_l2_active_mask; 320 uncore_l2->rdpmc_base = RDPMC_BASE_L2;
319 uncore->pmu = &amd_l2_pmu; 321 uncore_l2->msr_base = MSR_F16H_L2I_PERF_CTL;
320 *per_cpu_ptr(amd_uncore_l2, cpu) = uncore; 322 uncore_l2->active_mask = &amd_l2_active_mask;
323 uncore_l2->pmu = &amd_l2_pmu;
324 *per_cpu_ptr(amd_uncore_l2, cpu) = uncore_l2;
321 } 325 }
326
327 return 0;
328
329fail:
330 kfree(uncore_nb);
331 return -ENOMEM;
322} 332}
323 333
324static struct amd_uncore * 334static struct amd_uncore *
@@ -441,7 +451,7 @@ static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores)
441 451
442 if (!--uncore->refcnt) 452 if (!--uncore->refcnt)
443 kfree(uncore); 453 kfree(uncore);
444 *per_cpu_ptr(amd_uncore_nb, cpu) = NULL; 454 *per_cpu_ptr(uncores, cpu) = NULL;
445} 455}
446 456
447static void amd_uncore_cpu_dead(unsigned int cpu) 457static void amd_uncore_cpu_dead(unsigned int cpu)
@@ -461,7 +471,8 @@ amd_uncore_cpu_notifier(struct notifier_block *self, unsigned long action,
461 471
462 switch (action & ~CPU_TASKS_FROZEN) { 472 switch (action & ~CPU_TASKS_FROZEN) {
463 case CPU_UP_PREPARE: 473 case CPU_UP_PREPARE:
464 amd_uncore_cpu_up_prepare(cpu); 474 if (amd_uncore_cpu_up_prepare(cpu))
475 return notifier_from_errno(-ENOMEM);
465 break; 476 break;
466 477
467 case CPU_STARTING: 478 case CPU_STARTING:
@@ -501,20 +512,33 @@ static void __init init_cpu_already_online(void *dummy)
501 amd_uncore_cpu_online(cpu); 512 amd_uncore_cpu_online(cpu);
502} 513}
503 514
515static void cleanup_cpu_online(void *dummy)
516{
517 unsigned int cpu = smp_processor_id();
518
519 amd_uncore_cpu_dead(cpu);
520}
521
504static int __init amd_uncore_init(void) 522static int __init amd_uncore_init(void)
505{ 523{
506 unsigned int cpu; 524 unsigned int cpu, cpu2;
507 int ret = -ENODEV; 525 int ret = -ENODEV;
508 526
509 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) 527 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
510 return -ENODEV; 528 goto fail_nodev;
511 529
512 if (!cpu_has_topoext) 530 if (!cpu_has_topoext)
513 return -ENODEV; 531 goto fail_nodev;
514 532
515 if (cpu_has_perfctr_nb) { 533 if (cpu_has_perfctr_nb) {
516 amd_uncore_nb = alloc_percpu(struct amd_uncore *); 534 amd_uncore_nb = alloc_percpu(struct amd_uncore *);
517 perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1); 535 if (!amd_uncore_nb) {
536 ret = -ENOMEM;
537 goto fail_nb;
538 }
539 ret = perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1);
540 if (ret)
541 goto fail_nb;
518 542
519 printk(KERN_INFO "perf: AMD NB counters detected\n"); 543 printk(KERN_INFO "perf: AMD NB counters detected\n");
520 ret = 0; 544 ret = 0;
@@ -522,20 +546,28 @@ static int __init amd_uncore_init(void)
522 546
523 if (cpu_has_perfctr_l2) { 547 if (cpu_has_perfctr_l2) {
524 amd_uncore_l2 = alloc_percpu(struct amd_uncore *); 548 amd_uncore_l2 = alloc_percpu(struct amd_uncore *);
525 perf_pmu_register(&amd_l2_pmu, amd_l2_pmu.name, -1); 549 if (!amd_uncore_l2) {
550 ret = -ENOMEM;
551 goto fail_l2;
552 }
553 ret = perf_pmu_register(&amd_l2_pmu, amd_l2_pmu.name, -1);
554 if (ret)
555 goto fail_l2;
526 556
527 printk(KERN_INFO "perf: AMD L2I counters detected\n"); 557 printk(KERN_INFO "perf: AMD L2I counters detected\n");
528 ret = 0; 558 ret = 0;
529 } 559 }
530 560
531 if (ret) 561 if (ret)
532 return -ENODEV; 562 goto fail_nodev;
533 563
534 cpu_notifier_register_begin(); 564 cpu_notifier_register_begin();
535 565
536 /* init cpus already online before registering for hotplug notifier */ 566 /* init cpus already online before registering for hotplug notifier */
537 for_each_online_cpu(cpu) { 567 for_each_online_cpu(cpu) {
538 amd_uncore_cpu_up_prepare(cpu); 568 ret = amd_uncore_cpu_up_prepare(cpu);
569 if (ret)
570 goto fail_online;
539 smp_call_function_single(cpu, init_cpu_already_online, NULL, 1); 571 smp_call_function_single(cpu, init_cpu_already_online, NULL, 1);
540 } 572 }
541 573
@@ -543,5 +575,30 @@ static int __init amd_uncore_init(void)
543 cpu_notifier_register_done(); 575 cpu_notifier_register_done();
544 576
545 return 0; 577 return 0;
578
579
580fail_online:
581 for_each_online_cpu(cpu2) {
582 if (cpu2 == cpu)
583 break;
 584	smp_call_function_single(cpu2, cleanup_cpu_online, NULL, 1);
585 }
586 cpu_notifier_register_done();
587
588 /* amd_uncore_nb/l2 should have been freed by cleanup_cpu_online */
589 amd_uncore_nb = amd_uncore_l2 = NULL;
590 if (cpu_has_perfctr_l2)
591 perf_pmu_unregister(&amd_l2_pmu);
592fail_l2:
593 if (cpu_has_perfctr_nb)
594 perf_pmu_unregister(&amd_nb_pmu);
595 if (amd_uncore_l2)
596 free_percpu(amd_uncore_l2);
597fail_nb:
598 if (amd_uncore_nb)
599 free_percpu(amd_uncore_nb);
600
601fail_nodev:
602 return ret;
546} 603}
547device_initcall(amd_uncore_init); 604device_initcall(amd_uncore_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 07846d738bdb..2502d0d9d246 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2182,6 +2182,41 @@ static void intel_snb_check_microcode(void)
2182 } 2182 }
2183} 2183}
2184 2184
2185/*
2186 * Under certain circumstances, accessing certain MSRs may cause a #GP.
2187 * The function tests if the input MSR can be safely accessed.
2188 */
2189static bool check_msr(unsigned long msr, u64 mask)
2190{
2191 u64 val_old, val_new, val_tmp;
2192
2193 /*
2194 * Read the current value, change it and read it back to see if it
2195 * matches, this is needed to detect certain hardware emulators
2196 * (qemu/kvm) that don't trap on the MSR access and always return 0s.
2197 */
2198 if (rdmsrl_safe(msr, &val_old))
2199 return false;
2200
2201 /*
2202 * Only change the bits which can be updated by wrmsrl.
2203 */
2204 val_tmp = val_old ^ mask;
2205 if (wrmsrl_safe(msr, val_tmp) ||
2206 rdmsrl_safe(msr, &val_new))
2207 return false;
2208
2209 if (val_new != val_tmp)
2210 return false;
2211
2212	/* At this point the MSR is known to be safely accessible.
2213 * Restore the old value and return.
2214 */
2215 wrmsrl(msr, val_old);
2216
2217 return true;
2218}
2219
2185static __init void intel_sandybridge_quirk(void) 2220static __init void intel_sandybridge_quirk(void)
2186{ 2221{
2187 x86_pmu.check_microcode = intel_snb_check_microcode; 2222 x86_pmu.check_microcode = intel_snb_check_microcode;
@@ -2271,7 +2306,8 @@ __init int intel_pmu_init(void)
2271 union cpuid10_ebx ebx; 2306 union cpuid10_ebx ebx;
2272 struct event_constraint *c; 2307 struct event_constraint *c;
2273 unsigned int unused; 2308 unsigned int unused;
2274 int version; 2309 struct extra_reg *er;
2310 int version, i;
2275 2311
2276 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { 2312 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
2277 switch (boot_cpu_data.x86) { 2313 switch (boot_cpu_data.x86) {
@@ -2474,6 +2510,9 @@ __init int intel_pmu_init(void)
2474 case 62: /* IvyBridge EP */ 2510 case 62: /* IvyBridge EP */
2475 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, 2511 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
2476 sizeof(hw_cache_event_ids)); 2512 sizeof(hw_cache_event_ids));
2513 /* dTLB-load-misses on IVB is different than SNB */
2514 hw_cache_event_ids[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = 0x8108; /* DTLB_LOAD_MISSES.DEMAND_LD_MISS_CAUSES_A_WALK */
2515
2477 memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, 2516 memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
2478 sizeof(hw_cache_extra_regs)); 2517 sizeof(hw_cache_extra_regs));
2479 2518
@@ -2574,6 +2613,34 @@ __init int intel_pmu_init(void)
2574 } 2613 }
2575 } 2614 }
2576 2615
2616 /*
2617	 * Accessing LBR MSRs may cause a #GP under certain circumstances.
2618	 * E.g. KVM doesn't support LBR MSRs.
2619	 * Check all LBR MSRs here.
2620	 * Disable LBR access if any LBR MSR cannot be accessed.
2621 */
2622 if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL))
2623 x86_pmu.lbr_nr = 0;
2624 for (i = 0; i < x86_pmu.lbr_nr; i++) {
2625 if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) &&
2626 check_msr(x86_pmu.lbr_to + i, 0xffffUL)))
2627 x86_pmu.lbr_nr = 0;
2628 }
2629
2630 /*
2631	 * Accessing extra-reg MSRs may cause a #GP under certain circumstances.
2632	 * E.g. KVM doesn't support offcore events.
2633 * Check all extra_regs here.
2634 */
2635 if (x86_pmu.extra_regs) {
2636 for (er = x86_pmu.extra_regs; er->msr; er++) {
2637 er->extra_msr_access = check_msr(er->msr, 0x1ffUL);
2638 /* Disable LBR select mapping */
2639 if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access)
2640 x86_pmu.lbr_sel_map = NULL;
2641 }
2642 }
2643
2577 /* Support full width counters using alternative MSR range */ 2644 /* Support full width counters using alternative MSR range */
2578 if (x86_pmu.intel_cap.full_width_write) { 2645 if (x86_pmu.intel_cap.full_width_write) {
2579 x86_pmu.max_period = x86_pmu.cntval_mask; 2646 x86_pmu.max_period = x86_pmu.cntval_mask;
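
The same accessibility question that check_msr() answers in-kernel can be asked from user space through the msr driver, which is handy when debugging why LBR or offcore events disappear inside a guest. A small sketch, assuming the msr module is loaded and root privileges; 0x1c9 (the usual MSR_LASTBRANCH_TOS address on Intel parts) is used purely as an example:

	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		uint64_t val;
		int fd = open("/dev/cpu/0/msr", O_RDONLY);

		if (fd < 0)
			return 1;
		/* pread() offset selects the MSR number */
		if (pread(fd, &val, sizeof(val), 0x1c9) != sizeof(val))
			printf("MSR 0x1c9 not readable (likely a guest)\n");
		else
			printf("MSR 0x1c9 = %#llx\n", (unsigned long long)val);
		close(fd);
		return 0;
	}
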
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 980970cb744d..696ade311ded 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -311,9 +311,11 @@ static int alloc_bts_buffer(int cpu)
311 if (!x86_pmu.bts) 311 if (!x86_pmu.bts)
312 return 0; 312 return 0;
313 313
314 buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL, node); 314 buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
315 if (unlikely(!buffer)) 315 if (unlikely(!buffer)) {
316 WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
316 return -ENOMEM; 317 return -ENOMEM;
318 }
317 319
318 max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; 320 max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
319 thresh = max / 16; 321 thresh = max / 16;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 65bbbea38b9c..cfc6f9dfcd90 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -550,16 +550,16 @@ static struct extra_reg snbep_uncore_cbox_extra_regs[] = {
550 SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x6), 550 SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x6),
551 SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x8), 551 SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x8),
552 SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x8), 552 SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x8),
553 SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0xc), 553 SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0xa),
554 SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0xc), 554 SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0xa),
555 SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x2), 555 SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x2),
556 SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x2), 556 SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x2),
557 SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x2), 557 SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x2),
558 SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x2), 558 SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x2),
559 SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x8), 559 SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x8),
560 SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x8), 560 SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x8),
561 SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0xc), 561 SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0xa),
562 SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0xc), 562 SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0xa),
563 SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x2), 563 SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x2),
564 SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x2), 564 SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x2),
565 SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x2), 565 SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x2),
@@ -1222,6 +1222,7 @@ static struct extra_reg ivt_uncore_cbox_extra_regs[] = {
1222 SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN, 1222 SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
1223 SNBEP_CBO_PMON_CTL_TID_EN, 0x1), 1223 SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
1224 SNBEP_CBO_EVENT_EXTRA_REG(0x1031, 0x10ff, 0x2), 1224 SNBEP_CBO_EVENT_EXTRA_REG(0x1031, 0x10ff, 0x2),
1225
1225 SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4), 1226 SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4),
1226 SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0xc), 1227 SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0xc),
1227 SNBEP_CBO_EVENT_EXTRA_REG(0x5134, 0xffff, 0xc), 1228 SNBEP_CBO_EVENT_EXTRA_REG(0x5134, 0xffff, 0xc),
@@ -1245,7 +1246,7 @@ static struct extra_reg ivt_uncore_cbox_extra_regs[] = {
1245 SNBEP_CBO_EVENT_EXTRA_REG(0x8335, 0xffff, 0x10), 1246 SNBEP_CBO_EVENT_EXTRA_REG(0x8335, 0xffff, 0x10),
1246 SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x10), 1247 SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x10),
1247 SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x10), 1248 SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x10),
1248 SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10), 1249 SNBEP_CBO_EVENT_EXTRA_REG(0x2136, 0xffff, 0x10),
1249 SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10), 1250 SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10),
1250 SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0x18), 1251 SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0x18),
1251 SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0x18), 1252 SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0x18),
@@ -2946,10 +2947,7 @@ again:
2946 * extra registers. If we failed to take an extra 2947 * extra registers. If we failed to take an extra
2947 * register, try the alternative. 2948 * register, try the alternative.
2948 */ 2949 */
2949 if (idx % 2) 2950 idx ^= 1;
2950 idx--;
2951 else
2952 idx++;
2953 if (idx != reg1->idx % 6) { 2951 if (idx != reg1->idx % 6) {
2954 if (idx == 2) 2952 if (idx == 2)
2955 config1 >>= 8; 2953 config1 >>= 8;
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 06fe3ed8b851..5433658e598d 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -97,6 +97,14 @@ static int show_cpuinfo(struct seq_file *m, void *v)
97 if (cpu_has(c, i) && x86_cap_flags[i] != NULL) 97 if (cpu_has(c, i) && x86_cap_flags[i] != NULL)
98 seq_printf(m, " %s", x86_cap_flags[i]); 98 seq_printf(m, " %s", x86_cap_flags[i]);
99 99
100 seq_printf(m, "\nbugs\t\t:");
101 for (i = 0; i < 32*NBUGINTS; i++) {
102 unsigned int bug_bit = 32*NCAPINTS + i;
103
104 if (cpu_has_bug(c, bug_bit) && x86_bug_flags[i])
105 seq_printf(m, " %s", x86_bug_flags[i]);
106 }
107
100 seq_printf(m, "\nbogomips\t: %lu.%02lu\n", 108 seq_printf(m, "\nbogomips\t: %lu.%02lu\n",
101 c->loops_per_jiffy/(500000/HZ), 109 c->loops_per_jiffy/(500000/HZ),
102 (c->loops_per_jiffy/(5000/HZ)) % 100); 110 (c->loops_per_jiffy/(5000/HZ)) % 100);
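
Since the field label comes straight from the seq_printf() added above, the new line can be picked out of /proc/cpuinfo with a trivial user-space sketch like this:

	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		char line[512];
		FILE *f = fopen("/proc/cpuinfo", "r");

		if (!f)
			return 1;
		while (fgets(line, sizeof(line), f))
			if (!strncmp(line, "bugs", 4))	/* one line per CPU */
				fputs(line, stdout);
		fclose(f);
		return 0;
	}
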
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index b6f794aa1693..4a8013d55947 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -38,7 +38,6 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
38 { X86_FEATURE_PTS, CR_EAX, 6, 0x00000006, 0 }, 38 { X86_FEATURE_PTS, CR_EAX, 6, 0x00000006, 0 },
39 { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 }, 39 { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 },
40 { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 }, 40 { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 },
41 { X86_FEATURE_XSAVEOPT, CR_EAX, 0, 0x0000000d, 1 },
42 { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 }, 41 { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 },
43 { X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 }, 42 { X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 },
44 { X86_FEATURE_PROC_FEEDBACK, CR_EDX,11, 0x80000007, 0 }, 43 { X86_FEATURE_PROC_FEEDBACK, CR_EDX,11, 0x80000007, 0 },
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index dbaa23e78b36..47c410d99f5d 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -425,8 +425,8 @@ sysenter_do_call:
425 cmpl $(NR_syscalls), %eax 425 cmpl $(NR_syscalls), %eax
426 jae sysenter_badsys 426 jae sysenter_badsys
427 call *sys_call_table(,%eax,4) 427 call *sys_call_table(,%eax,4)
428 movl %eax,PT_EAX(%esp)
429sysenter_after_call: 428sysenter_after_call:
429 movl %eax,PT_EAX(%esp)
430 LOCKDEP_SYS_EXIT 430 LOCKDEP_SYS_EXIT
431 DISABLE_INTERRUPTS(CLBR_ANY) 431 DISABLE_INTERRUPTS(CLBR_ANY)
432 TRACE_IRQS_OFF 432 TRACE_IRQS_OFF
@@ -502,6 +502,7 @@ ENTRY(system_call)
502 jae syscall_badsys 502 jae syscall_badsys
503syscall_call: 503syscall_call:
504 call *sys_call_table(,%eax,4) 504 call *sys_call_table(,%eax,4)
505syscall_after_call:
505 movl %eax,PT_EAX(%esp) # store the return value 506 movl %eax,PT_EAX(%esp) # store the return value
506syscall_exit: 507syscall_exit:
507 LOCKDEP_SYS_EXIT 508 LOCKDEP_SYS_EXIT
@@ -675,12 +676,12 @@ syscall_fault:
675END(syscall_fault) 676END(syscall_fault)
676 677
677syscall_badsys: 678syscall_badsys:
678 movl $-ENOSYS,PT_EAX(%esp) 679 movl $-ENOSYS,%eax
679 jmp syscall_exit 680 jmp syscall_after_call
680END(syscall_badsys) 681END(syscall_badsys)
681 682
682sysenter_badsys: 683sysenter_badsys:
683 movl $-ENOSYS,PT_EAX(%esp) 684 movl $-ENOSYS,%eax
684 jmp sysenter_after_call 685 jmp sysenter_after_call
685END(syscall_badsys) 686END(syscall_badsys)
686 CFI_ENDPROC 687 CFI_ENDPROC
@@ -1058,9 +1059,6 @@ ENTRY(mcount)
1058END(mcount) 1059END(mcount)
1059 1060
1060ENTRY(ftrace_caller) 1061ENTRY(ftrace_caller)
1061 cmpl $0, function_trace_stop
1062 jne ftrace_stub
1063
1064 pushl %eax 1062 pushl %eax
1065 pushl %ecx 1063 pushl %ecx
1066 pushl %edx 1064 pushl %edx
@@ -1092,8 +1090,6 @@ END(ftrace_caller)
1092 1090
1093ENTRY(ftrace_regs_caller) 1091ENTRY(ftrace_regs_caller)
1094 pushf /* push flags before compare (in cs location) */ 1092 pushf /* push flags before compare (in cs location) */
1095 cmpl $0, function_trace_stop
1096 jne ftrace_restore_flags
1097 1093
1098 /* 1094 /*
1099 * i386 does not save SS and ESP when coming from kernel. 1095 * i386 does not save SS and ESP when coming from kernel.
@@ -1152,7 +1148,6 @@ GLOBAL(ftrace_regs_call)
1152 popf /* Pop flags at end (no addl to corrupt flags) */ 1148 popf /* Pop flags at end (no addl to corrupt flags) */
1153 jmp ftrace_ret 1149 jmp ftrace_ret
1154 1150
1155ftrace_restore_flags:
1156 popf 1151 popf
1157 jmp ftrace_stub 1152 jmp ftrace_stub
1158#else /* ! CONFIG_DYNAMIC_FTRACE */ 1153#else /* ! CONFIG_DYNAMIC_FTRACE */
@@ -1161,9 +1156,6 @@ ENTRY(mcount)
1161 cmpl $__PAGE_OFFSET, %esp 1156 cmpl $__PAGE_OFFSET, %esp
1162 jb ftrace_stub /* Paging not enabled yet? */ 1157 jb ftrace_stub /* Paging not enabled yet? */
1163 1158
1164 cmpl $0, function_trace_stop
1165 jne ftrace_stub
1166
1167 cmpl $ftrace_stub, ftrace_trace_function 1159 cmpl $ftrace_stub, ftrace_trace_function
1168 jnz trace 1160 jnz trace
1169#ifdef CONFIG_FUNCTION_GRAPH_TRACER 1161#ifdef CONFIG_FUNCTION_GRAPH_TRACER
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b25ca969edd2..2fac1343a90b 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -207,7 +207,6 @@ ENDPROC(native_usergs_sysret64)
207 */ 207 */
208 .macro XCPT_FRAME start=1 offset=0 208 .macro XCPT_FRAME start=1 offset=0
209 INTR_FRAME \start, RIP+\offset-ORIG_RAX 209 INTR_FRAME \start, RIP+\offset-ORIG_RAX
210 /*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/
211 .endm 210 .endm
212 211
213/* 212/*
@@ -287,21 +286,21 @@ ENDPROC(native_usergs_sysret64)
287ENTRY(save_paranoid) 286ENTRY(save_paranoid)
288 XCPT_FRAME 1 RDI+8 287 XCPT_FRAME 1 RDI+8
289 cld 288 cld
290 movq_cfi rdi, RDI+8 289 movq %rdi, RDI+8(%rsp)
291 movq_cfi rsi, RSI+8 290 movq %rsi, RSI+8(%rsp)
292 movq_cfi rdx, RDX+8 291 movq_cfi rdx, RDX+8
293 movq_cfi rcx, RCX+8 292 movq_cfi rcx, RCX+8
294 movq_cfi rax, RAX+8 293 movq_cfi rax, RAX+8
295 movq_cfi r8, R8+8 294 movq %r8, R8+8(%rsp)
296 movq_cfi r9, R9+8 295 movq %r9, R9+8(%rsp)
297 movq_cfi r10, R10+8 296 movq %r10, R10+8(%rsp)
298 movq_cfi r11, R11+8 297 movq %r11, R11+8(%rsp)
299 movq_cfi rbx, RBX+8 298 movq_cfi rbx, RBX+8
300 movq_cfi rbp, RBP+8 299 movq %rbp, RBP+8(%rsp)
301 movq_cfi r12, R12+8 300 movq %r12, R12+8(%rsp)
302 movq_cfi r13, R13+8 301 movq %r13, R13+8(%rsp)
303 movq_cfi r14, R14+8 302 movq %r14, R14+8(%rsp)
304 movq_cfi r15, R15+8 303 movq %r15, R15+8(%rsp)
305 movl $1,%ebx 304 movl $1,%ebx
306 movl $MSR_GS_BASE,%ecx 305 movl $MSR_GS_BASE,%ecx
307 rdmsr 306 rdmsr
@@ -830,27 +829,24 @@ restore_args:
830 RESTORE_ARGS 1,8,1 829 RESTORE_ARGS 1,8,1
831 830
832irq_return: 831irq_return:
832 INTERRUPT_RETURN
833
834ENTRY(native_iret)
833 /* 835 /*
834 * Are we returning to a stack segment from the LDT? Note: in 836 * Are we returning to a stack segment from the LDT? Note: in
835 * 64-bit mode SS:RSP on the exception stack is always valid. 837 * 64-bit mode SS:RSP on the exception stack is always valid.
836 */ 838 */
837#ifdef CONFIG_X86_ESPFIX64 839#ifdef CONFIG_X86_ESPFIX64
838 testb $4,(SS-RIP)(%rsp) 840 testb $4,(SS-RIP)(%rsp)
839 jnz irq_return_ldt 841 jnz native_irq_return_ldt
840#endif 842#endif
841 843
842irq_return_iret: 844native_irq_return_iret:
843 INTERRUPT_RETURN
844 _ASM_EXTABLE(irq_return_iret, bad_iret)
845
846#ifdef CONFIG_PARAVIRT
847ENTRY(native_iret)
848 iretq 845 iretq
849 _ASM_EXTABLE(native_iret, bad_iret) 846 _ASM_EXTABLE(native_irq_return_iret, bad_iret)
850#endif
851 847
852#ifdef CONFIG_X86_ESPFIX64 848#ifdef CONFIG_X86_ESPFIX64
853irq_return_ldt: 849native_irq_return_ldt:
854 pushq_cfi %rax 850 pushq_cfi %rax
855 pushq_cfi %rdi 851 pushq_cfi %rdi
856 SWAPGS 852 SWAPGS
@@ -872,7 +868,7 @@ irq_return_ldt:
872 SWAPGS 868 SWAPGS
873 movq %rax,%rsp 869 movq %rax,%rsp
874 popq_cfi %rax 870 popq_cfi %rax
875 jmp irq_return_iret 871 jmp native_irq_return_iret
876#endif 872#endif
877 873
878 .section .fixup,"ax" 874 .section .fixup,"ax"
@@ -956,13 +952,8 @@ __do_double_fault:
956 cmpl $__KERNEL_CS,CS(%rdi) 952 cmpl $__KERNEL_CS,CS(%rdi)
957 jne do_double_fault 953 jne do_double_fault
958 movq RIP(%rdi),%rax 954 movq RIP(%rdi),%rax
959 cmpq $irq_return_iret,%rax 955 cmpq $native_irq_return_iret,%rax
960#ifdef CONFIG_PARAVIRT
961 je 1f
962 cmpq $native_iret,%rax
963#endif
964 jne do_double_fault /* This shouldn't happen... */ 956 jne do_double_fault /* This shouldn't happen... */
9651:
966 movq PER_CPU_VAR(kernel_stack),%rax 957 movq PER_CPU_VAR(kernel_stack),%rax
967 subq $(6*8-KERNEL_STACK_OFFSET),%rax /* Reset to original stack */ 958 subq $(6*8-KERNEL_STACK_OFFSET),%rax /* Reset to original stack */
968 movq %rax,RSP(%rdi) 959 movq %rax,RSP(%rdi)
@@ -1395,21 +1386,21 @@ ENTRY(error_entry)
1395 CFI_ADJUST_CFA_OFFSET 15*8 1386 CFI_ADJUST_CFA_OFFSET 15*8
1396 /* oldrax contains error code */ 1387 /* oldrax contains error code */
1397 cld 1388 cld
1398 movq_cfi rdi, RDI+8 1389 movq %rdi, RDI+8(%rsp)
1399 movq_cfi rsi, RSI+8 1390 movq %rsi, RSI+8(%rsp)
1400 movq_cfi rdx, RDX+8 1391 movq %rdx, RDX+8(%rsp)
1401 movq_cfi rcx, RCX+8 1392 movq %rcx, RCX+8(%rsp)
1402 movq_cfi rax, RAX+8 1393 movq %rax, RAX+8(%rsp)
1403 movq_cfi r8, R8+8 1394 movq %r8, R8+8(%rsp)
1404 movq_cfi r9, R9+8 1395 movq %r9, R9+8(%rsp)
1405 movq_cfi r10, R10+8 1396 movq %r10, R10+8(%rsp)
1406 movq_cfi r11, R11+8 1397 movq %r11, R11+8(%rsp)
1407 movq_cfi rbx, RBX+8 1398 movq_cfi rbx, RBX+8
1408 movq_cfi rbp, RBP+8 1399 movq %rbp, RBP+8(%rsp)
1409 movq_cfi r12, R12+8 1400 movq %r12, R12+8(%rsp)
1410 movq_cfi r13, R13+8 1401 movq %r13, R13+8(%rsp)
1411 movq_cfi r14, R14+8 1402 movq %r14, R14+8(%rsp)
1412 movq_cfi r15, R15+8 1403 movq %r15, R15+8(%rsp)
1413 xorl %ebx,%ebx 1404 xorl %ebx,%ebx
1414 testl $3,CS+8(%rsp) 1405 testl $3,CS+8(%rsp)
1415 je error_kernelspace 1406 je error_kernelspace
@@ -1427,8 +1418,9 @@ error_sti:
1427 * compat mode. Check for these here too. 1418 * compat mode. Check for these here too.
1428 */ 1419 */
1429error_kernelspace: 1420error_kernelspace:
1421 CFI_REL_OFFSET rcx, RCX+8
1430 incl %ebx 1422 incl %ebx
1431 leaq irq_return_iret(%rip),%rcx 1423 leaq native_irq_return_iret(%rip),%rcx
1432 cmpq %rcx,RIP+8(%rsp) 1424 cmpq %rcx,RIP+8(%rsp)
1433 je error_swapgs 1425 je error_swapgs
1434 movl %ecx,%eax /* zero extend */ 1426 movl %ecx,%eax /* zero extend */
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index cbc4a91b131e..3386dc9aa333 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -703,6 +703,9 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
703 unsigned long return_hooker = (unsigned long) 703 unsigned long return_hooker = (unsigned long)
704 &return_to_handler; 704 &return_to_handler;
705 705
706 if (unlikely(ftrace_graph_is_dead()))
707 return;
708
706 if (unlikely(atomic_read(&current->tracing_graph_pause))) 709 if (unlikely(atomic_read(&current->tracing_graph_pause)))
707 return; 710 return;
708 711
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 7596df664901..67e6d19ef1be 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -574,6 +574,9 @@ int kprobe_int3_handler(struct pt_regs *regs)
574 struct kprobe *p; 574 struct kprobe *p;
575 struct kprobe_ctlblk *kcb; 575 struct kprobe_ctlblk *kcb;
576 576
577 if (user_mode_vm(regs))
578 return 0;
579
577 addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t)); 580 addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t));
578 /* 581 /*
579 * We don't want to be preempted for the entire 582 * We don't want to be preempted for the entire
diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S
index c050a0153168..c73aecf10d34 100644
--- a/arch/x86/kernel/mcount_64.S
+++ b/arch/x86/kernel/mcount_64.S
@@ -46,10 +46,6 @@ END(function_hook)
46.endm 46.endm
47 47
48ENTRY(ftrace_caller) 48ENTRY(ftrace_caller)
49 /* Check if tracing was disabled (quick check) */
50 cmpl $0, function_trace_stop
51 jne ftrace_stub
52
53 ftrace_caller_setup 49 ftrace_caller_setup
54 /* regs go into 4th parameter (but make it NULL) */ 50 /* regs go into 4th parameter (but make it NULL) */
55 movq $0, %rcx 51 movq $0, %rcx
@@ -73,10 +69,6 @@ ENTRY(ftrace_regs_caller)
73 /* Save the current flags before compare (in SS location)*/ 69 /* Save the current flags before compare (in SS location)*/
74 pushfq 70 pushfq
75 71
76 /* Check if tracing was disabled (quick check) */
77 cmpl $0, function_trace_stop
78 jne ftrace_restore_flags
79
80 /* skip=8 to skip flags saved in SS */ 72 /* skip=8 to skip flags saved in SS */
81 ftrace_caller_setup 8 73 ftrace_caller_setup 8
82 74
@@ -131,7 +123,7 @@ GLOBAL(ftrace_regs_call)
131 popfq 123 popfq
132 124
133 jmp ftrace_return 125 jmp ftrace_return
134ftrace_restore_flags: 126
135 popfq 127 popfq
136 jmp ftrace_stub 128 jmp ftrace_stub
137 129
@@ -141,9 +133,6 @@ END(ftrace_regs_caller)
141#else /* ! CONFIG_DYNAMIC_FTRACE */ 133#else /* ! CONFIG_DYNAMIC_FTRACE */
142 134
143ENTRY(function_hook) 135ENTRY(function_hook)
144 cmpl $0, function_trace_stop
145 jne ftrace_stub
146
147 cmpq $ftrace_stub, ftrace_trace_function 136 cmpq $ftrace_stub, ftrace_trace_function
148 jnz trace 137 jnz trace
149 138
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 3f08f34f93eb..a1da6737ba5b 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -6,7 +6,6 @@ DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
6DEF_NATIVE(pv_irq_ops, irq_enable, "sti"); 6DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
7DEF_NATIVE(pv_irq_ops, restore_fl, "pushq %rdi; popfq"); 7DEF_NATIVE(pv_irq_ops, restore_fl, "pushq %rdi; popfq");
8DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax"); 8DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
9DEF_NATIVE(pv_cpu_ops, iret, "iretq");
10DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax"); 9DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
11DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax"); 10DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
12DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3"); 11DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
@@ -50,7 +49,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
50 PATCH_SITE(pv_irq_ops, save_fl); 49 PATCH_SITE(pv_irq_ops, save_fl);
51 PATCH_SITE(pv_irq_ops, irq_enable); 50 PATCH_SITE(pv_irq_ops, irq_enable);
52 PATCH_SITE(pv_irq_ops, irq_disable); 51 PATCH_SITE(pv_irq_ops, irq_disable);
53 PATCH_SITE(pv_cpu_ops, iret);
54 PATCH_SITE(pv_cpu_ops, irq_enable_sysexit); 52 PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
55 PATCH_SITE(pv_cpu_ops, usergs_sysret32); 53 PATCH_SITE(pv_cpu_ops, usergs_sysret32);
56 PATCH_SITE(pv_cpu_ops, usergs_sysret64); 54 PATCH_SITE(pv_cpu_ops, usergs_sysret64);
diff --git a/arch/x86/kernel/pmc_atom.c b/arch/x86/kernel/pmc_atom.c
new file mode 100644
index 000000000000..0c424a67985d
--- /dev/null
+++ b/arch/x86/kernel/pmc_atom.c
@@ -0,0 +1,321 @@
1/*
2 * Intel Atom SOC Power Management Controller Driver
3 * Copyright (c) 2014, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 */
15
16#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
17
18#include <linux/module.h>
19#include <linux/init.h>
20#include <linux/pci.h>
21#include <linux/device.h>
22#include <linux/debugfs.h>
23#include <linux/seq_file.h>
24#include <linux/io.h>
25
26#include <asm/pmc_atom.h>
27
28#define DRIVER_NAME KBUILD_MODNAME
29
30struct pmc_dev {
31 u32 base_addr;
32 void __iomem *regmap;
33#ifdef CONFIG_DEBUG_FS
34 struct dentry *dbgfs_dir;
35#endif /* CONFIG_DEBUG_FS */
36};
37
38static struct pmc_dev pmc_device;
39static u32 acpi_base_addr;
40
41struct pmc_dev_map {
42 const char *name;
43 u32 bit_mask;
44};
45
46static const struct pmc_dev_map dev_map[] = {
47 {"0 - LPSS1_F0_DMA", BIT_LPSS1_F0_DMA},
48 {"1 - LPSS1_F1_PWM1", BIT_LPSS1_F1_PWM1},
49 {"2 - LPSS1_F2_PWM2", BIT_LPSS1_F2_PWM2},
50 {"3 - LPSS1_F3_HSUART1", BIT_LPSS1_F3_HSUART1},
51 {"4 - LPSS1_F4_HSUART2", BIT_LPSS1_F4_HSUART2},
52 {"5 - LPSS1_F5_SPI", BIT_LPSS1_F5_SPI},
53 {"6 - LPSS1_F6_Reserved", BIT_LPSS1_F6_XXX},
54 {"7 - LPSS1_F7_Reserved", BIT_LPSS1_F7_XXX},
55 {"8 - SCC_EMMC", BIT_SCC_EMMC},
56 {"9 - SCC_SDIO", BIT_SCC_SDIO},
57 {"10 - SCC_SDCARD", BIT_SCC_SDCARD},
58 {"11 - SCC_MIPI", BIT_SCC_MIPI},
59 {"12 - HDA", BIT_HDA},
60 {"13 - LPE", BIT_LPE},
61 {"14 - OTG", BIT_OTG},
62 {"15 - USH", BIT_USH},
63 {"16 - GBE", BIT_GBE},
64 {"17 - SATA", BIT_SATA},
65 {"18 - USB_EHCI", BIT_USB_EHCI},
66 {"19 - SEC", BIT_SEC},
67 {"20 - PCIE_PORT0", BIT_PCIE_PORT0},
68 {"21 - PCIE_PORT1", BIT_PCIE_PORT1},
69 {"22 - PCIE_PORT2", BIT_PCIE_PORT2},
70 {"23 - PCIE_PORT3", BIT_PCIE_PORT3},
71 {"24 - LPSS2_F0_DMA", BIT_LPSS2_F0_DMA},
72 {"25 - LPSS2_F1_I2C1", BIT_LPSS2_F1_I2C1},
73 {"26 - LPSS2_F2_I2C2", BIT_LPSS2_F2_I2C2},
74 {"27 - LPSS2_F3_I2C3", BIT_LPSS2_F3_I2C3},
 75	{"28 - LPSS2_F4_I2C4",		BIT_LPSS2_F4_I2C4},
76 {"29 - LPSS2_F5_I2C5", BIT_LPSS2_F5_I2C5},
77 {"30 - LPSS2_F6_I2C6", BIT_LPSS2_F6_I2C6},
78 {"31 - LPSS2_F7_I2C7", BIT_LPSS2_F7_I2C7},
79 {"32 - SMB", BIT_SMB},
80 {"33 - OTG_SS_PHY", BIT_OTG_SS_PHY},
81 {"34 - USH_SS_PHY", BIT_USH_SS_PHY},
82 {"35 - DFX", BIT_DFX},
83};
84
85static inline u32 pmc_reg_read(struct pmc_dev *pmc, int reg_offset)
86{
87 return readl(pmc->regmap + reg_offset);
88}
89
90static inline void pmc_reg_write(struct pmc_dev *pmc, int reg_offset, u32 val)
91{
92 writel(val, pmc->regmap + reg_offset);
93}
94
95static void pmc_power_off(void)
96{
97 u16 pm1_cnt_port;
98 u32 pm1_cnt_value;
99
100 pr_info("Preparing to enter system sleep state S5\n");
101
102 pm1_cnt_port = acpi_base_addr + PM1_CNT;
103
104 pm1_cnt_value = inl(pm1_cnt_port);
105 pm1_cnt_value &= SLEEP_TYPE_MASK;
106 pm1_cnt_value |= SLEEP_TYPE_S5;
107 pm1_cnt_value |= SLEEP_ENABLE;
108
109 outl(pm1_cnt_value, pm1_cnt_port);
110}
111
112static void pmc_hw_reg_setup(struct pmc_dev *pmc)
113{
114 /*
115 * Disable PMC S0IX_WAKE_EN events coming from:
116 * - LPC clock run
117 * - GPIO_SUS ored dedicated IRQs
118 * - GPIO_SCORE ored dedicated IRQs
119 * - GPIO_SUS shared IRQ
120 * - GPIO_SCORE shared IRQ
121 */
122 pmc_reg_write(pmc, PMC_S0IX_WAKE_EN, (u32)PMC_WAKE_EN_SETTING);
123}
124
125#ifdef CONFIG_DEBUG_FS
126static int pmc_dev_state_show(struct seq_file *s, void *unused)
127{
128 struct pmc_dev *pmc = s->private;
129 u32 func_dis, func_dis_2, func_dis_index;
130 u32 d3_sts_0, d3_sts_1, d3_sts_index;
131 int dev_num, dev_index, reg_index;
132
133 func_dis = pmc_reg_read(pmc, PMC_FUNC_DIS);
134 func_dis_2 = pmc_reg_read(pmc, PMC_FUNC_DIS_2);
135 d3_sts_0 = pmc_reg_read(pmc, PMC_D3_STS_0);
136 d3_sts_1 = pmc_reg_read(pmc, PMC_D3_STS_1);
137
138 dev_num = ARRAY_SIZE(dev_map);
139
140 for (dev_index = 0; dev_index < dev_num; dev_index++) {
141 reg_index = dev_index / PMC_REG_BIT_WIDTH;
142 if (reg_index) {
143 func_dis_index = func_dis_2;
144 d3_sts_index = d3_sts_1;
145 } else {
146 func_dis_index = func_dis;
147 d3_sts_index = d3_sts_0;
148 }
149
150 seq_printf(s, "Dev: %-32s\tState: %s [%s]\n",
151 dev_map[dev_index].name,
152 dev_map[dev_index].bit_mask & func_dis_index ?
153 "Disabled" : "Enabled ",
154 dev_map[dev_index].bit_mask & d3_sts_index ?
155 "D3" : "D0");
156 }
157 return 0;
158}
159
160static int pmc_dev_state_open(struct inode *inode, struct file *file)
161{
162 return single_open(file, pmc_dev_state_show, inode->i_private);
163}
164
165static const struct file_operations pmc_dev_state_ops = {
166 .open = pmc_dev_state_open,
167 .read = seq_read,
168 .llseek = seq_lseek,
169 .release = single_release,
170};
171
172static int pmc_sleep_tmr_show(struct seq_file *s, void *unused)
173{
174 struct pmc_dev *pmc = s->private;
175 u64 s0ir_tmr, s0i1_tmr, s0i2_tmr, s0i3_tmr, s0_tmr;
176
177 s0ir_tmr = (u64)pmc_reg_read(pmc, PMC_S0IR_TMR) << PMC_TMR_SHIFT;
178 s0i1_tmr = (u64)pmc_reg_read(pmc, PMC_S0I1_TMR) << PMC_TMR_SHIFT;
179 s0i2_tmr = (u64)pmc_reg_read(pmc, PMC_S0I2_TMR) << PMC_TMR_SHIFT;
180 s0i3_tmr = (u64)pmc_reg_read(pmc, PMC_S0I3_TMR) << PMC_TMR_SHIFT;
181 s0_tmr = (u64)pmc_reg_read(pmc, PMC_S0_TMR) << PMC_TMR_SHIFT;
182
183 seq_printf(s, "S0IR Residency:\t%lldus\n", s0ir_tmr);
184 seq_printf(s, "S0I1 Residency:\t%lldus\n", s0i1_tmr);
185 seq_printf(s, "S0I2 Residency:\t%lldus\n", s0i2_tmr);
186 seq_printf(s, "S0I3 Residency:\t%lldus\n", s0i3_tmr);
187 seq_printf(s, "S0 Residency:\t%lldus\n", s0_tmr);
188 return 0;
189}
190
191static int pmc_sleep_tmr_open(struct inode *inode, struct file *file)
192{
193 return single_open(file, pmc_sleep_tmr_show, inode->i_private);
194}
195
196static const struct file_operations pmc_sleep_tmr_ops = {
197 .open = pmc_sleep_tmr_open,
198 .read = seq_read,
199 .llseek = seq_lseek,
200 .release = single_release,
201};
202
203static void pmc_dbgfs_unregister(struct pmc_dev *pmc)
204{
205 if (!pmc->dbgfs_dir)
206 return;
207
208 debugfs_remove_recursive(pmc->dbgfs_dir);
209 pmc->dbgfs_dir = NULL;
210}
211
212static int pmc_dbgfs_register(struct pmc_dev *pmc, struct pci_dev *pdev)
213{
214 struct dentry *dir, *f;
215
216 dir = debugfs_create_dir("pmc_atom", NULL);
217 if (!dir)
218 return -ENOMEM;
219
220 f = debugfs_create_file("dev_state", S_IFREG | S_IRUGO,
221 dir, pmc, &pmc_dev_state_ops);
222 if (!f) {
223 dev_err(&pdev->dev, "dev_states register failed\n");
224 goto err;
225 }
226 f = debugfs_create_file("sleep_state", S_IFREG | S_IRUGO,
227 dir, pmc, &pmc_sleep_tmr_ops);
228 if (!f) {
229 dev_err(&pdev->dev, "sleep_state register failed\n");
230 goto err;
231 }
232 pmc->dbgfs_dir = dir;
233 return 0;
234err:
235 pmc_dbgfs_unregister(pmc);
236 return -ENODEV;
237}
238#endif /* CONFIG_DEBUG_FS */
239
240static int pmc_setup_dev(struct pci_dev *pdev)
241{
242 struct pmc_dev *pmc = &pmc_device;
243 int ret;
244
245 /* Obtain ACPI base address */
246 pci_read_config_dword(pdev, ACPI_BASE_ADDR_OFFSET, &acpi_base_addr);
247 acpi_base_addr &= ACPI_BASE_ADDR_MASK;
248
249 /* Install power off function */
250 if (acpi_base_addr != 0 && pm_power_off == NULL)
251 pm_power_off = pmc_power_off;
252
253 pci_read_config_dword(pdev, PMC_BASE_ADDR_OFFSET, &pmc->base_addr);
254 pmc->base_addr &= PMC_BASE_ADDR_MASK;
255
256 pmc->regmap = ioremap_nocache(pmc->base_addr, PMC_MMIO_REG_LEN);
257 if (!pmc->regmap) {
258 dev_err(&pdev->dev, "error: ioremap failed\n");
259 return -ENOMEM;
260 }
261
262 /* PMC hardware registers setup */
263 pmc_hw_reg_setup(pmc);
264
265#ifdef CONFIG_DEBUG_FS
266 ret = pmc_dbgfs_register(pmc, pdev);
267 if (ret) {
268 iounmap(pmc->regmap);
269 return ret;
270 }
271#endif /* CONFIG_DEBUG_FS */
272 return 0;
273}
274
275/*
276 * Data for PCI driver interface
277 *
278 * This data only exists for exporting the supported
279 * PCI ids via MODULE_DEVICE_TABLE. We do not actually
280 * register a pci_driver, because lpc_ich will register
281 * a driver on the same PCI id.
282 */
283static const struct pci_device_id pmc_pci_ids[] = {
284 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_VLV_PMC) },
285 { 0, },
286};
287
288MODULE_DEVICE_TABLE(pci, pmc_pci_ids);
289
290static int __init pmc_atom_init(void)
291{
292 int err = -ENODEV;
293 struct pci_dev *pdev = NULL;
294 const struct pci_device_id *ent;
295
 296	/* We look for our device - PCU PMC;
 297	 * we assume that there is at most one such device.
 298	 *
 299	 * We can't use the plain pci_driver mechanism because the device is
 300	 * really a multi-function device: the main driver that binds to the
 301	 * pci_device is lpc_ich, so we have to find and bind to the device
 302	 * this way.
 303	 */
304 for_each_pci_dev(pdev) {
305 ent = pci_match_id(pmc_pci_ids, pdev);
306 if (ent) {
307 err = pmc_setup_dev(pdev);
308 goto out;
309 }
310 }
311 /* Device not found. */
312out:
313 return err;
314}
315
316module_init(pmc_atom_init);
317/* no module_exit, this driver shouldn't be unloaded */
318
319MODULE_AUTHOR("Aubrey Li <aubrey.li@linux.intel.com>");
320MODULE_DESCRIPTION("Intel Atom SOC Power Management Controller Interface");
321MODULE_LICENSE("GPL v2");
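
The two debugfs files registered by pmc_dbgfs_register() can be inspected with something as small as the sketch below, assuming debugfs is mounted at the conventional /sys/kernel/debug and the Baytrail PMC PCI function was matched at boot:

	#include <stdio.h>

	static void dump(const char *path)
	{
		char line[256];
		FILE *f = fopen(path, "r");

		if (!f)
			return;
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);
		fclose(f);
	}

	int main(void)
	{
		dump("/sys/kernel/debug/pmc_atom/dev_state");
		dump("/sys/kernel/debug/pmc_atom/sleep_state");
		return 0;
	}
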
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 52b1157c53eb..17962e667a91 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -28,6 +28,7 @@
28#include <linux/mc146818rtc.h> 28#include <linux/mc146818rtc.h>
29#include <asm/realmode.h> 29#include <asm/realmode.h>
30#include <asm/x86_init.h> 30#include <asm/x86_init.h>
31#include <asm/efi.h>
31 32
32/* 33/*
33 * Power off function, if any 34 * Power off function, if any
@@ -401,12 +402,25 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
401 402
402static int __init reboot_init(void) 403static int __init reboot_init(void)
403{ 404{
405 int rv;
406
404 /* 407 /*
405 * Only do the DMI check if reboot_type hasn't been overridden 408 * Only do the DMI check if reboot_type hasn't been overridden
406 * on the command line 409 * on the command line
407 */ 410 */
408 if (reboot_default) 411 if (!reboot_default)
409 dmi_check_system(reboot_dmi_table); 412 return 0;
413
414 /*
415 * The DMI quirks table takes precedence. If no quirks entry
416 * matches and the ACPI Hardware Reduced bit is set, force EFI
417 * reboot.
418 */
419 rv = dmi_check_system(reboot_dmi_table);
420
421 if (!rv && efi_reboot_required())
422 reboot_type = BOOT_EFI;
423
410 return 0; 424 return 0;
411} 425}
412core_initcall(reboot_init); 426core_initcall(reboot_init);
@@ -528,11 +542,7 @@ static void native_machine_emergency_restart(void)
528 break; 542 break;
529 543
530 case BOOT_EFI: 544 case BOOT_EFI:
531 if (efi_enabled(EFI_RUNTIME_SERVICES)) 545 efi_reboot(reboot_mode, NULL);
532 efi.reset_system(reboot_mode == REBOOT_WARM ?
533 EFI_RESET_WARM :
534 EFI_RESET_COLD,
535 EFI_SUCCESS, 0, NULL);
536 reboot_type = BOOT_BIOS; 546 reboot_type = BOOT_BIOS;
537 break; 547 break;
538 548
diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c
index 2a26819bb6a8..80eab01c1a68 100644
--- a/arch/x86/kernel/resource.c
+++ b/arch/x86/kernel/resource.c
@@ -37,10 +37,12 @@ static void remove_e820_regions(struct resource *avail)
37 37
38void arch_remove_reservations(struct resource *avail) 38void arch_remove_reservations(struct resource *avail)
39{ 39{
40 /* Trim out BIOS areas (low 1MB and high 2MB) and E820 regions */ 40 /*
41 * Trim out BIOS area (high 2MB) and E820 regions. We do not remove
42 * the low 1MB unconditionally, as this area is needed for some ISA
43 * cards requiring a memory range, e.g. the i82365 PCMCIA controller.
44 */
41 if (avail->flags & IORESOURCE_MEM) { 45 if (avail->flags & IORESOURCE_MEM) {
42 if (avail->start < BIOS_END)
43 avail->start = BIOS_END;
44 resource_clip(avail, BIOS_ROM_BASE, BIOS_ROM_END); 46 resource_clip(avail, BIOS_ROM_BASE, BIOS_ROM_END);
45 47
46 remove_e820_regions(avail); 48 remove_e820_regions(avail);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 78a0e6298922..41ead8d3bc0b 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -924,10 +924,10 @@ void __init setup_arch(char **cmdline_p)
 #endif
 #ifdef CONFIG_EFI
 	if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
-		     "EL32", 4)) {
+		     EFI32_LOADER_SIGNATURE, 4)) {
 		set_bit(EFI_BOOT, &efi.flags);
 	} else if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
-		     "EL64", 4)) {
+		     EFI64_LOADER_SIGNATURE, 4)) {
 		set_bit(EFI_BOOT, &efi.flags);
 		set_bit(EFI_64BIT, &efi.flags);
 	}
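
The setup.c hunk above only swaps the open-coded "EL32"/"EL64" literals for the EFI32_LOADER_SIGNATURE/EFI64_LOADER_SIGNATURE macros from asm/efi.h, which expand to the same four-byte strings. A user-space sketch of the same check; the sig argument stands in for boot_params.efi_info.efi_loader_signature.

#include <stdio.h>
#include <string.h>

/* The macros introduced by this patch expand to the same 4-byte strings the
 * old code compared against; shown here as plain defines. */
#define EFI32_LOADER_SIGNATURE "EL32"
#define EFI64_LOADER_SIGNATURE "EL64"

static int efi_bitness(const char sig[4])
{
	if (!strncmp(sig, EFI32_LOADER_SIGNATURE, 4))
		return 32;
	if (!strncmp(sig, EFI64_LOADER_SIGNATURE, 4))
		return 64;
	return 0;	/* not booted via EFI */
}

int main(void)
{
	printf("%d\n", efi_bitness("EL64"));	/* prints 64 */
	return 0;
}
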
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index ea030319b321..56b0c338061e 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -234,9 +234,6 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc)
 	return ns;
 }
 
-/* XXX surely we already have this someplace in the kernel?! */
-#define DIV_ROUND(n, d) (((n) + ((d) / 2)) / (d))
-
 static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
 {
 	unsigned long long tsc_now, ns_now;
@@ -259,7 +256,9 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
 	 * time function is continuous; see the comment near struct
 	 * cyc2ns_data.
 	 */
-	data->cyc2ns_mul = DIV_ROUND(NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR, cpu_khz);
+	data->cyc2ns_mul =
+		DIV_ROUND_CLOSEST(NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR,
+				  cpu_khz);
 	data->cyc2ns_shift = CYC2NS_SCALE_FACTOR;
 	data->cyc2ns_offset = ns_now -
 		mul_u64_u32_shr(tsc_now, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR);
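
The tsc.c change above drops the file-local DIV_ROUND() macro in favour of the generic DIV_ROUND_CLOSEST() helper, which rounds to nearest in exactly the same way. Below is a small sketch of the cyc2ns scaling it feeds, with the multiply-then-shift done directly rather than through mul_u64_u32_shr(); the CYC2NS_SCALE_FACTOR value is an assumption here, not taken from the hunk.

#include <stdio.h>
#include <stdint.h>

/* Same rounding-to-nearest division as DIV_ROUND_CLOSEST (and the local
 * DIV_ROUND macro this patch removes). */
#define DIV_ROUND_CLOSEST(n, d) (((n) + ((d) / 2)) / (d))

#define NSEC_PER_MSEC		1000000ULL
#define CYC2NS_SCALE_FACTOR	10	/* assumed value for illustration */

int main(void)
{
	uint64_t cpu_khz = 2400000;		/* 2.4 GHz TSC */
	uint64_t mul = DIV_ROUND_CLOSEST(NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR,
					 cpu_khz);
	uint64_t cycles = 2400000000ULL;	/* one second worth of cycles */

	/* cycles -> ns: multiply, then shift back down by the scale factor */
	printf("mul=%llu ns=%llu\n",
	       (unsigned long long)mul,
	       (unsigned long long)((cycles * mul) >> CYC2NS_SCALE_FACTOR));
	return 0;
}
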
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index f9087315e0cd..a5380590ab0e 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -95,4 +95,12 @@ static inline bool guest_cpuid_has_gbpages(struct kvm_vcpu *vcpu)
 	best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
 	return best && (best->edx & bit(X86_FEATURE_GBPAGES));
 }
+
+static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 7, 0);
+	return best && (best->ebx & bit(X86_FEATURE_RTM));
+}
 #endif
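
guest_cpuid_has_rtm() added above follows the usual pattern of these helpers: look up the guest's cached CPUID leaf (7, subleaf 0) and test a feature bit. The same test against the host CPU, as a user-space sketch using GCC's __get_cpuid_count(); RTM is bit 11 of EBX in leaf 7.

#include <stdio.h>
#include <cpuid.h>	/* GCC/Clang helper around the CPUID instruction */

static int cpu_has_rtm(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
		return 0;
	return !!(ebx & (1u << 11));	/* CPUID.(7,0):EBX.RTM */
}

int main(void)
{
	printf("RTM: %s\n", cpu_has_rtm() ? "yes" : "no");
	return 0;
}
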
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index e4e833d3d7d7..56657b0bb3bb 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -162,6 +162,10 @@
162#define NoWrite ((u64)1 << 45) /* No writeback */ 162#define NoWrite ((u64)1 << 45) /* No writeback */
163#define SrcWrite ((u64)1 << 46) /* Write back src operand */ 163#define SrcWrite ((u64)1 << 46) /* Write back src operand */
164#define NoMod ((u64)1 << 47) /* Mod field is ignored */ 164#define NoMod ((u64)1 << 47) /* Mod field is ignored */
165#define Intercept ((u64)1 << 48) /* Has valid intercept field */
166#define CheckPerm ((u64)1 << 49) /* Has valid check_perm field */
167#define NoBigReal ((u64)1 << 50) /* No big real mode */
168#define PrivUD ((u64)1 << 51) /* #UD instead of #GP on CPL > 0 */
165 169
166#define DstXacc (DstAccLo | SrcAccHi | SrcWrite) 170#define DstXacc (DstAccLo | SrcAccHi | SrcWrite)
167 171
@@ -426,6 +430,7 @@ static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
426 .modrm_reg = ctxt->modrm_reg, 430 .modrm_reg = ctxt->modrm_reg,
427 .modrm_rm = ctxt->modrm_rm, 431 .modrm_rm = ctxt->modrm_rm,
428 .src_val = ctxt->src.val64, 432 .src_val = ctxt->src.val64,
433 .dst_val = ctxt->dst.val64,
429 .src_bytes = ctxt->src.bytes, 434 .src_bytes = ctxt->src.bytes,
430 .dst_bytes = ctxt->dst.bytes, 435 .dst_bytes = ctxt->dst.bytes,
431 .ad_bytes = ctxt->ad_bytes, 436 .ad_bytes = ctxt->ad_bytes,
@@ -511,12 +516,6 @@ static u32 desc_limit_scaled(struct desc_struct *desc)
511 return desc->g ? (limit << 12) | 0xfff : limit; 516 return desc->g ? (limit << 12) | 0xfff : limit;
512} 517}
513 518
514static void set_seg_override(struct x86_emulate_ctxt *ctxt, int seg)
515{
516 ctxt->has_seg_override = true;
517 ctxt->seg_override = seg;
518}
519
520static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg) 519static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
521{ 520{
522 if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS) 521 if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
@@ -525,14 +524,6 @@ static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
525 return ctxt->ops->get_cached_segment_base(ctxt, seg); 524 return ctxt->ops->get_cached_segment_base(ctxt, seg);
526} 525}
527 526
528static unsigned seg_override(struct x86_emulate_ctxt *ctxt)
529{
530 if (!ctxt->has_seg_override)
531 return 0;
532
533 return ctxt->seg_override;
534}
535
536static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, 527static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
537 u32 error, bool valid) 528 u32 error, bool valid)
538{ 529{
@@ -651,7 +642,12 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
651 if (!fetch && (desc.type & 8) && !(desc.type & 2)) 642 if (!fetch && (desc.type & 8) && !(desc.type & 2))
652 goto bad; 643 goto bad;
653 lim = desc_limit_scaled(&desc); 644 lim = desc_limit_scaled(&desc);
654 if ((desc.type & 8) || !(desc.type & 4)) { 645 if ((ctxt->mode == X86EMUL_MODE_REAL) && !fetch &&
646 (ctxt->d & NoBigReal)) {
647 /* la is between zero and 0xffff */
648 if (la > 0xffff || (u32)(la + size - 1) > 0xffff)
649 goto bad;
650 } else if ((desc.type & 8) || !(desc.type & 4)) {
655 /* expand-up segment */ 651 /* expand-up segment */
656 if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim) 652 if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim)
657 goto bad; 653 goto bad;
@@ -716,68 +712,71 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
716} 712}
717 713
718/* 714/*
719 * Fetch the next byte of the instruction being emulated which is pointed to 715 * Prefetch the remaining bytes of the instruction without crossing page
720 * by ctxt->_eip, then increment ctxt->_eip.
721 *
722 * Also prefetch the remaining bytes of the instruction without crossing page
723 * boundary if they are not in fetch_cache yet. 716 * boundary if they are not in fetch_cache yet.
724 */ 717 */
725static int do_insn_fetch_byte(struct x86_emulate_ctxt *ctxt, u8 *dest) 718static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
726{ 719{
727 struct fetch_cache *fc = &ctxt->fetch;
728 int rc; 720 int rc;
729 int size, cur_size; 721 unsigned size;
730 722 unsigned long linear;
731 if (ctxt->_eip == fc->end) { 723 int cur_size = ctxt->fetch.end - ctxt->fetch.data;
732 unsigned long linear; 724 struct segmented_address addr = { .seg = VCPU_SREG_CS,
733 struct segmented_address addr = { .seg = VCPU_SREG_CS, 725 .ea = ctxt->eip + cur_size };
734 .ea = ctxt->_eip }; 726
735 cur_size = fc->end - fc->start; 727 size = 15UL ^ cur_size;
736 size = min(15UL - cur_size, 728 rc = __linearize(ctxt, addr, size, false, true, &linear);
737 PAGE_SIZE - offset_in_page(ctxt->_eip)); 729 if (unlikely(rc != X86EMUL_CONTINUE))
738 rc = __linearize(ctxt, addr, size, false, true, &linear); 730 return rc;
739 if (unlikely(rc != X86EMUL_CONTINUE))
740 return rc;
741 rc = ctxt->ops->fetch(ctxt, linear, fc->data + cur_size,
742 size, &ctxt->exception);
743 if (unlikely(rc != X86EMUL_CONTINUE))
744 return rc;
745 fc->end += size;
746 }
747 *dest = fc->data[ctxt->_eip - fc->start];
748 ctxt->_eip++;
749 return X86EMUL_CONTINUE;
750}
751 731
752static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, 732 size = min_t(unsigned, size, PAGE_SIZE - offset_in_page(linear));
753 void *dest, unsigned size)
754{
755 int rc;
756 733
757 /* x86 instructions are limited to 15 bytes. */ 734 /*
758 if (unlikely(ctxt->_eip + size - ctxt->eip > 15)) 735 * One instruction can only straddle two pages,
736 * and one has been loaded at the beginning of
737 * x86_decode_insn. So, if not enough bytes
738 * still, we must have hit the 15-byte boundary.
739 */
740 if (unlikely(size < op_size))
759 return X86EMUL_UNHANDLEABLE; 741 return X86EMUL_UNHANDLEABLE;
760 while (size--) { 742 rc = ctxt->ops->fetch(ctxt, linear, ctxt->fetch.end,
761 rc = do_insn_fetch_byte(ctxt, dest++); 743 size, &ctxt->exception);
762 if (rc != X86EMUL_CONTINUE) 744 if (unlikely(rc != X86EMUL_CONTINUE))
763 return rc; 745 return rc;
764 } 746 ctxt->fetch.end += size;
765 return X86EMUL_CONTINUE; 747 return X86EMUL_CONTINUE;
766} 748}
767 749
750static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
751 unsigned size)
752{
753 if (unlikely(ctxt->fetch.end - ctxt->fetch.ptr < size))
754 return __do_insn_fetch_bytes(ctxt, size);
755 else
756 return X86EMUL_CONTINUE;
757}
758
768/* Fetch next part of the instruction being emulated. */ 759/* Fetch next part of the instruction being emulated. */
769#define insn_fetch(_type, _ctxt) \ 760#define insn_fetch(_type, _ctxt) \
770({ unsigned long _x; \ 761({ _type _x; \
771 rc = do_insn_fetch(_ctxt, &_x, sizeof(_type)); \ 762 \
763 rc = do_insn_fetch_bytes(_ctxt, sizeof(_type)); \
772 if (rc != X86EMUL_CONTINUE) \ 764 if (rc != X86EMUL_CONTINUE) \
773 goto done; \ 765 goto done; \
774 (_type)_x; \ 766 ctxt->_eip += sizeof(_type); \
767 _x = *(_type __aligned(1) *) ctxt->fetch.ptr; \
768 ctxt->fetch.ptr += sizeof(_type); \
769 _x; \
775}) 770})
776 771
777#define insn_fetch_arr(_arr, _size, _ctxt) \ 772#define insn_fetch_arr(_arr, _size, _ctxt) \
778({ rc = do_insn_fetch(_ctxt, _arr, (_size)); \ 773({ \
774 rc = do_insn_fetch_bytes(_ctxt, _size); \
779 if (rc != X86EMUL_CONTINUE) \ 775 if (rc != X86EMUL_CONTINUE) \
780 goto done; \ 776 goto done; \
777 ctxt->_eip += (_size); \
778 memcpy(_arr, ctxt->fetch.ptr, _size); \
779 ctxt->fetch.ptr += (_size); \
781}) 780})
782 781
783/* 782/*
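
The rewritten fetch path above prefetches as many instruction bytes as it can in one go, bounded by the architectural 15-byte instruction limit and by the current page (one instruction can straddle at most two pages). Note that 15UL ^ cur_size is simply 15 - cur_size for cur_size in 0..15. A sketch of that size computation, ignoring the segment-limit checks done by __linearize():

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define offset_in_page(p) ((unsigned long)(p) & (PAGE_SIZE - 1))

/* How many bytes __do_insn_fetch_bytes() may prefetch: limited by the
 * 15-byte x86 instruction limit and by the end of the current page. */
static unsigned long prefetch_size(unsigned long linear_rip, unsigned long cur_size)
{
	unsigned long size = 15UL ^ cur_size;	/* == 15 - cur_size */
	unsigned long to_page_end = PAGE_SIZE - offset_in_page(linear_rip);

	return size < to_page_end ? size : to_page_end;
}

int main(void)
{
	/* instruction starts 3 bytes before a page end, nothing cached yet */
	printf("%lu\n", prefetch_size(0x7ffd, 0));	/* -> 3 */
	/* well inside a page, 4 bytes already cached */
	printf("%lu\n", prefetch_size(0x1000, 4));	/* -> 11 */
	return 0;
}
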
@@ -1063,19 +1062,17 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
1063 struct operand *op) 1062 struct operand *op)
1064{ 1063{
1065 u8 sib; 1064 u8 sib;
1066 int index_reg = 0, base_reg = 0, scale; 1065 int index_reg, base_reg, scale;
1067 int rc = X86EMUL_CONTINUE; 1066 int rc = X86EMUL_CONTINUE;
1068 ulong modrm_ea = 0; 1067 ulong modrm_ea = 0;
1069 1068
1070 if (ctxt->rex_prefix) { 1069 ctxt->modrm_reg = ((ctxt->rex_prefix << 1) & 8); /* REX.R */
1071 ctxt->modrm_reg = (ctxt->rex_prefix & 4) << 1; /* REX.R */ 1070 index_reg = (ctxt->rex_prefix << 2) & 8; /* REX.X */
1072 index_reg = (ctxt->rex_prefix & 2) << 2; /* REX.X */ 1071 base_reg = (ctxt->rex_prefix << 3) & 8; /* REX.B */
1073 ctxt->modrm_rm = base_reg = (ctxt->rex_prefix & 1) << 3; /* REG.B */
1074 }
1075 1072
1076 ctxt->modrm_mod |= (ctxt->modrm & 0xc0) >> 6; 1073 ctxt->modrm_mod = (ctxt->modrm & 0xc0) >> 6;
1077 ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3; 1074 ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
1078 ctxt->modrm_rm |= (ctxt->modrm & 0x07); 1075 ctxt->modrm_rm = base_reg | (ctxt->modrm & 0x07);
1079 ctxt->modrm_seg = VCPU_SREG_DS; 1076 ctxt->modrm_seg = VCPU_SREG_DS;
1080 1077
1081 if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) { 1078 if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) {
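
decode_modrm() above now folds the REX bits in unconditionally: rex_prefix is zero when no REX prefix was seen, and (rex << 1) & 8, (rex << 2) & 8 and (rex << 3) & 8 drop REX.R, REX.X and REX.B straight into bit 3 of the reg, index and base/rm numbers. A standalone sketch of the reg/rm extraction:

#include <stdio.h>

/* A REX prefix is 0100WRXB: the R, X and B bits extend the ModRM reg field,
 * the SIB index and the rm/base field to four bits. */
static void decode(unsigned char rex, unsigned char modrm)
{
	int mod = (modrm & 0xc0) >> 6;
	int reg = ((rex << 1) & 8) | ((modrm & 0x38) >> 3);	/* REX.R:reg */
	int rm  = ((rex << 3) & 8) | (modrm & 0x07);		/* REX.B:rm  */

	printf("mod=%d reg=r%d rm=r%d\n", mod, reg, rm);
}

int main(void)
{
	decode(0x00, 0xd8);	/* no REX:    mod=3 reg=r3  rm=r0 */
	decode(0x4d, 0xd8);	/* REX.W+R+B: mod=3 reg=r11 rm=r8 */
	return 0;
}
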
@@ -1093,7 +1090,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
1093 if (ctxt->d & Mmx) { 1090 if (ctxt->d & Mmx) {
1094 op->type = OP_MM; 1091 op->type = OP_MM;
1095 op->bytes = 8; 1092 op->bytes = 8;
1096 op->addr.xmm = ctxt->modrm_rm & 7; 1093 op->addr.mm = ctxt->modrm_rm & 7;
1097 return rc; 1094 return rc;
1098 } 1095 }
1099 fetch_register_operand(op); 1096 fetch_register_operand(op);
@@ -1190,6 +1187,9 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
1190 } 1187 }
1191 } 1188 }
1192 op->addr.mem.ea = modrm_ea; 1189 op->addr.mem.ea = modrm_ea;
1190 if (ctxt->ad_bytes != 8)
1191 ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;
1192
1193done: 1193done:
1194 return rc; 1194 return rc;
1195} 1195}
@@ -1220,12 +1220,14 @@ static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt)
1220 long sv = 0, mask; 1220 long sv = 0, mask;
1221 1221
1222 if (ctxt->dst.type == OP_MEM && ctxt->src.type == OP_REG) { 1222 if (ctxt->dst.type == OP_MEM && ctxt->src.type == OP_REG) {
1223 mask = ~(ctxt->dst.bytes * 8 - 1); 1223 mask = ~((long)ctxt->dst.bytes * 8 - 1);
1224 1224
1225 if (ctxt->src.bytes == 2) 1225 if (ctxt->src.bytes == 2)
1226 sv = (s16)ctxt->src.val & (s16)mask; 1226 sv = (s16)ctxt->src.val & (s16)mask;
1227 else if (ctxt->src.bytes == 4) 1227 else if (ctxt->src.bytes == 4)
1228 sv = (s32)ctxt->src.val & (s32)mask; 1228 sv = (s32)ctxt->src.val & (s32)mask;
1229 else
1230 sv = (s64)ctxt->src.val & (s64)mask;
1229 1231
1230 ctxt->dst.addr.mem.ea += (sv >> 3); 1232 ctxt->dst.addr.mem.ea += (sv >> 3);
1231 } 1233 }
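
fetch_bit_operand() above handles BT/BTS/BTR/BTC with a register bit offset: the signed offset is rounded down to a multiple of the operand width and converted to a byte displacement, and the new else branch adds the 64-bit case the old code dropped. A sketch of the address math; like the kernel code, it relies on arithmetic right shift of a negative value.

#include <stdio.h>

/* Where a BT-family instruction with a register bit offset actually touches
 * memory: displacement from the operand address plus a bit inside it. */
static void bit_address(long ea, long bit_offset, int op_bytes)
{
	long mask = ~((long)op_bytes * 8 - 1);
	long sv = bit_offset & mask;		/* aligned, sign preserved */

	printf("byte ea=%ld bit=%ld\n", ea + (sv >> 3), bit_offset & ~mask);
}

int main(void)
{
	bit_address(0x1000, 100, 4);	/* bit 100 of a dword: ea+12, bit 4 */
	bit_address(0x1000, -3, 8);	/* negative offsets reach below ea   */
	return 0;
}
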
@@ -1315,8 +1317,7 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
1315 in_page = (ctxt->eflags & EFLG_DF) ? 1317 in_page = (ctxt->eflags & EFLG_DF) ?
1316 offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) : 1318 offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
1317 PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)); 1319 PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
1318 n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size, 1320 n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count);
1319 count);
1320 if (n == 0) 1321 if (n == 0)
1321 n = 1; 1322 n = 1;
1322 rc->pos = rc->end = 0; 1323 rc->pos = rc->end = 0;
@@ -1358,17 +1359,19 @@ static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
1358 u16 selector, struct desc_ptr *dt) 1359 u16 selector, struct desc_ptr *dt)
1359{ 1360{
1360 const struct x86_emulate_ops *ops = ctxt->ops; 1361 const struct x86_emulate_ops *ops = ctxt->ops;
1362 u32 base3 = 0;
1361 1363
1362 if (selector & 1 << 2) { 1364 if (selector & 1 << 2) {
1363 struct desc_struct desc; 1365 struct desc_struct desc;
1364 u16 sel; 1366 u16 sel;
1365 1367
1366 memset (dt, 0, sizeof *dt); 1368 memset (dt, 0, sizeof *dt);
1367 if (!ops->get_segment(ctxt, &sel, &desc, NULL, VCPU_SREG_LDTR)) 1369 if (!ops->get_segment(ctxt, &sel, &desc, &base3,
1370 VCPU_SREG_LDTR))
1368 return; 1371 return;
1369 1372
1370 dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */ 1373 dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
1371 dt->address = get_desc_base(&desc); 1374 dt->address = get_desc_base(&desc) | ((u64)base3 << 32);
1372 } else 1375 } else
1373 ops->get_gdt(ctxt, dt); 1376 ops->get_gdt(ctxt, dt);
1374} 1377}
@@ -1422,6 +1425,7 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1422 ulong desc_addr; 1425 ulong desc_addr;
1423 int ret; 1426 int ret;
1424 u16 dummy; 1427 u16 dummy;
1428 u32 base3 = 0;
1425 1429
1426 memset(&seg_desc, 0, sizeof seg_desc); 1430 memset(&seg_desc, 0, sizeof seg_desc);
1427 1431
@@ -1538,9 +1542,14 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1538 ret = write_segment_descriptor(ctxt, selector, &seg_desc); 1542 ret = write_segment_descriptor(ctxt, selector, &seg_desc);
1539 if (ret != X86EMUL_CONTINUE) 1543 if (ret != X86EMUL_CONTINUE)
1540 return ret; 1544 return ret;
1545 } else if (ctxt->mode == X86EMUL_MODE_PROT64) {
1546 ret = ctxt->ops->read_std(ctxt, desc_addr+8, &base3,
1547 sizeof(base3), &ctxt->exception);
1548 if (ret != X86EMUL_CONTINUE)
1549 return ret;
1541 } 1550 }
1542load: 1551load:
1543 ctxt->ops->set_segment(ctxt, selector, &seg_desc, 0, seg); 1552 ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
1544 return X86EMUL_CONTINUE; 1553 return X86EMUL_CONTINUE;
1545exception: 1554exception:
1546 emulate_exception(ctxt, err_vec, err_code, true); 1555 emulate_exception(ctxt, err_vec, err_code, true);
@@ -1575,34 +1584,28 @@ static void write_register_operand(struct operand *op)
1575 1584
1576static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op) 1585static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
1577{ 1586{
1578 int rc;
1579
1580 switch (op->type) { 1587 switch (op->type) {
1581 case OP_REG: 1588 case OP_REG:
1582 write_register_operand(op); 1589 write_register_operand(op);
1583 break; 1590 break;
1584 case OP_MEM: 1591 case OP_MEM:
1585 if (ctxt->lock_prefix) 1592 if (ctxt->lock_prefix)
1586 rc = segmented_cmpxchg(ctxt, 1593 return segmented_cmpxchg(ctxt,
1594 op->addr.mem,
1595 &op->orig_val,
1596 &op->val,
1597 op->bytes);
1598 else
1599 return segmented_write(ctxt,
1587 op->addr.mem, 1600 op->addr.mem,
1588 &op->orig_val,
1589 &op->val, 1601 &op->val,
1590 op->bytes); 1602 op->bytes);
1591 else
1592 rc = segmented_write(ctxt,
1593 op->addr.mem,
1594 &op->val,
1595 op->bytes);
1596 if (rc != X86EMUL_CONTINUE)
1597 return rc;
1598 break; 1603 break;
1599 case OP_MEM_STR: 1604 case OP_MEM_STR:
1600 rc = segmented_write(ctxt, 1605 return segmented_write(ctxt,
1601 op->addr.mem, 1606 op->addr.mem,
1602 op->data, 1607 op->data,
1603 op->bytes * op->count); 1608 op->bytes * op->count);
1604 if (rc != X86EMUL_CONTINUE)
1605 return rc;
1606 break; 1609 break;
1607 case OP_XMM: 1610 case OP_XMM:
1608 write_sse_reg(ctxt, &op->vec_val, op->addr.xmm); 1611 write_sse_reg(ctxt, &op->vec_val, op->addr.xmm);
@@ -1671,7 +1674,7 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt,
1671 return rc; 1674 return rc;
1672 1675
1673 change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF 1676 change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF
1674 | EFLG_TF | EFLG_DF | EFLG_NT | EFLG_RF | EFLG_AC | EFLG_ID; 1677 | EFLG_TF | EFLG_DF | EFLG_NT | EFLG_AC | EFLG_ID;
1675 1678
1676 switch(ctxt->mode) { 1679 switch(ctxt->mode) {
1677 case X86EMUL_MODE_PROT64: 1680 case X86EMUL_MODE_PROT64:
@@ -1754,6 +1757,9 @@ static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
1754 if (rc != X86EMUL_CONTINUE) 1757 if (rc != X86EMUL_CONTINUE)
1755 return rc; 1758 return rc;
1756 1759
1760 if (ctxt->modrm_reg == VCPU_SREG_SS)
1761 ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
1762
1757 rc = load_segment_descriptor(ctxt, (u16)selector, seg); 1763 rc = load_segment_descriptor(ctxt, (u16)selector, seg);
1758 return rc; 1764 return rc;
1759} 1765}
@@ -1991,6 +1997,9 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
1991{ 1997{
1992 u64 old = ctxt->dst.orig_val64; 1998 u64 old = ctxt->dst.orig_val64;
1993 1999
2000 if (ctxt->dst.bytes == 16)
2001 return X86EMUL_UNHANDLEABLE;
2002
1994 if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) || 2003 if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
1995 ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) { 2004 ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
1996 *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0); 2005 *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
@@ -2017,6 +2026,7 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
2017{ 2026{
2018 int rc; 2027 int rc;
2019 unsigned long cs; 2028 unsigned long cs;
2029 int cpl = ctxt->ops->cpl(ctxt);
2020 2030
2021 rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes); 2031 rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes);
2022 if (rc != X86EMUL_CONTINUE) 2032 if (rc != X86EMUL_CONTINUE)
@@ -2026,6 +2036,9 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
2026 rc = emulate_pop(ctxt, &cs, ctxt->op_bytes); 2036 rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
2027 if (rc != X86EMUL_CONTINUE) 2037 if (rc != X86EMUL_CONTINUE)
2028 return rc; 2038 return rc;
2039 /* Outer-privilege level return is not implemented */
2040 if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
2041 return X86EMUL_UNHANDLEABLE;
2029 rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS); 2042 rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
2030 return rc; 2043 return rc;
2031} 2044}
@@ -2044,8 +2057,10 @@ static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt)
2044static int em_cmpxchg(struct x86_emulate_ctxt *ctxt) 2057static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
2045{ 2058{
2046 /* Save real source value, then compare EAX against destination. */ 2059 /* Save real source value, then compare EAX against destination. */
2060 ctxt->dst.orig_val = ctxt->dst.val;
2061 ctxt->dst.val = reg_read(ctxt, VCPU_REGS_RAX);
2047 ctxt->src.orig_val = ctxt->src.val; 2062 ctxt->src.orig_val = ctxt->src.val;
2048 ctxt->src.val = reg_read(ctxt, VCPU_REGS_RAX); 2063 ctxt->src.val = ctxt->dst.orig_val;
2049 fastop(ctxt, em_cmp); 2064 fastop(ctxt, em_cmp);
2050 2065
2051 if (ctxt->eflags & EFLG_ZF) { 2066 if (ctxt->eflags & EFLG_ZF) {
@@ -2055,6 +2070,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
2055 /* Failure: write the value we saw to EAX. */ 2070 /* Failure: write the value we saw to EAX. */
2056 ctxt->dst.type = OP_REG; 2071 ctxt->dst.type = OP_REG;
2057 ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX); 2072 ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
2073 ctxt->dst.val = ctxt->dst.orig_val;
2058 } 2074 }
2059 return X86EMUL_CONTINUE; 2075 return X86EMUL_CONTINUE;
2060} 2076}
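
The em_cmpxchg() change above mirrors the architectural rule that the destination is written back on failure too, just with its unchanged value. A plain-C sketch of CMPXCHG r/m,reg semantics:

#include <stdio.h>
#include <stdbool.h>

/* Compare the accumulator with the destination; on match the destination is
 * written with the source, otherwise the accumulator is loaded from the
 * destination.  Either way the destination location is written back. */
static bool cmpxchg(unsigned long *rax, unsigned long *dst, unsigned long src)
{
	if (*rax == *dst) {
		*dst = src;
		return true;		/* ZF=1 */
	}
	*rax = *dst;
	return false;			/* ZF=0 */
}

int main(void)
{
	unsigned long rax = 1, mem = 1;

	printf("%d rax=%lu mem=%lu\n", cmpxchg(&rax, &mem, 7), rax, mem); /* 1 1 7 */
	rax = 2;
	printf("%d rax=%lu mem=%lu\n", cmpxchg(&rax, &mem, 9), rax, mem); /* 0 7 7 */
	return 0;
}
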
@@ -2194,7 +2210,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
2194 *reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip; 2210 *reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip;
2195 if (efer & EFER_LMA) { 2211 if (efer & EFER_LMA) {
2196#ifdef CONFIG_X86_64 2212#ifdef CONFIG_X86_64
2197 *reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags & ~EFLG_RF; 2213 *reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags;
2198 2214
2199 ops->get_msr(ctxt, 2215 ops->get_msr(ctxt,
2200 ctxt->mode == X86EMUL_MODE_PROT64 ? 2216 ctxt->mode == X86EMUL_MODE_PROT64 ?
@@ -2202,14 +2218,14 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
2202 ctxt->_eip = msr_data; 2218 ctxt->_eip = msr_data;
2203 2219
2204 ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data); 2220 ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
2205 ctxt->eflags &= ~(msr_data | EFLG_RF); 2221 ctxt->eflags &= ~msr_data;
2206#endif 2222#endif
2207 } else { 2223 } else {
2208 /* legacy mode */ 2224 /* legacy mode */
2209 ops->get_msr(ctxt, MSR_STAR, &msr_data); 2225 ops->get_msr(ctxt, MSR_STAR, &msr_data);
2210 ctxt->_eip = (u32)msr_data; 2226 ctxt->_eip = (u32)msr_data;
2211 2227
2212 ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); 2228 ctxt->eflags &= ~(EFLG_VM | EFLG_IF);
2213 } 2229 }
2214 2230
2215 return X86EMUL_CONTINUE; 2231 return X86EMUL_CONTINUE;
@@ -2258,7 +2274,7 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
2258 break; 2274 break;
2259 } 2275 }
2260 2276
2261 ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); 2277 ctxt->eflags &= ~(EFLG_VM | EFLG_IF);
2262 cs_sel = (u16)msr_data; 2278 cs_sel = (u16)msr_data;
2263 cs_sel &= ~SELECTOR_RPL_MASK; 2279 cs_sel &= ~SELECTOR_RPL_MASK;
2264 ss_sel = cs_sel + 8; 2280 ss_sel = cs_sel + 8;
@@ -2964,7 +2980,7 @@ static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
2964 2980
2965static int em_mov(struct x86_emulate_ctxt *ctxt) 2981static int em_mov(struct x86_emulate_ctxt *ctxt)
2966{ 2982{
2967 memcpy(ctxt->dst.valptr, ctxt->src.valptr, ctxt->op_bytes); 2983 memcpy(ctxt->dst.valptr, ctxt->src.valptr, sizeof(ctxt->src.valptr));
2968 return X86EMUL_CONTINUE; 2984 return X86EMUL_CONTINUE;
2969} 2985}
2970 2986
@@ -3221,7 +3237,8 @@ static int em_lidt(struct x86_emulate_ctxt *ctxt)
3221 3237
3222static int em_smsw(struct x86_emulate_ctxt *ctxt) 3238static int em_smsw(struct x86_emulate_ctxt *ctxt)
3223{ 3239{
3224 ctxt->dst.bytes = 2; 3240 if (ctxt->dst.type == OP_MEM)
3241 ctxt->dst.bytes = 2;
3225 ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0); 3242 ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0);
3226 return X86EMUL_CONTINUE; 3243 return X86EMUL_CONTINUE;
3227} 3244}
@@ -3496,7 +3513,7 @@ static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
3496 u64 rcx = reg_read(ctxt, VCPU_REGS_RCX); 3513 u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
3497 3514
3498 if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) || 3515 if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
3499 (rcx > 3)) 3516 ctxt->ops->check_pmc(ctxt, rcx))
3500 return emulate_gp(ctxt, 0); 3517 return emulate_gp(ctxt, 0);
3501 3518
3502 return X86EMUL_CONTINUE; 3519 return X86EMUL_CONTINUE;
@@ -3521,9 +3538,9 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
3521} 3538}
3522 3539
3523#define D(_y) { .flags = (_y) } 3540#define D(_y) { .flags = (_y) }
3524#define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i } 3541#define DI(_y, _i) { .flags = (_y)|Intercept, .intercept = x86_intercept_##_i }
3525#define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \ 3542#define DIP(_y, _i, _p) { .flags = (_y)|Intercept|CheckPerm, \
3526 .check_perm = (_p) } 3543 .intercept = x86_intercept_##_i, .check_perm = (_p) }
3527#define N D(NotImpl) 3544#define N D(NotImpl)
3528#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } 3545#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
3529#define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) } 3546#define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
@@ -3532,10 +3549,10 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
3532#define I(_f, _e) { .flags = (_f), .u.execute = (_e) } 3549#define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
3533#define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) } 3550#define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
3534#define II(_f, _e, _i) \ 3551#define II(_f, _e, _i) \
3535 { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i } 3552 { .flags = (_f)|Intercept, .u.execute = (_e), .intercept = x86_intercept_##_i }
3536#define IIP(_f, _e, _i, _p) \ 3553#define IIP(_f, _e, _i, _p) \
3537 { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i, \ 3554 { .flags = (_f)|Intercept|CheckPerm, .u.execute = (_e), \
3538 .check_perm = (_p) } 3555 .intercept = x86_intercept_##_i, .check_perm = (_p) }
3539#define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) } 3556#define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }
3540 3557
3541#define D2bv(_f) D((_f) | ByteOp), D(_f) 3558#define D2bv(_f) D((_f) | ByteOp), D(_f)
@@ -3634,8 +3651,8 @@ static const struct opcode group6[] = {
3634}; 3651};
3635 3652
3636static const struct group_dual group7 = { { 3653static const struct group_dual group7 = { {
3637 II(Mov | DstMem | Priv, em_sgdt, sgdt), 3654 II(Mov | DstMem, em_sgdt, sgdt),
3638 II(Mov | DstMem | Priv, em_sidt, sidt), 3655 II(Mov | DstMem, em_sidt, sidt),
3639 II(SrcMem | Priv, em_lgdt, lgdt), 3656 II(SrcMem | Priv, em_lgdt, lgdt),
3640 II(SrcMem | Priv, em_lidt, lidt), 3657 II(SrcMem | Priv, em_lidt, lidt),
3641 II(SrcNone | DstMem | Mov, em_smsw, smsw), N, 3658 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
@@ -3899,7 +3916,7 @@ static const struct opcode twobyte_table[256] = {
3899 N, N, 3916 N, N,
3900 N, N, N, N, N, N, N, N, 3917 N, N, N, N, N, N, N, N,
3901 /* 0x40 - 0x4F */ 3918 /* 0x40 - 0x4F */
3902 X16(D(DstReg | SrcMem | ModRM | Mov)), 3919 X16(D(DstReg | SrcMem | ModRM)),
3903 /* 0x50 - 0x5F */ 3920 /* 0x50 - 0x5F */
3904 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, 3921 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
3905 /* 0x60 - 0x6F */ 3922 /* 0x60 - 0x6F */
@@ -4061,12 +4078,12 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
4061 mem_common: 4078 mem_common:
4062 *op = ctxt->memop; 4079 *op = ctxt->memop;
4063 ctxt->memopp = op; 4080 ctxt->memopp = op;
4064 if ((ctxt->d & BitOp) && op == &ctxt->dst) 4081 if (ctxt->d & BitOp)
4065 fetch_bit_operand(ctxt); 4082 fetch_bit_operand(ctxt);
4066 op->orig_val = op->val; 4083 op->orig_val = op->val;
4067 break; 4084 break;
4068 case OpMem64: 4085 case OpMem64:
4069 ctxt->memop.bytes = 8; 4086 ctxt->memop.bytes = (ctxt->op_bytes == 8) ? 16 : 8;
4070 goto mem_common; 4087 goto mem_common;
4071 case OpAcc: 4088 case OpAcc:
4072 op->type = OP_REG; 4089 op->type = OP_REG;
@@ -4150,7 +4167,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
4150 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; 4167 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4151 op->addr.mem.ea = 4168 op->addr.mem.ea =
4152 register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSI)); 4169 register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSI));
4153 op->addr.mem.seg = seg_override(ctxt); 4170 op->addr.mem.seg = ctxt->seg_override;
4154 op->val = 0; 4171 op->val = 0;
4155 op->count = 1; 4172 op->count = 1;
4156 break; 4173 break;
@@ -4161,7 +4178,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
4161 register_address(ctxt, 4178 register_address(ctxt,
4162 reg_read(ctxt, VCPU_REGS_RBX) + 4179 reg_read(ctxt, VCPU_REGS_RBX) +
4163 (reg_read(ctxt, VCPU_REGS_RAX) & 0xff)); 4180 (reg_read(ctxt, VCPU_REGS_RAX) & 0xff));
4164 op->addr.mem.seg = seg_override(ctxt); 4181 op->addr.mem.seg = ctxt->seg_override;
4165 op->val = 0; 4182 op->val = 0;
4166 break; 4183 break;
4167 case OpImmFAddr: 4184 case OpImmFAddr:
@@ -4208,16 +4225,22 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
4208 int mode = ctxt->mode; 4225 int mode = ctxt->mode;
4209 int def_op_bytes, def_ad_bytes, goffset, simd_prefix; 4226 int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
4210 bool op_prefix = false; 4227 bool op_prefix = false;
4228 bool has_seg_override = false;
4211 struct opcode opcode; 4229 struct opcode opcode;
4212 4230
4213 ctxt->memop.type = OP_NONE; 4231 ctxt->memop.type = OP_NONE;
4214 ctxt->memopp = NULL; 4232 ctxt->memopp = NULL;
4215 ctxt->_eip = ctxt->eip; 4233 ctxt->_eip = ctxt->eip;
4216 ctxt->fetch.start = ctxt->_eip; 4234 ctxt->fetch.ptr = ctxt->fetch.data;
4217 ctxt->fetch.end = ctxt->fetch.start + insn_len; 4235 ctxt->fetch.end = ctxt->fetch.data + insn_len;
4218 ctxt->opcode_len = 1; 4236 ctxt->opcode_len = 1;
4219 if (insn_len > 0) 4237 if (insn_len > 0)
4220 memcpy(ctxt->fetch.data, insn, insn_len); 4238 memcpy(ctxt->fetch.data, insn, insn_len);
4239 else {
4240 rc = __do_insn_fetch_bytes(ctxt, 1);
4241 if (rc != X86EMUL_CONTINUE)
4242 return rc;
4243 }
4221 4244
4222 switch (mode) { 4245 switch (mode) {
4223 case X86EMUL_MODE_REAL: 4246 case X86EMUL_MODE_REAL:
@@ -4261,11 +4284,13 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
4261 case 0x2e: /* CS override */ 4284 case 0x2e: /* CS override */
4262 case 0x36: /* SS override */ 4285 case 0x36: /* SS override */
4263 case 0x3e: /* DS override */ 4286 case 0x3e: /* DS override */
4264 set_seg_override(ctxt, (ctxt->b >> 3) & 3); 4287 has_seg_override = true;
4288 ctxt->seg_override = (ctxt->b >> 3) & 3;
4265 break; 4289 break;
4266 case 0x64: /* FS override */ 4290 case 0x64: /* FS override */
4267 case 0x65: /* GS override */ 4291 case 0x65: /* GS override */
4268 set_seg_override(ctxt, ctxt->b & 7); 4292 has_seg_override = true;
4293 ctxt->seg_override = ctxt->b & 7;
4269 break; 4294 break;
4270 case 0x40 ... 0x4f: /* REX */ 4295 case 0x40 ... 0x4f: /* REX */
4271 if (mode != X86EMUL_MODE_PROT64) 4296 if (mode != X86EMUL_MODE_PROT64)
@@ -4314,6 +4339,13 @@ done_prefixes:
4314 if (ctxt->d & ModRM) 4339 if (ctxt->d & ModRM)
4315 ctxt->modrm = insn_fetch(u8, ctxt); 4340 ctxt->modrm = insn_fetch(u8, ctxt);
4316 4341
4342 /* vex-prefix instructions are not implemented */
4343 if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) &&
4344 (mode == X86EMUL_MODE_PROT64 ||
4345 (mode >= X86EMUL_MODE_PROT16 && (ctxt->modrm & 0x80)))) {
4346 ctxt->d = NotImpl;
4347 }
4348
4317 while (ctxt->d & GroupMask) { 4349 while (ctxt->d & GroupMask) {
4318 switch (ctxt->d & GroupMask) { 4350 switch (ctxt->d & GroupMask) {
4319 case Group: 4351 case Group:
@@ -4356,49 +4388,59 @@ done_prefixes:
4356 ctxt->d |= opcode.flags; 4388 ctxt->d |= opcode.flags;
4357 } 4389 }
4358 4390
4359 ctxt->execute = opcode.u.execute;
4360 ctxt->check_perm = opcode.check_perm;
4361 ctxt->intercept = opcode.intercept;
4362
4363 /* Unrecognised? */ 4391 /* Unrecognised? */
4364 if (ctxt->d == 0 || (ctxt->d & NotImpl)) 4392 if (ctxt->d == 0)
4365 return EMULATION_FAILED; 4393 return EMULATION_FAILED;
4366 4394
4367 if (!(ctxt->d & EmulateOnUD) && ctxt->ud) 4395 ctxt->execute = opcode.u.execute;
4368 return EMULATION_FAILED;
4369 4396
4370 if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack)) 4397 if (unlikely(ctxt->d &
4371 ctxt->op_bytes = 8; 4398 (NotImpl|EmulateOnUD|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm))) {
4399 /*
4400 * These are copied unconditionally here, and checked unconditionally
4401 * in x86_emulate_insn.
4402 */
4403 ctxt->check_perm = opcode.check_perm;
4404 ctxt->intercept = opcode.intercept;
4405
4406 if (ctxt->d & NotImpl)
4407 return EMULATION_FAILED;
4408
4409 if (!(ctxt->d & EmulateOnUD) && ctxt->ud)
4410 return EMULATION_FAILED;
4372 4411
4373 if (ctxt->d & Op3264) { 4412 if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack))
4374 if (mode == X86EMUL_MODE_PROT64)
4375 ctxt->op_bytes = 8; 4413 ctxt->op_bytes = 8;
4376 else
4377 ctxt->op_bytes = 4;
4378 }
4379 4414
4380 if (ctxt->d & Sse) 4415 if (ctxt->d & Op3264) {
4381 ctxt->op_bytes = 16; 4416 if (mode == X86EMUL_MODE_PROT64)
4382 else if (ctxt->d & Mmx) 4417 ctxt->op_bytes = 8;
4383 ctxt->op_bytes = 8; 4418 else
4419 ctxt->op_bytes = 4;
4420 }
4421
4422 if (ctxt->d & Sse)
4423 ctxt->op_bytes = 16;
4424 else if (ctxt->d & Mmx)
4425 ctxt->op_bytes = 8;
4426 }
4384 4427
4385 /* ModRM and SIB bytes. */ 4428 /* ModRM and SIB bytes. */
4386 if (ctxt->d & ModRM) { 4429 if (ctxt->d & ModRM) {
4387 rc = decode_modrm(ctxt, &ctxt->memop); 4430 rc = decode_modrm(ctxt, &ctxt->memop);
4388 if (!ctxt->has_seg_override) 4431 if (!has_seg_override) {
4389 set_seg_override(ctxt, ctxt->modrm_seg); 4432 has_seg_override = true;
4433 ctxt->seg_override = ctxt->modrm_seg;
4434 }
4390 } else if (ctxt->d & MemAbs) 4435 } else if (ctxt->d & MemAbs)
4391 rc = decode_abs(ctxt, &ctxt->memop); 4436 rc = decode_abs(ctxt, &ctxt->memop);
4392 if (rc != X86EMUL_CONTINUE) 4437 if (rc != X86EMUL_CONTINUE)
4393 goto done; 4438 goto done;
4394 4439
4395 if (!ctxt->has_seg_override) 4440 if (!has_seg_override)
4396 set_seg_override(ctxt, VCPU_SREG_DS); 4441 ctxt->seg_override = VCPU_SREG_DS;
4397
4398 ctxt->memop.addr.mem.seg = seg_override(ctxt);
4399 4442
4400 if (ctxt->memop.type == OP_MEM && ctxt->ad_bytes != 8) 4443 ctxt->memop.addr.mem.seg = ctxt->seg_override;
4401 ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;
4402 4444
4403 /* 4445 /*
4404 * Decode and fetch the source operand: register, memory 4446 * Decode and fetch the source operand: register, memory
@@ -4420,7 +4462,7 @@ done_prefixes:
4420 rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask); 4462 rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
4421 4463
4422done: 4464done:
4423 if (ctxt->memopp && ctxt->memopp->type == OP_MEM && ctxt->rip_relative) 4465 if (ctxt->rip_relative)
4424 ctxt->memopp->addr.mem.ea += ctxt->_eip; 4466 ctxt->memopp->addr.mem.ea += ctxt->_eip;
4425 4467
4426 return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK; 4468 return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
@@ -4495,6 +4537,16 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
4495 return X86EMUL_CONTINUE; 4537 return X86EMUL_CONTINUE;
4496} 4538}
4497 4539
4540void init_decode_cache(struct x86_emulate_ctxt *ctxt)
4541{
4542 memset(&ctxt->rip_relative, 0,
4543 (void *)&ctxt->modrm - (void *)&ctxt->rip_relative);
4544
4545 ctxt->io_read.pos = 0;
4546 ctxt->io_read.end = 0;
4547 ctxt->mem_read.end = 0;
4548}
4549
4498int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) 4550int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
4499{ 4551{
4500 const struct x86_emulate_ops *ops = ctxt->ops; 4552 const struct x86_emulate_ops *ops = ctxt->ops;
@@ -4503,12 +4555,6 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
4503 4555
4504 ctxt->mem_read.pos = 0; 4556 ctxt->mem_read.pos = 0;
4505 4557
4506 if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
4507 (ctxt->d & Undefined)) {
4508 rc = emulate_ud(ctxt);
4509 goto done;
4510 }
4511
4512 /* LOCK prefix is allowed only with some instructions */ 4558 /* LOCK prefix is allowed only with some instructions */
4513 if (ctxt->lock_prefix && (!(ctxt->d & Lock) || ctxt->dst.type != OP_MEM)) { 4559 if (ctxt->lock_prefix && (!(ctxt->d & Lock) || ctxt->dst.type != OP_MEM)) {
4514 rc = emulate_ud(ctxt); 4560 rc = emulate_ud(ctxt);
@@ -4520,69 +4566,82 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
4520 goto done; 4566 goto done;
4521 } 4567 }
4522 4568
4523 if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM))) 4569 if (unlikely(ctxt->d &
4524 || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) { 4570 (No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) {
4525 rc = emulate_ud(ctxt); 4571 if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
4526 goto done; 4572 (ctxt->d & Undefined)) {
4527 } 4573 rc = emulate_ud(ctxt);
4528 4574 goto done;
4529 if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) { 4575 }
4530 rc = emulate_nm(ctxt);
4531 goto done;
4532 }
4533 4576
4534 if (ctxt->d & Mmx) { 4577 if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
4535 rc = flush_pending_x87_faults(ctxt); 4578 || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
4536 if (rc != X86EMUL_CONTINUE) 4579 rc = emulate_ud(ctxt);
4537 goto done; 4580 goto done;
4538 /* 4581 }
4539 * Now that we know the fpu is exception safe, we can fetch
4540 * operands from it.
4541 */
4542 fetch_possible_mmx_operand(ctxt, &ctxt->src);
4543 fetch_possible_mmx_operand(ctxt, &ctxt->src2);
4544 if (!(ctxt->d & Mov))
4545 fetch_possible_mmx_operand(ctxt, &ctxt->dst);
4546 }
4547 4582
4548 if (unlikely(ctxt->guest_mode) && ctxt->intercept) { 4583 if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
4549 rc = emulator_check_intercept(ctxt, ctxt->intercept, 4584 rc = emulate_nm(ctxt);
4550 X86_ICPT_PRE_EXCEPT);
4551 if (rc != X86EMUL_CONTINUE)
4552 goto done; 4585 goto done;
4553 } 4586 }
4554 4587
4555 /* Privileged instruction can be executed only in CPL=0 */ 4588 if (ctxt->d & Mmx) {
4556 if ((ctxt->d & Priv) && ops->cpl(ctxt)) { 4589 rc = flush_pending_x87_faults(ctxt);
4557 rc = emulate_gp(ctxt, 0); 4590 if (rc != X86EMUL_CONTINUE)
4558 goto done; 4591 goto done;
4559 } 4592 /*
4593 * Now that we know the fpu is exception safe, we can fetch
4594 * operands from it.
4595 */
4596 fetch_possible_mmx_operand(ctxt, &ctxt->src);
4597 fetch_possible_mmx_operand(ctxt, &ctxt->src2);
4598 if (!(ctxt->d & Mov))
4599 fetch_possible_mmx_operand(ctxt, &ctxt->dst);
4600 }
4560 4601
4561 /* Instruction can only be executed in protected mode */ 4602 if (unlikely(ctxt->guest_mode) && (ctxt->d & Intercept)) {
4562 if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) { 4603 rc = emulator_check_intercept(ctxt, ctxt->intercept,
4563 rc = emulate_ud(ctxt); 4604 X86_ICPT_PRE_EXCEPT);
4564 goto done; 4605 if (rc != X86EMUL_CONTINUE)
4565 } 4606 goto done;
4607 }
4566 4608
4567 /* Do instruction specific permission checks */ 4609 /* Privileged instruction can be executed only in CPL=0 */
4568 if (ctxt->check_perm) { 4610 if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
4569 rc = ctxt->check_perm(ctxt); 4611 if (ctxt->d & PrivUD)
4570 if (rc != X86EMUL_CONTINUE) 4612 rc = emulate_ud(ctxt);
4613 else
4614 rc = emulate_gp(ctxt, 0);
4571 goto done; 4615 goto done;
4572 } 4616 }
4573 4617
4574 if (unlikely(ctxt->guest_mode) && ctxt->intercept) { 4618 /* Instruction can only be executed in protected mode */
4575 rc = emulator_check_intercept(ctxt, ctxt->intercept, 4619 if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
4576 X86_ICPT_POST_EXCEPT); 4620 rc = emulate_ud(ctxt);
4577 if (rc != X86EMUL_CONTINUE)
4578 goto done; 4621 goto done;
4579 } 4622 }
4580 4623
4581 if (ctxt->rep_prefix && (ctxt->d & String)) { 4624 /* Do instruction specific permission checks */
4582 /* All REP prefixes have the same first termination condition */ 4625 if (ctxt->d & CheckPerm) {
4583 if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) { 4626 rc = ctxt->check_perm(ctxt);
4584 ctxt->eip = ctxt->_eip; 4627 if (rc != X86EMUL_CONTINUE)
4585 goto done; 4628 goto done;
4629 }
4630
4631 if (unlikely(ctxt->guest_mode) && (ctxt->d & Intercept)) {
4632 rc = emulator_check_intercept(ctxt, ctxt->intercept,
4633 X86_ICPT_POST_EXCEPT);
4634 if (rc != X86EMUL_CONTINUE)
4635 goto done;
4636 }
4637
4638 if (ctxt->rep_prefix && (ctxt->d & String)) {
4639 /* All REP prefixes have the same first termination condition */
4640 if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
4641 ctxt->eip = ctxt->_eip;
4642 ctxt->eflags &= ~EFLG_RF;
4643 goto done;
4644 }
4586 } 4645 }
4587 } 4646 }
4588 4647
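
The restructured x86_emulate_insn() above gates all of the rarely-set decode flags behind a single unlikely() test of a combined mask, so the common path pays for one branch instead of half a dozen. A generic sketch of that fast-path pattern; the flag names and checks here are illustrative only.

#include <stdbool.h>
#include <stdio.h>

#define FLAG_PRIV	(1u << 0)
#define FLAG_PROT	(1u << 1)
#define FLAG_STRING	(1u << 2)
#define RARE_FLAGS	(FLAG_PRIV | FLAG_PROT | FLAG_STRING)

static bool check_insn(unsigned flags, int cpl, bool protected_mode)
{
	if (flags & RARE_FLAGS) {		/* slow path, rarely taken */
		if ((flags & FLAG_PRIV) && cpl != 0)
			return false;
		if ((flags & FLAG_PROT) && !protected_mode)
			return false;
		/* string-specific setup would go here */
	}
	return true;				/* fast path: nothing to do */
}

int main(void)
{
	printf("%d %d\n", check_insn(0, 3, true), check_insn(FLAG_PRIV, 3, true));
	return 0;
}
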
@@ -4616,13 +4675,18 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
4616 4675
4617special_insn: 4676special_insn:
4618 4677
4619 if (unlikely(ctxt->guest_mode) && ctxt->intercept) { 4678 if (unlikely(ctxt->guest_mode) && (ctxt->d & Intercept)) {
4620 rc = emulator_check_intercept(ctxt, ctxt->intercept, 4679 rc = emulator_check_intercept(ctxt, ctxt->intercept,
4621 X86_ICPT_POST_MEMACCESS); 4680 X86_ICPT_POST_MEMACCESS);
4622 if (rc != X86EMUL_CONTINUE) 4681 if (rc != X86EMUL_CONTINUE)
4623 goto done; 4682 goto done;
4624 } 4683 }
4625 4684
4685 if (ctxt->rep_prefix && (ctxt->d & String))
4686 ctxt->eflags |= EFLG_RF;
4687 else
4688 ctxt->eflags &= ~EFLG_RF;
4689
4626 if (ctxt->execute) { 4690 if (ctxt->execute) {
4627 if (ctxt->d & Fastop) { 4691 if (ctxt->d & Fastop) {
4628 void (*fop)(struct fastop *) = (void *)ctxt->execute; 4692 void (*fop)(struct fastop *) = (void *)ctxt->execute;
@@ -4657,8 +4721,9 @@ special_insn:
4657 break; 4721 break;
4658 case 0x90 ... 0x97: /* nop / xchg reg, rax */ 4722 case 0x90 ... 0x97: /* nop / xchg reg, rax */
4659 if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX)) 4723 if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX))
4660 break; 4724 ctxt->dst.type = OP_NONE;
4661 rc = em_xchg(ctxt); 4725 else
4726 rc = em_xchg(ctxt);
4662 break; 4727 break;
4663 case 0x98: /* cbw/cwde/cdqe */ 4728 case 0x98: /* cbw/cwde/cdqe */
4664 switch (ctxt->op_bytes) { 4729 switch (ctxt->op_bytes) {
@@ -4709,17 +4774,17 @@ special_insn:
4709 goto done; 4774 goto done;
4710 4775
4711writeback: 4776writeback:
4712 if (!(ctxt->d & NoWrite)) {
4713 rc = writeback(ctxt, &ctxt->dst);
4714 if (rc != X86EMUL_CONTINUE)
4715 goto done;
4716 }
4717 if (ctxt->d & SrcWrite) { 4777 if (ctxt->d & SrcWrite) {
4718 BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR); 4778 BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR);
4719 rc = writeback(ctxt, &ctxt->src); 4779 rc = writeback(ctxt, &ctxt->src);
4720 if (rc != X86EMUL_CONTINUE) 4780 if (rc != X86EMUL_CONTINUE)
4721 goto done; 4781 goto done;
4722 } 4782 }
4783 if (!(ctxt->d & NoWrite)) {
4784 rc = writeback(ctxt, &ctxt->dst);
4785 if (rc != X86EMUL_CONTINUE)
4786 goto done;
4787 }
4723 4788
4724 /* 4789 /*
4725 * restore dst type in case the decoding will be reused 4790 * restore dst type in case the decoding will be reused
@@ -4761,6 +4826,7 @@ writeback:
4761 } 4826 }
4762 goto done; /* skip rip writeback */ 4827 goto done; /* skip rip writeback */
4763 } 4828 }
4829 ctxt->eflags &= ~EFLG_RF;
4764 } 4830 }
4765 4831
4766 ctxt->eip = ctxt->_eip; 4832 ctxt->eip = ctxt->_eip;
@@ -4793,8 +4859,10 @@ twobyte_insn:
4793 ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val); 4859 ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val);
4794 break; 4860 break;
4795 case 0x40 ... 0x4f: /* cmov */ 4861 case 0x40 ... 0x4f: /* cmov */
4796 ctxt->dst.val = ctxt->dst.orig_val = ctxt->src.val; 4862 if (test_cc(ctxt->b, ctxt->eflags))
4797 if (!test_cc(ctxt->b, ctxt->eflags)) 4863 ctxt->dst.val = ctxt->src.val;
4864 else if (ctxt->mode != X86EMUL_MODE_PROT64 ||
4865 ctxt->op_bytes != 4)
4798 ctxt->dst.type = OP_NONE; /* no writeback */ 4866 ctxt->dst.type = OP_NONE; /* no writeback */
4799 break; 4867 break;
4800 case 0x80 ... 0x8f: /* jnz rel, etc*/ 4868 case 0x80 ... 0x8f: /* jnz rel, etc*/
@@ -4818,8 +4886,8 @@ twobyte_insn:
4818 break; 4886 break;
4819 case 0xc3: /* movnti */ 4887 case 0xc3: /* movnti */
4820 ctxt->dst.bytes = ctxt->op_bytes; 4888 ctxt->dst.bytes = ctxt->op_bytes;
4821 ctxt->dst.val = (ctxt->op_bytes == 4) ? (u32) ctxt->src.val : 4889 ctxt->dst.val = (ctxt->op_bytes == 8) ? (u64) ctxt->src.val :
4822 (u64) ctxt->src.val; 4890 (u32) ctxt->src.val;
4823 break; 4891 break;
4824 default: 4892 default:
4825 goto cannot_emulate; 4893 goto cannot_emulate;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 006911858174..3855103f71fd 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1451,7 +1451,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 	vcpu->arch.apic_arb_prio = 0;
 	vcpu->arch.apic_attention = 0;
 
-	apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr="
+	apic_debug("%s: vcpu=%p, id=%d, base_msr="
 		   "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
 		   vcpu, kvm_apic_id(apic),
 		   vcpu->arch.apic_base, apic->base_address);
@@ -1895,7 +1895,7 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
 	/* evaluate pending_events before reading the vector */
 	smp_rmb();
 	sipi_vector = apic->sipi_vector;
-	pr_debug("vcpu %d received sipi with vector # %x\n",
+	apic_debug("vcpu %d received sipi with vector # %x\n",
 		 vcpu->vcpu_id, sipi_vector);
 	kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);
 	vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h
index 9d2e0ffcb190..5aaf35641768 100644
--- a/arch/x86/kvm/mmutrace.h
+++ b/arch/x86/kvm/mmutrace.h
@@ -22,7 +22,7 @@
 		__entry->unsync = sp->unsync;
 
 #define KVM_MMU_PAGE_PRINTK() ({				\
-	const char *ret = p->buffer + p->len;			\
+	const u32 saved_len = p->len;				\
 	static const char *access_str[] = {			\
 		"---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux" \
 	};							\
@@ -41,7 +41,7 @@
 		 role.nxe ? "" : "!",				\
 		 __entry->root_count,				\
 		 __entry->unsync ? "unsync" : "sync", 0);	\
-	ret;							\
+	p->buffer + saved_len;					\
 	})
 
 #define kvm_mmu_trace_pferr_flags \
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index cbecaa90399c..3dd6accb64ec 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -428,6 +428,15 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	return 1;
 }
 
+int kvm_pmu_check_pmc(struct kvm_vcpu *vcpu, unsigned pmc)
+{
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	bool fixed = pmc & (1u << 30);
+	pmc &= ~(3u << 30);
+	return (!fixed && pmc >= pmu->nr_arch_gp_counters) ||
+		(fixed && pmc >= pmu->nr_arch_fixed_counters);
+}
+
 int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data)
 {
 	struct kvm_pmu *pmu = &vcpu->arch.pmu;
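
kvm_pmu_check_pmc() added above parses the RDPMC selector the way the hardware does: bit 30 of ECX selects the fixed-counter set and the low bits index into it. A standalone sketch; the counter counts used in main() are example values, not taken from any particular CPU.

#include <stdio.h>
#include <stdbool.h>

static bool pmc_in_range(unsigned ecx, unsigned nr_gp, unsigned nr_fixed)
{
	bool fixed = ecx & (1u << 30);		/* fixed-counter set selected */
	unsigned idx = ecx & ~(3u << 30);	/* index within that set      */

	return fixed ? idx < nr_fixed : idx < nr_gp;
}

int main(void)
{
	printf("%d\n", pmc_in_range(2, 4, 3));			/* GP counter 2: ok      */
	printf("%d\n", pmc_in_range((1u << 30) | 5, 4, 3));	/* fixed 5: out of range */
	return 0;
}
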
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b5e994ad0135..ddf742768ecf 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -486,14 +486,14 @@ static int is_external_interrupt(u32 info)
486 return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR); 486 return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
487} 487}
488 488
489static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) 489static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu)
490{ 490{
491 struct vcpu_svm *svm = to_svm(vcpu); 491 struct vcpu_svm *svm = to_svm(vcpu);
492 u32 ret = 0; 492 u32 ret = 0;
493 493
494 if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) 494 if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
495 ret |= KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS; 495 ret = KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS;
496 return ret & mask; 496 return ret;
497} 497}
498 498
499static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) 499static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
@@ -1415,7 +1415,16 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
1415 var->avl = (s->attrib >> SVM_SELECTOR_AVL_SHIFT) & 1; 1415 var->avl = (s->attrib >> SVM_SELECTOR_AVL_SHIFT) & 1;
1416 var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1; 1416 var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
1417 var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; 1417 var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
1418 var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; 1418
1419 /*
1420 * AMD CPUs circa 2014 track the G bit for all segments except CS.
1421 * However, the SVM spec states that the G bit is not observed by the
1422 * CPU, and some VMware virtual CPUs drop the G bit for all segments.
1423 * So let's synthesize a legal G bit for all segments, this helps
1424 * running KVM nested. It also helps cross-vendor migration, because
1425 * Intel's vmentry has a check on the 'G' bit.
1426 */
1427 var->g = s->limit > 0xfffff;
1419 1428
1420 /* 1429 /*
1421 * AMD's VMCB does not have an explicit unusable field, so emulate it 1430 * AMD's VMCB does not have an explicit unusable field, so emulate it
@@ -1424,14 +1433,6 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
1424 var->unusable = !var->present || (var->type == 0); 1433 var->unusable = !var->present || (var->type == 0);
1425 1434
1426 switch (seg) { 1435 switch (seg) {
1427 case VCPU_SREG_CS:
1428 /*
1429 * SVM always stores 0 for the 'G' bit in the CS selector in
1430 * the VMCB on a VMEXIT. This hurts cross-vendor migration:
1431 * Intel's VMENTRY has a check on the 'G' bit.
1432 */
1433 var->g = s->limit > 0xfffff;
1434 break;
1435 case VCPU_SREG_TR: 1436 case VCPU_SREG_TR:
1436 /* 1437 /*
1437 * Work around a bug where the busy flag in the tr selector 1438 * Work around a bug where the busy flag in the tr selector
@@ -2116,22 +2117,27 @@ static void nested_svm_unmap(struct page *page)
 
 static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
 {
-	unsigned port;
-	u8 val, bit;
+	unsigned port, size, iopm_len;
+	u16 val, mask;
+	u8 start_bit;
 	u64 gpa;
 
 	if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT)))
 		return NESTED_EXIT_HOST;
 
 	port = svm->vmcb->control.exit_info_1 >> 16;
+	size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >>
+		SVM_IOIO_SIZE_SHIFT;
 	gpa = svm->nested.vmcb_iopm + (port / 8);
-	bit = port % 8;
-	val = 0;
+	start_bit = port % 8;
+	iopm_len = (start_bit + size > 8) ? 2 : 1;
+	mask = (0xf >> (4 - size)) << start_bit;
+	val = 0;
 
-	if (kvm_read_guest(svm->vcpu.kvm, gpa, &val, 1))
-		val &= (1 << bit);
+	if (kvm_read_guest(svm->vcpu.kvm, gpa, &val, iopm_len))
+		return NESTED_EXIT_DONE;
 
-	return val ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
+	return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
 }
 
 static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
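
The reworked nested_svm_intercept_ioio() above checks every byte of a multi-byte port access against L1's I/O permission bitmap, reading two bitmap bytes when the bits straddle a byte boundary. A user-space sketch of the same mask logic; it assumes a little-endian load of the bitmap bytes, as on x86.

#include <stdio.h>
#include <stdint.h>
#include <string.h>

/* An access of `size` bytes at `port` is intercepted if any of the `size`
 * corresponding bits is set in the I/O permission bitmap. */
static int ioio_intercepted(const uint8_t *iopm, unsigned port, unsigned size)
{
	unsigned start_bit = port % 8;
	unsigned len = (start_bit + size > 8) ? 2 : 1;
	uint16_t mask = (0xf >> (4 - size)) << start_bit;
	uint16_t val = 0;

	memcpy(&val, iopm + port / 8, len);	/* little-endian load */
	return (val & mask) != 0;
}

int main(void)
{
	uint8_t iopm[8192] = { 0 };

	iopm[0x60 / 8] |= 1 << (0x60 % 8);	/* intercept port 0x60 */
	printf("%d %d\n",
	       ioio_intercepted(iopm, 0x60, 1),		/* 1: directly intercepted     */
	       ioio_intercepted(iopm, 0x5f, 2));	/* 1: word access overlaps 0x60 */
	return 0;
}
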
@@ -4205,7 +4211,8 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
4205 if (info->intercept == x86_intercept_cr_write) 4211 if (info->intercept == x86_intercept_cr_write)
4206 icpt_info.exit_code += info->modrm_reg; 4212 icpt_info.exit_code += info->modrm_reg;
4207 4213
4208 if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0) 4214 if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0 ||
4215 info->intercept == x86_intercept_clts)
4209 break; 4216 break;
4210 4217
4211 intercept = svm->nested.intercept; 4218 intercept = svm->nested.intercept;
@@ -4250,14 +4257,14 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
4250 u64 exit_info; 4257 u64 exit_info;
4251 u32 bytes; 4258 u32 bytes;
4252 4259
4253 exit_info = (vcpu->arch.regs[VCPU_REGS_RDX] & 0xffff) << 16;
4254
4255 if (info->intercept == x86_intercept_in || 4260 if (info->intercept == x86_intercept_in ||
4256 info->intercept == x86_intercept_ins) { 4261 info->intercept == x86_intercept_ins) {
4257 exit_info |= SVM_IOIO_TYPE_MASK; 4262 exit_info = ((info->src_val & 0xffff) << 16) |
4258 bytes = info->src_bytes; 4263 SVM_IOIO_TYPE_MASK;
4259 } else {
4260 bytes = info->dst_bytes; 4264 bytes = info->dst_bytes;
4265 } else {
4266 exit_info = (info->dst_val & 0xffff) << 16;
4267 bytes = info->src_bytes;
4261 } 4268 }
4262 4269
4263 if (info->intercept == x86_intercept_outs || 4270 if (info->intercept == x86_intercept_outs ||
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 33574c95220d..e850a7d332be 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -721,10 +721,10 @@ TRACE_EVENT(kvm_emulate_insn,
 		),
 
 	TP_fast_assign(
-		__entry->rip = vcpu->arch.emulate_ctxt.fetch.start;
 		__entry->csbase = kvm_x86_ops->get_segment_base(vcpu, VCPU_SREG_CS);
-		__entry->len = vcpu->arch.emulate_ctxt._eip
-			- vcpu->arch.emulate_ctxt.fetch.start;
+		__entry->len = vcpu->arch.emulate_ctxt.fetch.ptr
+			- vcpu->arch.emulate_ctxt.fetch.data;
+		__entry->rip = vcpu->arch.emulate_ctxt._eip - __entry->len;
 		memcpy(__entry->insn,
 		       vcpu->arch.emulate_ctxt.fetch.data,
 		       15);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 801332edefc3..e618f34bde2d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -383,6 +383,9 @@ struct nested_vmx {
383 383
384 struct hrtimer preemption_timer; 384 struct hrtimer preemption_timer;
385 bool preemption_timer_expired; 385 bool preemption_timer_expired;
386
387 /* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
388 u64 vmcs01_debugctl;
386}; 389};
387 390
388#define POSTED_INTR_ON 0 391#define POSTED_INTR_ON 0
@@ -740,7 +743,6 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var);
740static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu); 743static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu);
741static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); 744static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx);
742static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); 745static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
743static bool vmx_mpx_supported(void);
744 746
745static DEFINE_PER_CPU(struct vmcs *, vmxarea); 747static DEFINE_PER_CPU(struct vmcs *, vmxarea);
746static DEFINE_PER_CPU(struct vmcs *, current_vmcs); 748static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -820,7 +822,6 @@ static const u32 vmx_msr_index[] = {
820#endif 822#endif
821 MSR_EFER, MSR_TSC_AUX, MSR_STAR, 823 MSR_EFER, MSR_TSC_AUX, MSR_STAR,
822}; 824};
823#define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index)
824 825
825static inline bool is_page_fault(u32 intr_info) 826static inline bool is_page_fault(u32 intr_info)
826{ 827{
@@ -1940,7 +1941,7 @@ static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
1940 vmcs_writel(GUEST_RFLAGS, rflags); 1941 vmcs_writel(GUEST_RFLAGS, rflags);
1941} 1942}
1942 1943
1943static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) 1944static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
1944{ 1945{
1945 u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); 1946 u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
1946 int ret = 0; 1947 int ret = 0;
@@ -1950,7 +1951,7 @@ static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
1950 if (interruptibility & GUEST_INTR_STATE_MOV_SS) 1951 if (interruptibility & GUEST_INTR_STATE_MOV_SS)
1951 ret |= KVM_X86_SHADOW_INT_MOV_SS; 1952 ret |= KVM_X86_SHADOW_INT_MOV_SS;
1952 1953
1953 return ret & mask; 1954 return ret;
1954} 1955}
1955 1956
1956static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) 1957static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
@@ -2239,10 +2240,13 @@ static inline bool nested_vmx_allowed(struct kvm_vcpu *vcpu)
2239 * or other means. 2240 * or other means.
2240 */ 2241 */
2241static u32 nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high; 2242static u32 nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high;
2243static u32 nested_vmx_true_procbased_ctls_low;
2242static u32 nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high; 2244static u32 nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high;
2243static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high; 2245static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high;
2244static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high; 2246static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high;
2247static u32 nested_vmx_true_exit_ctls_low;
2245static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high; 2248static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high;
2249static u32 nested_vmx_true_entry_ctls_low;
2246static u32 nested_vmx_misc_low, nested_vmx_misc_high; 2250static u32 nested_vmx_misc_low, nested_vmx_misc_high;
2247static u32 nested_vmx_ept_caps; 2251static u32 nested_vmx_ept_caps;
2248static __init void nested_vmx_setup_ctls_msrs(void) 2252static __init void nested_vmx_setup_ctls_msrs(void)
@@ -2265,21 +2269,13 @@ static __init void nested_vmx_setup_ctls_msrs(void)
2265 /* pin-based controls */ 2269 /* pin-based controls */
2266 rdmsr(MSR_IA32_VMX_PINBASED_CTLS, 2270 rdmsr(MSR_IA32_VMX_PINBASED_CTLS,
2267 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high); 2271 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high);
2268 /*
2269 * According to the Intel spec, if bit 55 of VMX_BASIC is off (as it is
2270 * in our case), bits 1, 2 and 4 (i.e., 0x16) must be 1 in this MSR.
2271 */
2272 nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; 2272 nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
2273 nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK | 2273 nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK |
2274 PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS; 2274 PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS;
2275 nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | 2275 nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
2276 PIN_BASED_VMX_PREEMPTION_TIMER; 2276 PIN_BASED_VMX_PREEMPTION_TIMER;
2277 2277
2278 /* 2278 /* exit controls */
2279 * Exit controls
2280 * If bit 55 of VMX_BASIC is off, bits 0-8 and 10, 11, 13, 14, 16 and
2281 * 17 must be 1.
2282 */
2283 rdmsr(MSR_IA32_VMX_EXIT_CTLS, 2279 rdmsr(MSR_IA32_VMX_EXIT_CTLS,
2284 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high); 2280 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high);
2285 nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; 2281 nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
@@ -2296,10 +2292,13 @@ static __init void nested_vmx_setup_ctls_msrs(void)
2296 if (vmx_mpx_supported()) 2292 if (vmx_mpx_supported())
2297 nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; 2293 nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
2298 2294
2295 /* We support free control of debug control saving. */
2296 nested_vmx_true_exit_ctls_low = nested_vmx_exit_ctls_low &
2297 ~VM_EXIT_SAVE_DEBUG_CONTROLS;
2298
2299 /* entry controls */ 2299 /* entry controls */
2300 rdmsr(MSR_IA32_VMX_ENTRY_CTLS, 2300 rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
2301 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high); 2301 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high);
2302 /* If bit 55 of VMX_BASIC is off, bits 0-8 and 12 must be 1. */
2303 nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; 2302 nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
2304 nested_vmx_entry_ctls_high &= 2303 nested_vmx_entry_ctls_high &=
2305#ifdef CONFIG_X86_64 2304#ifdef CONFIG_X86_64
@@ -2311,10 +2310,14 @@ static __init void nested_vmx_setup_ctls_msrs(void)
2311 if (vmx_mpx_supported()) 2310 if (vmx_mpx_supported())
2312 nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; 2311 nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
2313 2312
2313 /* We support free control of debug control loading. */
2314 nested_vmx_true_entry_ctls_low = nested_vmx_entry_ctls_low &
2315 ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
2316
2314 /* cpu-based controls */ 2317 /* cpu-based controls */
2315 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, 2318 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
2316 nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high); 2319 nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high);
2317 nested_vmx_procbased_ctls_low = 0; 2320 nested_vmx_procbased_ctls_low = CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
2318 nested_vmx_procbased_ctls_high &= 2321 nested_vmx_procbased_ctls_high &=
2319 CPU_BASED_VIRTUAL_INTR_PENDING | 2322 CPU_BASED_VIRTUAL_INTR_PENDING |
2320 CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING | 2323 CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING |
@@ -2335,7 +2338,12 @@ static __init void nested_vmx_setup_ctls_msrs(void)
2335 * can use it to avoid exits to L1 - even when L0 runs L2 2338 * can use it to avoid exits to L1 - even when L0 runs L2
2336 * without MSR bitmaps. 2339 * without MSR bitmaps.
2337 */ 2340 */
2338 nested_vmx_procbased_ctls_high |= CPU_BASED_USE_MSR_BITMAPS; 2341 nested_vmx_procbased_ctls_high |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
2342 CPU_BASED_USE_MSR_BITMAPS;
2343
2344 /* We support free control of CR3 access interception. */
2345 nested_vmx_true_procbased_ctls_low = nested_vmx_procbased_ctls_low &
2346 ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING);
2339 2347
2340 /* secondary cpu-based controls */ 2348 /* secondary cpu-based controls */
2341 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, 2349 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
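
In the VMX capability MSRs, the low 32 bits advertise controls that must be 1 (the default1 class); the TRUE_* variants may clear some of those bits to tell a guest hypervisor that the control can in fact be switched off. The new nested_vmx_true_*_ctls_low values are derived exactly that way, by masking out the default1 bits KVM is willing to let L1 disable (debug-control save/load and CR3 access exiting). A small sketch of the derivation; the bit values match the usual kernel definitions but are included only for illustration:

    #include <stdint.h>

    /* bit values for illustration only */
    #define VM_EXIT_SAVE_DEBUG_CONTROLS	0x00000004u
    #define CPU_BASED_CR3_LOAD_EXITING	0x00008000u
    #define CPU_BASED_CR3_STORE_EXITING	0x00010000u

    /* Low word of a TRUE_* control MSR: default1 bits minus the ones the
     * hypervisor allows the nested guest to turn off. */
    static uint32_t true_ctls_low(uint32_t ctls_low, uint32_t optional_bits)
    {
    	return ctls_low & ~optional_bits;
    }

    /* e.g. true_exit_low = true_ctls_low(exit_low, VM_EXIT_SAVE_DEBUG_CONTROLS);
     *      true_proc_low = true_ctls_low(proc_low, CPU_BASED_CR3_LOAD_EXITING |
     *                                              CPU_BASED_CR3_STORE_EXITING); */
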
@@ -2394,7 +2402,7 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
2394 * guest, and the VMCS structure we give it - not about the 2402 * guest, and the VMCS structure we give it - not about the
2395 * VMX support of the underlying hardware. 2403 * VMX support of the underlying hardware.
2396 */ 2404 */
2397 *pdata = VMCS12_REVISION | 2405 *pdata = VMCS12_REVISION | VMX_BASIC_TRUE_CTLS |
2398 ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) | 2406 ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
2399 (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT); 2407 (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);
2400 break; 2408 break;
@@ -2404,16 +2412,25 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
2404 nested_vmx_pinbased_ctls_high); 2412 nested_vmx_pinbased_ctls_high);
2405 break; 2413 break;
2406 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: 2414 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
2415 *pdata = vmx_control_msr(nested_vmx_true_procbased_ctls_low,
2416 nested_vmx_procbased_ctls_high);
2417 break;
2407 case MSR_IA32_VMX_PROCBASED_CTLS: 2418 case MSR_IA32_VMX_PROCBASED_CTLS:
2408 *pdata = vmx_control_msr(nested_vmx_procbased_ctls_low, 2419 *pdata = vmx_control_msr(nested_vmx_procbased_ctls_low,
2409 nested_vmx_procbased_ctls_high); 2420 nested_vmx_procbased_ctls_high);
2410 break; 2421 break;
2411 case MSR_IA32_VMX_TRUE_EXIT_CTLS: 2422 case MSR_IA32_VMX_TRUE_EXIT_CTLS:
2423 *pdata = vmx_control_msr(nested_vmx_true_exit_ctls_low,
2424 nested_vmx_exit_ctls_high);
2425 break;
2412 case MSR_IA32_VMX_EXIT_CTLS: 2426 case MSR_IA32_VMX_EXIT_CTLS:
2413 *pdata = vmx_control_msr(nested_vmx_exit_ctls_low, 2427 *pdata = vmx_control_msr(nested_vmx_exit_ctls_low,
2414 nested_vmx_exit_ctls_high); 2428 nested_vmx_exit_ctls_high);
2415 break; 2429 break;
2416 case MSR_IA32_VMX_TRUE_ENTRY_CTLS: 2430 case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
2431 *pdata = vmx_control_msr(nested_vmx_true_entry_ctls_low,
2432 nested_vmx_entry_ctls_high);
2433 break;
2417 case MSR_IA32_VMX_ENTRY_CTLS: 2434 case MSR_IA32_VMX_ENTRY_CTLS:
2418 *pdata = vmx_control_msr(nested_vmx_entry_ctls_low, 2435 *pdata = vmx_control_msr(nested_vmx_entry_ctls_low,
2419 nested_vmx_entry_ctls_high); 2436 nested_vmx_entry_ctls_high);
@@ -2442,7 +2459,7 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
2442 *pdata = -1ULL; 2459 *pdata = -1ULL;
2443 break; 2460 break;
2444 case MSR_IA32_VMX_VMCS_ENUM: 2461 case MSR_IA32_VMX_VMCS_ENUM:
2445 *pdata = 0x1f; 2462 *pdata = 0x2e; /* highest index: VMX_PREEMPTION_TIMER_VALUE */
2446 break; 2463 break;
2447 case MSR_IA32_VMX_PROCBASED_CTLS2: 2464 case MSR_IA32_VMX_PROCBASED_CTLS2:
2448 *pdata = vmx_control_msr(nested_vmx_secondary_ctls_low, 2465 *pdata = vmx_control_msr(nested_vmx_secondary_ctls_low,
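
All of the new MSR_IA32_VMX_TRUE_* cases report a different low word but reuse the same high word, and VMX_BASIC now advertises VMX_BASIC_TRUE_CTLS so the guest knows the TRUE variants exist. The vmx_control_msr() helper they call is presumably nothing more than packing the allowed 0-settings into bits 31:0 and the allowed 1-settings into bits 63:32, roughly:

    #include <stdint.h>

    /* Assumed shape of vmx_control_msr(): low = allowed 0-settings,
     * high = allowed 1-settings of a VMX capability MSR. */
    static uint64_t pack_control_msr(uint32_t low, uint32_t high)
    {
    	return (uint64_t)low | ((uint64_t)high << 32);
    }
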
@@ -3653,7 +3670,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
3653 vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var)); 3670 vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var));
3654 3671
3655out: 3672out:
3656 vmx->emulation_required |= emulation_required(vcpu); 3673 vmx->emulation_required = emulation_required(vcpu);
3657} 3674}
3658 3675
3659static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) 3676static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
@@ -4422,7 +4439,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
4422 vmx->vcpu.arch.pat = host_pat; 4439 vmx->vcpu.arch.pat = host_pat;
4423 } 4440 }
4424 4441
4425 for (i = 0; i < NR_VMX_MSR; ++i) { 4442 for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) {
4426 u32 index = vmx_msr_index[i]; 4443 u32 index = vmx_msr_index[i];
4427 u32 data_low, data_high; 4444 u32 data_low, data_high;
4428 int j = vmx->nmsrs; 4445 int j = vmx->nmsrs;
@@ -4873,7 +4890,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
4873 if (!(vcpu->guest_debug & 4890 if (!(vcpu->guest_debug &
4874 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { 4891 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
4875 vcpu->arch.dr6 &= ~15; 4892 vcpu->arch.dr6 &= ~15;
4876 vcpu->arch.dr6 |= dr6; 4893 vcpu->arch.dr6 |= dr6 | DR6_RTM;
4877 if (!(dr6 & ~DR6_RESERVED)) /* icebp */ 4894 if (!(dr6 & ~DR6_RESERVED)) /* icebp */
4878 skip_emulated_instruction(vcpu); 4895 skip_emulated_instruction(vcpu);
4879 4896
@@ -5039,7 +5056,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
5039 reg = (exit_qualification >> 8) & 15; 5056 reg = (exit_qualification >> 8) & 15;
5040 switch ((exit_qualification >> 4) & 3) { 5057 switch ((exit_qualification >> 4) & 3) {
5041 case 0: /* mov to cr */ 5058 case 0: /* mov to cr */
5042 val = kvm_register_read(vcpu, reg); 5059 val = kvm_register_readl(vcpu, reg);
5043 trace_kvm_cr_write(cr, val); 5060 trace_kvm_cr_write(cr, val);
5044 switch (cr) { 5061 switch (cr) {
5045 case 0: 5062 case 0:
@@ -5056,7 +5073,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
5056 return 1; 5073 return 1;
5057 case 8: { 5074 case 8: {
5058 u8 cr8_prev = kvm_get_cr8(vcpu); 5075 u8 cr8_prev = kvm_get_cr8(vcpu);
5059 u8 cr8 = kvm_register_read(vcpu, reg); 5076 u8 cr8 = (u8)val;
5060 err = kvm_set_cr8(vcpu, cr8); 5077 err = kvm_set_cr8(vcpu, cr8);
5061 kvm_complete_insn_gp(vcpu, err); 5078 kvm_complete_insn_gp(vcpu, err);
5062 if (irqchip_in_kernel(vcpu->kvm)) 5079 if (irqchip_in_kernel(vcpu->kvm))
@@ -5132,7 +5149,7 @@ static int handle_dr(struct kvm_vcpu *vcpu)
5132 return 0; 5149 return 0;
5133 } else { 5150 } else {
5134 vcpu->arch.dr7 &= ~DR7_GD; 5151 vcpu->arch.dr7 &= ~DR7_GD;
5135 vcpu->arch.dr6 |= DR6_BD; 5152 vcpu->arch.dr6 |= DR6_BD | DR6_RTM;
5136 vmcs_writel(GUEST_DR7, vcpu->arch.dr7); 5153 vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
5137 kvm_queue_exception(vcpu, DB_VECTOR); 5154 kvm_queue_exception(vcpu, DB_VECTOR);
5138 return 1; 5155 return 1;
@@ -5165,7 +5182,7 @@ static int handle_dr(struct kvm_vcpu *vcpu)
5165 return 1; 5182 return 1;
5166 kvm_register_write(vcpu, reg, val); 5183 kvm_register_write(vcpu, reg, val);
5167 } else 5184 } else
5168 if (kvm_set_dr(vcpu, dr, kvm_register_read(vcpu, reg))) 5185 if (kvm_set_dr(vcpu, dr, kvm_register_readl(vcpu, reg)))
5169 return 1; 5186 return 1;
5170 5187
5171 skip_emulated_instruction(vcpu); 5188 skip_emulated_instruction(vcpu);
@@ -5621,7 +5638,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
5621 cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); 5638 cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
5622 intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING; 5639 intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING;
5623 5640
5624 while (!guest_state_valid(vcpu) && count-- != 0) { 5641 while (vmx->emulation_required && count-- != 0) {
5625 if (intr_window_requested && vmx_interrupt_allowed(vcpu)) 5642 if (intr_window_requested && vmx_interrupt_allowed(vcpu))
5626 return handle_interrupt_window(&vmx->vcpu); 5643 return handle_interrupt_window(&vmx->vcpu);
5627 5644
@@ -5655,7 +5672,6 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
5655 schedule(); 5672 schedule();
5656 } 5673 }
5657 5674
5658 vmx->emulation_required = emulation_required(vcpu);
5659out: 5675out:
5660 return ret; 5676 return ret;
5661} 5677}
@@ -5754,22 +5770,27 @@ static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr)
5754 5770
5755/* 5771/*
5756 * Free all VMCSs saved for this vcpu, except the one pointed by 5772 * Free all VMCSs saved for this vcpu, except the one pointed by
5757 * vmx->loaded_vmcs. These include the VMCSs in vmcs02_pool (except the one 5773 * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs
5758 * currently used, if running L2), and vmcs01 when running L2. 5774 * must be &vmx->vmcs01.
5759 */ 5775 */
5760static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx) 5776static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx)
5761{ 5777{
5762 struct vmcs02_list *item, *n; 5778 struct vmcs02_list *item, *n;
5779
5780 WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01);
5763 list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) { 5781 list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) {
5764 if (vmx->loaded_vmcs != &item->vmcs02) 5782 /*
5765 free_loaded_vmcs(&item->vmcs02); 5783 * Something will leak if the above WARN triggers. Better than
5784 * a use-after-free.
5785 */
5786 if (vmx->loaded_vmcs == &item->vmcs02)
5787 continue;
5788
5789 free_loaded_vmcs(&item->vmcs02);
5766 list_del(&item->list); 5790 list_del(&item->list);
5767 kfree(item); 5791 kfree(item);
5792 vmx->nested.vmcs02_num--;
5768 } 5793 }
5769 vmx->nested.vmcs02_num = 0;
5770
5771 if (vmx->loaded_vmcs != &vmx->vmcs01)
5772 free_loaded_vmcs(&vmx->vmcs01);
5773} 5794}
5774 5795
5775/* 5796/*
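
The rewrite of nested_free_all_saved_vmcss keeps the vmcs02 pool count in step with each freed element instead of zeroing it after the loop, warns if the loaded VMCS is unexpectedly still in the pool and skips that entry rather than freeing it out from under the CPU, and drops the vmcs01 special case because the function is now only reached while vmcs01 is loaded. The underlying pattern is the usual safe-iteration-with-deletion idiom; a stripped-down version with stand-in struct and field names:

    #include <linux/list.h>
    #include <linux/slab.h>

    struct vmcs_item {
    	struct list_head list;
    	void *vmcs;		/* stand-in for the per-entry loaded VMCS */
    };

    /* Free every pooled entry except the one still in use; the _safe
     * iterator tolerates list_del() on the current element. */
    static void free_all_except(struct list_head *pool, void *in_use, int *count)
    {
    	struct vmcs_item *item, *n;

    	list_for_each_entry_safe(item, n, pool, list) {
    		if (item->vmcs == in_use)
    			continue;	/* freeing it would be a use-after-free */
    		list_del(&item->list);
    		kfree(item);
    		(*count)--;		/* count tracks reality, not post-loop zeroing */
    	}
    }
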
@@ -5918,7 +5939,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
5918 * which replaces physical address width with 32 5939 * which replaces physical address width with 32
5919 * 5940 *
5920 */ 5941 */
5921 if (!IS_ALIGNED(vmptr, PAGE_SIZE) || (vmptr >> maxphyaddr)) { 5942 if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
5922 nested_vmx_failInvalid(vcpu); 5943 nested_vmx_failInvalid(vcpu);
5923 skip_emulated_instruction(vcpu); 5944 skip_emulated_instruction(vcpu);
5924 return 1; 5945 return 1;
@@ -5936,7 +5957,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
5936 vmx->nested.vmxon_ptr = vmptr; 5957 vmx->nested.vmxon_ptr = vmptr;
5937 break; 5958 break;
5938 case EXIT_REASON_VMCLEAR: 5959 case EXIT_REASON_VMCLEAR:
5939 if (!IS_ALIGNED(vmptr, PAGE_SIZE) || (vmptr >> maxphyaddr)) { 5960 if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
5940 nested_vmx_failValid(vcpu, 5961 nested_vmx_failValid(vcpu,
5941 VMXERR_VMCLEAR_INVALID_ADDRESS); 5962 VMXERR_VMCLEAR_INVALID_ADDRESS);
5942 skip_emulated_instruction(vcpu); 5963 skip_emulated_instruction(vcpu);
@@ -5951,7 +5972,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
5951 } 5972 }
5952 break; 5973 break;
5953 case EXIT_REASON_VMPTRLD: 5974 case EXIT_REASON_VMPTRLD:
5954 if (!IS_ALIGNED(vmptr, PAGE_SIZE) || (vmptr >> maxphyaddr)) { 5975 if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
5955 nested_vmx_failValid(vcpu, 5976 nested_vmx_failValid(vcpu,
5956 VMXERR_VMPTRLD_INVALID_ADDRESS); 5977 VMXERR_VMPTRLD_INVALID_ADDRESS);
5957 skip_emulated_instruction(vcpu); 5978 skip_emulated_instruction(vcpu);
@@ -6086,20 +6107,27 @@ static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
6086static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) 6107static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
6087{ 6108{
6088 u32 exec_control; 6109 u32 exec_control;
6110 if (vmx->nested.current_vmptr == -1ull)
6111 return;
6112
6113 /* current_vmptr and current_vmcs12 are always set/reset together */
6114 if (WARN_ON(vmx->nested.current_vmcs12 == NULL))
6115 return;
6116
6089 if (enable_shadow_vmcs) { 6117 if (enable_shadow_vmcs) {
6090 if (vmx->nested.current_vmcs12 != NULL) { 6118 /* copy to memory all shadowed fields in case
6091 /* copy to memory all shadowed fields in case 6119 they were modified */
6092 they were modified */ 6120 copy_shadow_to_vmcs12(vmx);
6093 copy_shadow_to_vmcs12(vmx); 6121 vmx->nested.sync_shadow_vmcs = false;
6094 vmx->nested.sync_shadow_vmcs = false; 6122 exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
6095 exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); 6123 exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
6096 exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; 6124 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
6097 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); 6125 vmcs_write64(VMCS_LINK_POINTER, -1ull);
6098 vmcs_write64(VMCS_LINK_POINTER, -1ull);
6099 }
6100 } 6126 }
6101 kunmap(vmx->nested.current_vmcs12_page); 6127 kunmap(vmx->nested.current_vmcs12_page);
6102 nested_release_page(vmx->nested.current_vmcs12_page); 6128 nested_release_page(vmx->nested.current_vmcs12_page);
6129 vmx->nested.current_vmptr = -1ull;
6130 vmx->nested.current_vmcs12 = NULL;
6103} 6131}
6104 6132
6105/* 6133/*
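
nested_release_vmcs12() is now safe to call unconditionally: it returns early when current_vmptr holds the -1 sentinel and clears current_vmptr/current_vmcs12 itself, which is why the callers in free_nested, handle_vmclear and handle_vmptrld below lose their open-coded checks and resets. The shape of the pattern, reduced to the sentinel handling only (names shortened, unmap work elided):

    #include <stddef.h>
    #include <stdint.h>

    struct nested_state {			/* stand-in for the kernel struct */
    	uint64_t current_vmptr;		/* (uint64_t)-1 means nothing mapped */
    	void *current_vmcs12;
    };

    static void release_vmcs12(struct nested_state *n)
    {
    	if (n->current_vmptr == (uint64_t)-1)
    		return;			/* idempotent: nothing to release */

    	/* ...unmap the page, sync shadow VMCS fields, etc... */

    	n->current_vmptr = (uint64_t)-1;
    	n->current_vmcs12 = NULL;
    }
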
@@ -6110,12 +6138,9 @@ static void free_nested(struct vcpu_vmx *vmx)
6110{ 6138{
6111 if (!vmx->nested.vmxon) 6139 if (!vmx->nested.vmxon)
6112 return; 6140 return;
6141
6113 vmx->nested.vmxon = false; 6142 vmx->nested.vmxon = false;
6114 if (vmx->nested.current_vmptr != -1ull) { 6143 nested_release_vmcs12(vmx);
6115 nested_release_vmcs12(vmx);
6116 vmx->nested.current_vmptr = -1ull;
6117 vmx->nested.current_vmcs12 = NULL;
6118 }
6119 if (enable_shadow_vmcs) 6144 if (enable_shadow_vmcs)
6120 free_vmcs(vmx->nested.current_shadow_vmcs); 6145 free_vmcs(vmx->nested.current_shadow_vmcs);
6121 /* Unpin physical memory we referred to in current vmcs02 */ 6146 /* Unpin physical memory we referred to in current vmcs02 */
@@ -6152,11 +6177,8 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
6152 if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMCLEAR, &vmptr)) 6177 if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMCLEAR, &vmptr))
6153 return 1; 6178 return 1;
6154 6179
6155 if (vmptr == vmx->nested.current_vmptr) { 6180 if (vmptr == vmx->nested.current_vmptr)
6156 nested_release_vmcs12(vmx); 6181 nested_release_vmcs12(vmx);
6157 vmx->nested.current_vmptr = -1ull;
6158 vmx->nested.current_vmcs12 = NULL;
6159 }
6160 6182
6161 page = nested_get_page(vcpu, vmptr); 6183 page = nested_get_page(vcpu, vmptr);
6162 if (page == NULL) { 6184 if (page == NULL) {
@@ -6384,7 +6406,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
6384 return 1; 6406 return 1;
6385 6407
6386 /* Decode instruction info and find the field to read */ 6408 /* Decode instruction info and find the field to read */
6387 field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); 6409 field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
6388 /* Read the field, zero-extended to a u64 field_value */ 6410 /* Read the field, zero-extended to a u64 field_value */
6389 if (!vmcs12_read_any(vcpu, field, &field_value)) { 6411 if (!vmcs12_read_any(vcpu, field, &field_value)) {
6390 nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); 6412 nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
@@ -6397,7 +6419,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
6397 * on the guest's mode (32 or 64 bit), not on the given field's length. 6419 * on the guest's mode (32 or 64 bit), not on the given field's length.
6398 */ 6420 */
6399 if (vmx_instruction_info & (1u << 10)) { 6421 if (vmx_instruction_info & (1u << 10)) {
6400 kvm_register_write(vcpu, (((vmx_instruction_info) >> 3) & 0xf), 6422 kvm_register_writel(vcpu, (((vmx_instruction_info) >> 3) & 0xf),
6401 field_value); 6423 field_value);
6402 } else { 6424 } else {
6403 if (get_vmx_mem_address(vcpu, exit_qualification, 6425 if (get_vmx_mem_address(vcpu, exit_qualification,
@@ -6434,21 +6456,21 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
6434 return 1; 6456 return 1;
6435 6457
6436 if (vmx_instruction_info & (1u << 10)) 6458 if (vmx_instruction_info & (1u << 10))
6437 field_value = kvm_register_read(vcpu, 6459 field_value = kvm_register_readl(vcpu,
6438 (((vmx_instruction_info) >> 3) & 0xf)); 6460 (((vmx_instruction_info) >> 3) & 0xf));
6439 else { 6461 else {
6440 if (get_vmx_mem_address(vcpu, exit_qualification, 6462 if (get_vmx_mem_address(vcpu, exit_qualification,
6441 vmx_instruction_info, &gva)) 6463 vmx_instruction_info, &gva))
6442 return 1; 6464 return 1;
6443 if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, 6465 if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva,
6444 &field_value, (is_long_mode(vcpu) ? 8 : 4), &e)) { 6466 &field_value, (is_64_bit_mode(vcpu) ? 8 : 4), &e)) {
6445 kvm_inject_page_fault(vcpu, &e); 6467 kvm_inject_page_fault(vcpu, &e);
6446 return 1; 6468 return 1;
6447 } 6469 }
6448 } 6470 }
6449 6471
6450 6472
6451 field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); 6473 field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
6452 if (vmcs_field_readonly(field)) { 6474 if (vmcs_field_readonly(field)) {
6453 nested_vmx_failValid(vcpu, 6475 nested_vmx_failValid(vcpu,
6454 VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT); 6476 VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT);
@@ -6498,9 +6520,8 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
6498 skip_emulated_instruction(vcpu); 6520 skip_emulated_instruction(vcpu);
6499 return 1; 6521 return 1;
6500 } 6522 }
6501 if (vmx->nested.current_vmptr != -1ull)
6502 nested_release_vmcs12(vmx);
6503 6523
6524 nested_release_vmcs12(vmx);
6504 vmx->nested.current_vmptr = vmptr; 6525 vmx->nested.current_vmptr = vmptr;
6505 vmx->nested.current_vmcs12 = new_vmcs12; 6526 vmx->nested.current_vmcs12 = new_vmcs12;
6506 vmx->nested.current_vmcs12_page = page; 6527 vmx->nested.current_vmcs12_page = page;
@@ -6571,7 +6592,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
6571 } 6592 }
6572 6593
6573 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); 6594 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
6574 type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf); 6595 type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
6575 6596
6576 types = (nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6; 6597 types = (nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
6577 6598
@@ -6751,7 +6772,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
6751 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 6772 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
6752 int cr = exit_qualification & 15; 6773 int cr = exit_qualification & 15;
6753 int reg = (exit_qualification >> 8) & 15; 6774 int reg = (exit_qualification >> 8) & 15;
6754 unsigned long val = kvm_register_read(vcpu, reg); 6775 unsigned long val = kvm_register_readl(vcpu, reg);
6755 6776
6756 switch ((exit_qualification >> 4) & 3) { 6777 switch ((exit_qualification >> 4) & 3) {
6757 case 0: /* mov to cr */ 6778 case 0: /* mov to cr */
@@ -7112,7 +7133,26 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
7112 if (max_irr == -1) 7133 if (max_irr == -1)
7113 return; 7134 return;
7114 7135
7115 vmx_set_rvi(max_irr); 7136 /*
7137 * If a vmexit is needed, vmx_check_nested_events handles it.
7138 */
7139 if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
7140 return;
7141
7142 if (!is_guest_mode(vcpu)) {
7143 vmx_set_rvi(max_irr);
7144 return;
7145 }
7146
7147 /*
7148 * Fall back to pre-APICv interrupt injection since L2
7149 * is run without virtual interrupt delivery.
7150 */
7151 if (!kvm_event_needs_reinjection(vcpu) &&
7152 vmx_interrupt_allowed(vcpu)) {
7153 kvm_queue_interrupt(vcpu, max_irr, false);
7154 vmx_inject_irq(vcpu);
7155 }
7116} 7156}
7117 7157
7118static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) 7158static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
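
The rewritten vmx_hwapic_irr_update splits three cases that the old unconditional vmx_set_rvi() call glossed over once nested virtualization is involved. Flattened into a pure decision function, with the vcpu state the real code inspects reduced to booleans (can_inject_now stands for "no event needs reinjection and interrupts are allowed"):

    #include <stdbool.h>

    enum irr_action { DEFER_TO_NESTED_EXIT, SET_RVI, INJECT_MANUALLY, DO_NOTHING };

    static enum irr_action hwapic_irr_action(bool in_guest_mode,
    					     bool nested_exit_on_intr,
    					     bool can_inject_now)
    {
    	if (in_guest_mode && nested_exit_on_intr)
    		return DEFER_TO_NESTED_EXIT;	/* vmx_check_nested_events vmexits */
    	if (!in_guest_mode)
    		return SET_RVI;			/* APICv virtual-interrupt delivery */
    	/* L2 runs without virtual interrupt delivery: inject the old way. */
    	return can_inject_now ? INJECT_MANUALLY : DO_NOTHING;
    }
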
@@ -7520,13 +7560,31 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
7520 vmx_complete_interrupts(vmx); 7560 vmx_complete_interrupts(vmx);
7521} 7561}
7522 7562
7563static void vmx_load_vmcs01(struct kvm_vcpu *vcpu)
7564{
7565 struct vcpu_vmx *vmx = to_vmx(vcpu);
7566 int cpu;
7567
7568 if (vmx->loaded_vmcs == &vmx->vmcs01)
7569 return;
7570
7571 cpu = get_cpu();
7572 vmx->loaded_vmcs = &vmx->vmcs01;
7573 vmx_vcpu_put(vcpu);
7574 vmx_vcpu_load(vcpu, cpu);
7575 vcpu->cpu = cpu;
7576 put_cpu();
7577}
7578
7523static void vmx_free_vcpu(struct kvm_vcpu *vcpu) 7579static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
7524{ 7580{
7525 struct vcpu_vmx *vmx = to_vmx(vcpu); 7581 struct vcpu_vmx *vmx = to_vmx(vcpu);
7526 7582
7527 free_vpid(vmx); 7583 free_vpid(vmx);
7528 free_loaded_vmcs(vmx->loaded_vmcs); 7584 leave_guest_mode(vcpu);
7585 vmx_load_vmcs01(vcpu);
7529 free_nested(vmx); 7586 free_nested(vmx);
7587 free_loaded_vmcs(vmx->loaded_vmcs);
7530 kfree(vmx->guest_msrs); 7588 kfree(vmx->guest_msrs);
7531 kvm_vcpu_uninit(vcpu); 7589 kvm_vcpu_uninit(vcpu);
7532 kmem_cache_free(kvm_vcpu_cache, vmx); 7590 kmem_cache_free(kvm_vcpu_cache, vmx);
@@ -7548,6 +7606,9 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
7548 goto free_vcpu; 7606 goto free_vcpu;
7549 7607
7550 vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); 7608 vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
7609 BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) * sizeof(vmx->guest_msrs[0])
7610 > PAGE_SIZE);
7611
7551 err = -ENOMEM; 7612 err = -ENOMEM;
7552 if (!vmx->guest_msrs) { 7613 if (!vmx->guest_msrs) {
7553 goto uninit_vcpu; 7614 goto uninit_vcpu;
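
The kmalloc(PAGE_SIZE) for guest_msrs silently assumes the whole vmx_msr_index table fits in one page; the added BUILD_BUG_ON turns that assumption into a build failure should the table ever grow too large. Outside the kernel the same check would be a static assertion; for illustration only (the MSR numbers and entry layout below are examples, not the kernel's table):

    #include <stdint.h>

    #define PAGE_SIZE	4096u
    #define ARRAY_SIZE(a)	(sizeof(a) / sizeof((a)[0]))

    struct shared_msr_entry { uint32_t index; uint64_t data; uint64_t mask; };

    static const uint32_t msr_index[] = { 0xc0000080u, 0xc0000102u };	/* examples */

    /* Equivalent of the BUILD_BUG_ON(): refuse to compile if the per-vCPU
     * MSR table would outgrow the single page allocated for it. */
    _Static_assert(ARRAY_SIZE(msr_index) * sizeof(struct shared_msr_entry)
    	       <= PAGE_SIZE, "guest MSR table must fit in one page");
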
@@ -7836,7 +7897,13 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7836 vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base); 7897 vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
7837 vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base); 7898 vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
7838 7899
7839 vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl); 7900 if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) {
7901 kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
7902 vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
7903 } else {
7904 kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
7905 vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl);
7906 }
7840 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 7907 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
7841 vmcs12->vm_entry_intr_info_field); 7908 vmcs12->vm_entry_intr_info_field);
7842 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, 7909 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
@@ -7846,7 +7913,6 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7846 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 7913 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
7847 vmcs12->guest_interruptibility_info); 7914 vmcs12->guest_interruptibility_info);
7848 vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); 7915 vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
7849 kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
7850 vmx_set_rflags(vcpu, vmcs12->guest_rflags); 7916 vmx_set_rflags(vcpu, vmcs12->guest_rflags);
7851 vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 7917 vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
7852 vmcs12->guest_pending_dbg_exceptions); 7918 vmcs12->guest_pending_dbg_exceptions);
@@ -8113,14 +8179,14 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
8113 } 8179 }
8114 8180
8115 if ((vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_MSR_BITMAPS) && 8181 if ((vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_MSR_BITMAPS) &&
8116 !IS_ALIGNED(vmcs12->msr_bitmap, PAGE_SIZE)) { 8182 !PAGE_ALIGNED(vmcs12->msr_bitmap)) {
8117 /*TODO: Also verify bits beyond physical address width are 0*/ 8183 /*TODO: Also verify bits beyond physical address width are 0*/
8118 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); 8184 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
8119 return 1; 8185 return 1;
8120 } 8186 }
8121 8187
8122 if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) && 8188 if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) &&
8123 !IS_ALIGNED(vmcs12->apic_access_addr, PAGE_SIZE)) { 8189 !PAGE_ALIGNED(vmcs12->apic_access_addr)) {
8124 /*TODO: Also verify bits beyond physical address width are 0*/ 8190 /*TODO: Also verify bits beyond physical address width are 0*/
8125 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); 8191 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
8126 return 1; 8192 return 1;
@@ -8136,15 +8202,18 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
8136 } 8202 }
8137 8203
8138 if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control, 8204 if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
8139 nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high) || 8205 nested_vmx_true_procbased_ctls_low,
8206 nested_vmx_procbased_ctls_high) ||
8140 !vmx_control_verify(vmcs12->secondary_vm_exec_control, 8207 !vmx_control_verify(vmcs12->secondary_vm_exec_control,
8141 nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high) || 8208 nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high) ||
8142 !vmx_control_verify(vmcs12->pin_based_vm_exec_control, 8209 !vmx_control_verify(vmcs12->pin_based_vm_exec_control,
8143 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high) || 8210 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high) ||
8144 !vmx_control_verify(vmcs12->vm_exit_controls, 8211 !vmx_control_verify(vmcs12->vm_exit_controls,
8145 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high) || 8212 nested_vmx_true_exit_ctls_low,
8213 nested_vmx_exit_ctls_high) ||
8146 !vmx_control_verify(vmcs12->vm_entry_controls, 8214 !vmx_control_verify(vmcs12->vm_entry_controls,
8147 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high)) 8215 nested_vmx_true_entry_ctls_low,
8216 nested_vmx_entry_ctls_high))
8148 { 8217 {
8149 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); 8218 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
8150 return 1; 8219 return 1;
@@ -8221,6 +8290,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
8221 8290
8222 vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); 8291 vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET);
8223 8292
8293 if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
8294 vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
8295
8224 cpu = get_cpu(); 8296 cpu = get_cpu();
8225 vmx->loaded_vmcs = vmcs02; 8297 vmx->loaded_vmcs = vmcs02;
8226 vmx_vcpu_put(vcpu); 8298 vmx_vcpu_put(vcpu);
@@ -8398,7 +8470,6 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
8398 vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); 8470 vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
8399 vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12); 8471 vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12);
8400 8472
8401 kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7);
8402 vmcs12->guest_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP); 8473 vmcs12->guest_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
8403 vmcs12->guest_rip = kvm_register_read(vcpu, VCPU_REGS_RIP); 8474 vmcs12->guest_rip = kvm_register_read(vcpu, VCPU_REGS_RIP);
8404 vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS); 8475 vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS);
@@ -8477,9 +8548,13 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
8477 (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | 8548 (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
8478 (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE); 8549 (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE);
8479 8550
8551 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS) {
8552 kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7);
8553 vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
8554 }
8555
8480 /* TODO: These cannot have changed unless we have MSR bitmaps and 8556 /* TODO: These cannot have changed unless we have MSR bitmaps and
8481 * the relevant bit asks not to trap the change */ 8557 * the relevant bit asks not to trap the change */
8482 vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
8483 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT) 8558 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
8484 vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT); 8559 vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT);
8485 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER) 8560 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER)
@@ -8670,7 +8745,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
8670 unsigned long exit_qualification) 8745 unsigned long exit_qualification)
8671{ 8746{
8672 struct vcpu_vmx *vmx = to_vmx(vcpu); 8747 struct vcpu_vmx *vmx = to_vmx(vcpu);
8673 int cpu;
8674 struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 8748 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
8675 8749
8676 /* trying to cancel vmlaunch/vmresume is a bug */ 8750 /* trying to cancel vmlaunch/vmresume is a bug */
@@ -8695,12 +8769,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
8695 vmcs12->vm_exit_intr_error_code, 8769 vmcs12->vm_exit_intr_error_code,
8696 KVM_ISA_VMX); 8770 KVM_ISA_VMX);
8697 8771
8698 cpu = get_cpu(); 8772 vmx_load_vmcs01(vcpu);
8699 vmx->loaded_vmcs = &vmx->vmcs01;
8700 vmx_vcpu_put(vcpu);
8701 vmx_vcpu_load(vcpu, cpu);
8702 vcpu->cpu = cpu;
8703 put_cpu();
8704 8773
8705 vm_entry_controls_init(vmx, vmcs_read32(VM_ENTRY_CONTROLS)); 8774 vm_entry_controls_init(vmx, vmcs_read32(VM_ENTRY_CONTROLS));
8706 vm_exit_controls_init(vmx, vmcs_read32(VM_EXIT_CONTROLS)); 8775 vm_exit_controls_init(vmx, vmcs_read32(VM_EXIT_CONTROLS));
@@ -8890,7 +8959,7 @@ static int __init vmx_init(void)
8890 8959
8891 rdmsrl_safe(MSR_EFER, &host_efer); 8960 rdmsrl_safe(MSR_EFER, &host_efer);
8892 8961
8893 for (i = 0; i < NR_VMX_MSR; ++i) 8962 for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
8894 kvm_define_shared_msr(i, vmx_msr_index[i]); 8963 kvm_define_shared_msr(i, vmx_msr_index[i]);
8895 8964
8896 vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL); 8965 vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f6449334ec45..b86d329b953a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -87,6 +87,7 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
87 87
88static void update_cr8_intercept(struct kvm_vcpu *vcpu); 88static void update_cr8_intercept(struct kvm_vcpu *vcpu);
89static void process_nmi(struct kvm_vcpu *vcpu); 89static void process_nmi(struct kvm_vcpu *vcpu);
90static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
90 91
91struct kvm_x86_ops *kvm_x86_ops; 92struct kvm_x86_ops *kvm_x86_ops;
92EXPORT_SYMBOL_GPL(kvm_x86_ops); 93EXPORT_SYMBOL_GPL(kvm_x86_ops);
@@ -211,6 +212,7 @@ static void shared_msr_update(unsigned slot, u32 msr)
211 212
212void kvm_define_shared_msr(unsigned slot, u32 msr) 213void kvm_define_shared_msr(unsigned slot, u32 msr)
213{ 214{
215 BUG_ON(slot >= KVM_NR_SHARED_MSRS);
214 if (slot >= shared_msrs_global.nr) 216 if (slot >= shared_msrs_global.nr)
215 shared_msrs_global.nr = slot + 1; 217 shared_msrs_global.nr = slot + 1;
216 shared_msrs_global.msrs[slot] = msr; 218 shared_msrs_global.msrs[slot] = msr;
@@ -310,6 +312,31 @@ static int exception_class(int vector)
310 return EXCPT_BENIGN; 312 return EXCPT_BENIGN;
311} 313}
312 314
315#define EXCPT_FAULT 0
316#define EXCPT_TRAP 1
317#define EXCPT_ABORT 2
318#define EXCPT_INTERRUPT 3
319
320static int exception_type(int vector)
321{
322 unsigned int mask;
323
324 if (WARN_ON(vector > 31 || vector == NMI_VECTOR))
325 return EXCPT_INTERRUPT;
326
327 mask = 1 << vector;
328
329 /* #DB is trap, as instruction watchpoints are handled elsewhere */
330 if (mask & ((1 << DB_VECTOR) | (1 << BP_VECTOR) | (1 << OF_VECTOR)))
331 return EXCPT_TRAP;
332
333 if (mask & ((1 << DF_VECTOR) | (1 << MC_VECTOR)))
334 return EXCPT_ABORT;
335
336 /* Reserved exceptions will result in fault */
337 return EXCPT_FAULT;
338}
339
313static void kvm_multiple_exception(struct kvm_vcpu *vcpu, 340static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
314 unsigned nr, bool has_error, u32 error_code, 341 unsigned nr, bool has_error, u32 error_code,
315 bool reinject) 342 bool reinject)
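
exception_type() complements the existing exception_class(): it sorts a vector into fault, trap or abort, and the inject_pending_event hunk further down uses it to set RFLAGS.RF only for faults. The architectural reason is that a fault leaves RIP on the faulting instruction, so RF must be set in the pushed flags to keep instruction breakpoints from re-firing when that instruction is restarted, while traps (#DB, #BP, #OF) already point past the instruction. Condensed into one predicate (NMI handling and the WARN path from the hunk omitted):

    #include <stdbool.h>
    #include <stdint.h>

    #define DB_VECTOR	1
    #define BP_VECTOR	3
    #define OF_VECTOR	4
    #define DF_VECTOR	8
    #define MC_VECTOR	18

    /* True when the injected exception is a fault and so should carry RF. */
    static bool fault_needs_rf(unsigned int vector)
    {
    	uint32_t not_fault = (1u << DB_VECTOR) | (1u << BP_VECTOR) |
    			     (1u << OF_VECTOR) |		/* traps  */
    			     (1u << DF_VECTOR) | (1u << MC_VECTOR); /* aborts */

    	return vector <= 31 && !(not_fault & (1u << vector));
    }
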
@@ -758,6 +785,15 @@ static void kvm_update_dr7(struct kvm_vcpu *vcpu)
758 vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED; 785 vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED;
759} 786}
760 787
788static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
789{
790 u64 fixed = DR6_FIXED_1;
791
792 if (!guest_cpuid_has_rtm(vcpu))
793 fixed |= DR6_RTM;
794 return fixed;
795}
796
761static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) 797static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
762{ 798{
763 switch (dr) { 799 switch (dr) {
@@ -773,7 +809,7 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
773 case 6: 809 case 6:
774 if (val & 0xffffffff00000000ULL) 810 if (val & 0xffffffff00000000ULL)
775 return -1; /* #GP */ 811 return -1; /* #GP */
776 vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; 812 vcpu->arch.dr6 = (val & DR6_VOLATILE) | kvm_dr6_fixed(vcpu);
777 kvm_update_dr6(vcpu); 813 kvm_update_dr6(vcpu);
778 break; 814 break;
779 case 5: 815 case 5:
@@ -1215,6 +1251,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
1215 unsigned long flags; 1251 unsigned long flags;
1216 s64 usdiff; 1252 s64 usdiff;
1217 bool matched; 1253 bool matched;
1254 bool already_matched;
1218 u64 data = msr->data; 1255 u64 data = msr->data;
1219 1256
1220 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); 1257 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
@@ -1279,6 +1316,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
1279 pr_debug("kvm: adjusted tsc offset by %llu\n", delta); 1316 pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
1280 } 1317 }
1281 matched = true; 1318 matched = true;
1319 already_matched = (vcpu->arch.this_tsc_generation == kvm->arch.cur_tsc_generation);
1282 } else { 1320 } else {
1283 /* 1321 /*
1284 * We split periods of matched TSC writes into generations. 1322 * We split periods of matched TSC writes into generations.
@@ -1294,7 +1332,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
1294 kvm->arch.cur_tsc_write = data; 1332 kvm->arch.cur_tsc_write = data;
1295 kvm->arch.cur_tsc_offset = offset; 1333 kvm->arch.cur_tsc_offset = offset;
1296 matched = false; 1334 matched = false;
1297 pr_debug("kvm: new tsc generation %u, clock %llu\n", 1335 pr_debug("kvm: new tsc generation %llu, clock %llu\n",
1298 kvm->arch.cur_tsc_generation, data); 1336 kvm->arch.cur_tsc_generation, data);
1299 } 1337 }
1300 1338
@@ -1319,10 +1357,11 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
1319 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); 1357 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
1320 1358
1321 spin_lock(&kvm->arch.pvclock_gtod_sync_lock); 1359 spin_lock(&kvm->arch.pvclock_gtod_sync_lock);
1322 if (matched) 1360 if (!matched) {
1323 kvm->arch.nr_vcpus_matched_tsc++;
1324 else
1325 kvm->arch.nr_vcpus_matched_tsc = 0; 1361 kvm->arch.nr_vcpus_matched_tsc = 0;
1362 } else if (!already_matched) {
1363 kvm->arch.nr_vcpus_matched_tsc++;
1364 }
1326 1365
1327 kvm_track_tsc_matching(vcpu); 1366 kvm_track_tsc_matching(vcpu);
1328 spin_unlock(&kvm->arch.pvclock_gtod_sync_lock); 1367 spin_unlock(&kvm->arch.pvclock_gtod_sync_lock);
@@ -2032,6 +2071,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2032 data &= ~(u64)0x40; /* ignore flush filter disable */ 2071 data &= ~(u64)0x40; /* ignore flush filter disable */
2033 data &= ~(u64)0x100; /* ignore ignne emulation enable */ 2072 data &= ~(u64)0x100; /* ignore ignne emulation enable */
2034 data &= ~(u64)0x8; /* ignore TLB cache disable */ 2073 data &= ~(u64)0x8; /* ignore TLB cache disable */
2074 data &= ~(u64)0x40000; /* ignore Mc status write enable */
2035 if (data != 0) { 2075 if (data != 0) {
2036 vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", 2076 vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
2037 data); 2077 data);
@@ -2974,9 +3014,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
2974 vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft; 3014 vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft;
2975 events->interrupt.nr = vcpu->arch.interrupt.nr; 3015 events->interrupt.nr = vcpu->arch.interrupt.nr;
2976 events->interrupt.soft = 0; 3016 events->interrupt.soft = 0;
2977 events->interrupt.shadow = 3017 events->interrupt.shadow = kvm_x86_ops->get_interrupt_shadow(vcpu);
2978 kvm_x86_ops->get_interrupt_shadow(vcpu,
2979 KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI);
2980 3018
2981 events->nmi.injected = vcpu->arch.nmi_injected; 3019 events->nmi.injected = vcpu->arch.nmi_injected;
2982 events->nmi.pending = vcpu->arch.nmi_pending != 0; 3020 events->nmi.pending = vcpu->arch.nmi_pending != 0;
@@ -4082,7 +4120,8 @@ static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
4082 4120
4083 if (gpa == UNMAPPED_GVA) 4121 if (gpa == UNMAPPED_GVA)
4084 return X86EMUL_PROPAGATE_FAULT; 4122 return X86EMUL_PROPAGATE_FAULT;
4085 ret = kvm_read_guest(vcpu->kvm, gpa, data, toread); 4123 ret = kvm_read_guest_page(vcpu->kvm, gpa >> PAGE_SHIFT, data,
4124 offset, toread);
4086 if (ret < 0) { 4125 if (ret < 0) {
4087 r = X86EMUL_IO_NEEDED; 4126 r = X86EMUL_IO_NEEDED;
4088 goto out; 4127 goto out;
@@ -4103,10 +4142,24 @@ static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
4103{ 4142{
4104 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); 4143 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4105 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; 4144 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
4145 unsigned offset;
4146 int ret;
4106 4147
4107 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 4148 /* Inline kvm_read_guest_virt_helper for speed. */
4108 access | PFERR_FETCH_MASK, 4149 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access|PFERR_FETCH_MASK,
4109 exception); 4150 exception);
4151 if (unlikely(gpa == UNMAPPED_GVA))
4152 return X86EMUL_PROPAGATE_FAULT;
4153
4154 offset = addr & (PAGE_SIZE-1);
4155 if (WARN_ON(offset + bytes > PAGE_SIZE))
4156 bytes = (unsigned)PAGE_SIZE - offset;
4157 ret = kvm_read_guest_page(vcpu->kvm, gpa >> PAGE_SHIFT, val,
4158 offset, bytes);
4159 if (unlikely(ret < 0))
4160 return X86EMUL_IO_NEEDED;
4161
4162 return X86EMUL_CONTINUE;
4110} 4163}
4111 4164
4112int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt, 4165int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
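
kvm_fetch_guest_virt now translates the fetch address once and reads at most up to the end of that guest page instead of going through the generic looping helper; instruction fetches are at most 15 bytes and the emulator side is expected to keep each fetch within a single page, which is what the WARN_ON guards. The offset/clamp step in isolation (simplified types; the real code then reads guest memory for that many bytes):

    #include <stddef.h>
    #include <stdint.h>

    #define PAGE_SIZE 4096u

    /* Clamp a read starting at addr so it never crosses the page boundary;
     * returns the byte count to read and the offset within the page. */
    static size_t clamp_to_page(uint64_t addr, size_t bytes, size_t *page_off)
    {
    	size_t offset = (size_t)(addr & (PAGE_SIZE - 1));

    	*page_off = offset;
    	if (offset + bytes > PAGE_SIZE)		/* mirrors the WARN_ON path */
    		bytes = PAGE_SIZE - offset;
    	return bytes;
    }
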
@@ -4730,7 +4783,6 @@ static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector,
4730 if (desc->g) 4783 if (desc->g)
4731 var.limit = (var.limit << 12) | 0xfff; 4784 var.limit = (var.limit << 12) | 0xfff;
4732 var.type = desc->type; 4785 var.type = desc->type;
4733 var.present = desc->p;
4734 var.dpl = desc->dpl; 4786 var.dpl = desc->dpl;
4735 var.db = desc->d; 4787 var.db = desc->d;
4736 var.s = desc->s; 4788 var.s = desc->s;
@@ -4762,6 +4814,12 @@ static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
4762 return kvm_set_msr(emul_to_vcpu(ctxt), &msr); 4814 return kvm_set_msr(emul_to_vcpu(ctxt), &msr);
4763} 4815}
4764 4816
4817static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
4818 u32 pmc)
4819{
4820 return kvm_pmu_check_pmc(emul_to_vcpu(ctxt), pmc);
4821}
4822
4765static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt, 4823static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
4766 u32 pmc, u64 *pdata) 4824 u32 pmc, u64 *pdata)
4767{ 4825{
@@ -4838,6 +4896,7 @@ static const struct x86_emulate_ops emulate_ops = {
4838 .set_dr = emulator_set_dr, 4896 .set_dr = emulator_set_dr,
4839 .set_msr = emulator_set_msr, 4897 .set_msr = emulator_set_msr,
4840 .get_msr = emulator_get_msr, 4898 .get_msr = emulator_get_msr,
4899 .check_pmc = emulator_check_pmc,
4841 .read_pmc = emulator_read_pmc, 4900 .read_pmc = emulator_read_pmc,
4842 .halt = emulator_halt, 4901 .halt = emulator_halt,
4843 .wbinvd = emulator_wbinvd, 4902 .wbinvd = emulator_wbinvd,
@@ -4850,7 +4909,7 @@ static const struct x86_emulate_ops emulate_ops = {
4850 4909
4851static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) 4910static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
4852{ 4911{
4853 u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask); 4912 u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu);
4854 /* 4913 /*
4855 * an sti; sti; sequence only disable interrupts for the first 4914 * an sti; sti; sequence only disable interrupts for the first
4856 * instruction. So, if the last instruction, be it emulated or 4915 * instruction. So, if the last instruction, be it emulated or
@@ -4858,8 +4917,13 @@ static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
4858 * means that the last instruction is an sti. We should not 4917 * means that the last instruction is an sti. We should not
4859 * leave the flag on in this case. The same goes for mov ss 4918 * leave the flag on in this case. The same goes for mov ss
4860 */ 4919 */
4861 if (!(int_shadow & mask)) 4920 if (int_shadow & mask)
4921 mask = 0;
4922 if (unlikely(int_shadow || mask)) {
4862 kvm_x86_ops->set_interrupt_shadow(vcpu, mask); 4923 kvm_x86_ops->set_interrupt_shadow(vcpu, mask);
4924 if (!mask)
4925 kvm_make_request(KVM_REQ_EVENT, vcpu);
4926 }
4863} 4927}
4864 4928
4865static void inject_emulated_exception(struct kvm_vcpu *vcpu) 4929static void inject_emulated_exception(struct kvm_vcpu *vcpu)
@@ -4874,19 +4938,6 @@ static void inject_emulated_exception(struct kvm_vcpu *vcpu)
4874 kvm_queue_exception(vcpu, ctxt->exception.vector); 4938 kvm_queue_exception(vcpu, ctxt->exception.vector);
4875} 4939}
4876 4940
4877static void init_decode_cache(struct x86_emulate_ctxt *ctxt)
4878{
4879 memset(&ctxt->opcode_len, 0,
4880 (void *)&ctxt->_regs - (void *)&ctxt->opcode_len);
4881
4882 ctxt->fetch.start = 0;
4883 ctxt->fetch.end = 0;
4884 ctxt->io_read.pos = 0;
4885 ctxt->io_read.end = 0;
4886 ctxt->mem_read.pos = 0;
4887 ctxt->mem_read.end = 0;
4888}
4889
4890static void init_emulate_ctxt(struct kvm_vcpu *vcpu) 4941static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
4891{ 4942{
4892 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; 4943 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
@@ -5085,23 +5136,22 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
5085 return dr6; 5136 return dr6;
5086} 5137}
5087 5138
5088static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, int *r) 5139static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflags, int *r)
5089{ 5140{
5090 struct kvm_run *kvm_run = vcpu->run; 5141 struct kvm_run *kvm_run = vcpu->run;
5091 5142
5092 /* 5143 /*
5093 * Use the "raw" value to see if TF was passed to the processor. 5144 * rflags is the old, "raw" value of the flags. The new value has
5094 * Note that the new value of the flags has not been saved yet. 5145 * not been saved yet.
5095 * 5146 *
5096 * This is correct even for TF set by the guest, because "the 5147 * This is correct even for TF set by the guest, because "the
5097 * processor will not generate this exception after the instruction 5148 * processor will not generate this exception after the instruction
5098 * that sets the TF flag". 5149 * that sets the TF flag".
5099 */ 5150 */
5100 unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
5101
5102 if (unlikely(rflags & X86_EFLAGS_TF)) { 5151 if (unlikely(rflags & X86_EFLAGS_TF)) {
5103 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { 5152 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
5104 kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1; 5153 kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 |
5154 DR6_RTM;
5105 kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip; 5155 kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
5106 kvm_run->debug.arch.exception = DB_VECTOR; 5156 kvm_run->debug.arch.exception = DB_VECTOR;
5107 kvm_run->exit_reason = KVM_EXIT_DEBUG; 5157 kvm_run->exit_reason = KVM_EXIT_DEBUG;
@@ -5114,7 +5164,7 @@ static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, int *r)
5114 * cleared by the processor". 5164 * cleared by the processor".
5115 */ 5165 */
5116 vcpu->arch.dr6 &= ~15; 5166 vcpu->arch.dr6 &= ~15;
5117 vcpu->arch.dr6 |= DR6_BS; 5167 vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
5118 kvm_queue_exception(vcpu, DB_VECTOR); 5168 kvm_queue_exception(vcpu, DB_VECTOR);
5119 } 5169 }
5120 } 5170 }
@@ -5133,7 +5183,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
5133 vcpu->arch.eff_db); 5183 vcpu->arch.eff_db);
5134 5184
5135 if (dr6 != 0) { 5185 if (dr6 != 0) {
5136 kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1; 5186 kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM;
5137 kvm_run->debug.arch.pc = kvm_rip_read(vcpu) + 5187 kvm_run->debug.arch.pc = kvm_rip_read(vcpu) +
5138 get_segment_base(vcpu, VCPU_SREG_CS); 5188 get_segment_base(vcpu, VCPU_SREG_CS);
5139 5189
@@ -5144,14 +5194,15 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
5144 } 5194 }
5145 } 5195 }
5146 5196
5147 if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK)) { 5197 if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK) &&
5198 !(kvm_get_rflags(vcpu) & X86_EFLAGS_RF)) {
5148 dr6 = kvm_vcpu_check_hw_bp(eip, 0, 5199 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
5149 vcpu->arch.dr7, 5200 vcpu->arch.dr7,
5150 vcpu->arch.db); 5201 vcpu->arch.db);
5151 5202
5152 if (dr6 != 0) { 5203 if (dr6 != 0) {
5153 vcpu->arch.dr6 &= ~15; 5204 vcpu->arch.dr6 &= ~15;
5154 vcpu->arch.dr6 |= dr6; 5205 vcpu->arch.dr6 |= dr6 | DR6_RTM;
5155 kvm_queue_exception(vcpu, DB_VECTOR); 5206 kvm_queue_exception(vcpu, DB_VECTOR);
5156 *r = EMULATE_DONE; 5207 *r = EMULATE_DONE;
5157 return true; 5208 return true;
@@ -5215,6 +5266,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
5215 5266
5216 if (emulation_type & EMULTYPE_SKIP) { 5267 if (emulation_type & EMULTYPE_SKIP) {
5217 kvm_rip_write(vcpu, ctxt->_eip); 5268 kvm_rip_write(vcpu, ctxt->_eip);
5269 if (ctxt->eflags & X86_EFLAGS_RF)
5270 kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF);
5218 return EMULATE_DONE; 5271 return EMULATE_DONE;
5219 } 5272 }
5220 5273
@@ -5265,13 +5318,22 @@ restart:
5265 r = EMULATE_DONE; 5318 r = EMULATE_DONE;
5266 5319
5267 if (writeback) { 5320 if (writeback) {
5321 unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
5268 toggle_interruptibility(vcpu, ctxt->interruptibility); 5322 toggle_interruptibility(vcpu, ctxt->interruptibility);
5269 kvm_make_request(KVM_REQ_EVENT, vcpu);
5270 vcpu->arch.emulate_regs_need_sync_to_vcpu = false; 5323 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
5271 kvm_rip_write(vcpu, ctxt->eip); 5324 kvm_rip_write(vcpu, ctxt->eip);
5272 if (r == EMULATE_DONE) 5325 if (r == EMULATE_DONE)
5273 kvm_vcpu_check_singlestep(vcpu, &r); 5326 kvm_vcpu_check_singlestep(vcpu, rflags, &r);
5274 kvm_set_rflags(vcpu, ctxt->eflags); 5327 __kvm_set_rflags(vcpu, ctxt->eflags);
5328
5329 /*
5330 * For STI, interrupts are shadowed; so KVM_REQ_EVENT will
5331 * do nothing, and it will be requested again as soon as
5332 * the shadow expires. But we still need to check here,
5333 * because POPF has no interrupt shadow.
5334 */
5335 if (unlikely((ctxt->eflags & ~rflags) & X86_EFLAGS_IF))
5336 kvm_make_request(KVM_REQ_EVENT, vcpu);
5275 } else 5337 } else
5276 vcpu->arch.emulate_regs_need_sync_to_vcpu = true; 5338 vcpu->arch.emulate_regs_need_sync_to_vcpu = true;
5277 5339
@@ -5662,7 +5724,6 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
5662 u64 param, ingpa, outgpa, ret; 5724 u64 param, ingpa, outgpa, ret;
5663 uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0; 5725 uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
5664 bool fast, longmode; 5726 bool fast, longmode;
5665 int cs_db, cs_l;
5666 5727
5667 /* 5728 /*
5668 * hypercall generates UD from non zero cpl and real mode 5729 * hypercall generates UD from non zero cpl and real mode
@@ -5673,8 +5734,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
5673 return 0; 5734 return 0;
5674 } 5735 }
5675 5736
5676 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); 5737 longmode = is_64_bit_mode(vcpu);
5677 longmode = is_long_mode(vcpu) && cs_l == 1;
5678 5738
5679 if (!longmode) { 5739 if (!longmode) {
5680 param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) | 5740 param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
@@ -5739,7 +5799,7 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
5739int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) 5799int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
5740{ 5800{
5741 unsigned long nr, a0, a1, a2, a3, ret; 5801 unsigned long nr, a0, a1, a2, a3, ret;
5742 int r = 1; 5802 int op_64_bit, r = 1;
5743 5803
5744 if (kvm_hv_hypercall_enabled(vcpu->kvm)) 5804 if (kvm_hv_hypercall_enabled(vcpu->kvm))
5745 return kvm_hv_hypercall(vcpu); 5805 return kvm_hv_hypercall(vcpu);
@@ -5752,7 +5812,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
5752 5812
5753 trace_kvm_hypercall(nr, a0, a1, a2, a3); 5813 trace_kvm_hypercall(nr, a0, a1, a2, a3);
5754 5814
5755 if (!is_long_mode(vcpu)) { 5815 op_64_bit = is_64_bit_mode(vcpu);
5816 if (!op_64_bit) {
5756 nr &= 0xFFFFFFFF; 5817 nr &= 0xFFFFFFFF;
5757 a0 &= 0xFFFFFFFF; 5818 a0 &= 0xFFFFFFFF;
5758 a1 &= 0xFFFFFFFF; 5819 a1 &= 0xFFFFFFFF;
@@ -5778,6 +5839,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
5778 break; 5839 break;
5779 } 5840 }
5780out: 5841out:
5842 if (!op_64_bit)
5843 ret = (u32)ret;
5781 kvm_register_write(vcpu, VCPU_REGS_RAX, ret); 5844 kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
5782 ++vcpu->stat.hypercalls; 5845 ++vcpu->stat.hypercalls;
5783 return r; 5846 return r;
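
The hypercall path now keys its 32- versus 64-bit handling on is_64_bit_mode() (long mode plus a 64-bit code segment, per the x86.h hunk below) instead of reading CS.L by hand, and it truncates the return value as well as the arguments for 32-bit callers, matching what a 32-bit register writeback does on real hardware. The truncation rule on its own:

    #include <stdbool.h>
    #include <stdint.h>

    /* A 32-bit caller only ever sees the low 32 bits of RAX. */
    static uint64_t hypercall_ret(uint64_t ret, bool op_64_bit)
    {
    	return op_64_bit ? ret : (uint32_t)ret;
    }
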
@@ -5856,6 +5919,11 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
5856 trace_kvm_inj_exception(vcpu->arch.exception.nr, 5919 trace_kvm_inj_exception(vcpu->arch.exception.nr,
5857 vcpu->arch.exception.has_error_code, 5920 vcpu->arch.exception.has_error_code,
5858 vcpu->arch.exception.error_code); 5921 vcpu->arch.exception.error_code);
5922
5923 if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
5924 __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
5925 X86_EFLAGS_RF);
5926
5859 kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr, 5927 kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
5860 vcpu->arch.exception.has_error_code, 5928 vcpu->arch.exception.has_error_code,
5861 vcpu->arch.exception.error_code, 5929 vcpu->arch.exception.error_code,
@@ -5887,6 +5955,18 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
5887 kvm_x86_ops->set_nmi(vcpu); 5955 kvm_x86_ops->set_nmi(vcpu);
5888 } 5956 }
5889 } else if (kvm_cpu_has_injectable_intr(vcpu)) { 5957 } else if (kvm_cpu_has_injectable_intr(vcpu)) {
5958 /*
5959 * Because interrupts can be injected asynchronously, we are
5960 * calling check_nested_events again here to avoid a race condition.
5961 * See https://lkml.org/lkml/2014/7/2/60 for discussion about this
5962 * proposal and current concerns. Perhaps we should be setting
5963 * KVM_REQ_EVENT only on certain events and not unconditionally?
5964 */
5965 if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
5966 r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
5967 if (r != 0)
5968 return r;
5969 }
5890 if (kvm_x86_ops->interrupt_allowed(vcpu)) { 5970 if (kvm_x86_ops->interrupt_allowed(vcpu)) {
5891 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), 5971 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
5892 false); 5972 false);
@@ -6835,9 +6915,11 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
6835 atomic_set(&vcpu->arch.nmi_queued, 0); 6915 atomic_set(&vcpu->arch.nmi_queued, 0);
6836 vcpu->arch.nmi_pending = 0; 6916 vcpu->arch.nmi_pending = 0;
6837 vcpu->arch.nmi_injected = false; 6917 vcpu->arch.nmi_injected = false;
6918 kvm_clear_interrupt_queue(vcpu);
6919 kvm_clear_exception_queue(vcpu);
6838 6920
6839 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); 6921 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
6840 vcpu->arch.dr6 = DR6_FIXED_1; 6922 vcpu->arch.dr6 = DR6_INIT;
6841 kvm_update_dr6(vcpu); 6923 kvm_update_dr6(vcpu);
6842 vcpu->arch.dr7 = DR7_FIXED_1; 6924 vcpu->arch.dr7 = DR7_FIXED_1;
6843 kvm_update_dr7(vcpu); 6925 kvm_update_dr7(vcpu);
@@ -7393,12 +7475,17 @@ unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
7393} 7475}
7394EXPORT_SYMBOL_GPL(kvm_get_rflags); 7476EXPORT_SYMBOL_GPL(kvm_get_rflags);
7395 7477
7396void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) 7478static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
7397{ 7479{
7398 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && 7480 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
7399 kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip)) 7481 kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
7400 rflags |= X86_EFLAGS_TF; 7482 rflags |= X86_EFLAGS_TF;
7401 kvm_x86_ops->set_rflags(vcpu, rflags); 7483 kvm_x86_ops->set_rflags(vcpu, rflags);
7484}
7485
7486void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
7487{
7488 __kvm_set_rflags(vcpu, rflags);
7402 kvm_make_request(KVM_REQ_EVENT, vcpu); 7489 kvm_make_request(KVM_REQ_EVENT, vcpu);
7403} 7490}
7404EXPORT_SYMBOL_GPL(kvm_set_rflags); 7491EXPORT_SYMBOL_GPL(kvm_set_rflags);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 8c97bac9a895..306a1b77581f 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -47,6 +47,16 @@ static inline int is_long_mode(struct kvm_vcpu *vcpu)
47#endif 47#endif
48} 48}
49 49
50static inline bool is_64_bit_mode(struct kvm_vcpu *vcpu)
51{
52 int cs_db, cs_l;
53
54 if (!is_long_mode(vcpu))
55 return false;
56 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
57 return cs_l;
58}
59
50static inline bool mmu_is_nested(struct kvm_vcpu *vcpu) 60static inline bool mmu_is_nested(struct kvm_vcpu *vcpu)
51{ 61{
52 return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu; 62 return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu;
@@ -108,6 +118,23 @@ static inline bool vcpu_match_mmio_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
108 return false; 118 return false;
109} 119}
110 120
121static inline unsigned long kvm_register_readl(struct kvm_vcpu *vcpu,
122 enum kvm_reg reg)
123{
124 unsigned long val = kvm_register_read(vcpu, reg);
125
126 return is_64_bit_mode(vcpu) ? val : (u32)val;
127}
128
129static inline void kvm_register_writel(struct kvm_vcpu *vcpu,
130 enum kvm_reg reg,
131 unsigned long val)
132{
133 if (!is_64_bit_mode(vcpu))
134 val = (u32)val;
135 return kvm_register_write(vcpu, reg, val);
136}
137
111void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); 138void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);
112void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); 139void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
113int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); 140int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
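The kvm_register_readl()/kvm_register_writel() helpers added above let emulation code read and write guest registers without open-coding the 32-bit masking that applies outside 64-bit mode. Below is a minimal user-space sketch of the masking rule they implement; the struct, function names and register index are illustrative, only the truncation behaviour mirrors the patch.

#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>

/* Illustrative model: only the 32-bit truncation rule is taken from the patch. */
struct vcpu_model {
	bool mode_64bit;
	uint64_t regs[16];
};

static uint64_t register_readl(struct vcpu_model *v, int reg)
{
	uint64_t val = v->regs[reg];

	return v->mode_64bit ? val : (uint32_t)val;
}

static void register_writel(struct vcpu_model *v, int reg, uint64_t val)
{
	v->regs[reg] = v->mode_64bit ? val : (uint32_t)val;
}

int main(void)
{
	struct vcpu_model v = { .mode_64bit = false };

	register_writel(&v, 0, 0xdeadbeefcafef00dULL);
	/* A guest outside 64-bit mode only ever sees the low 32 bits. */
	printf("RAX as seen by the guest: %#llx\n",
	       (unsigned long long)register_readl(&v, 0)); /* 0xcafef00d */
	return 0;
}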
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 36642793e315..1dbade870f90 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -577,6 +577,8 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
577 577
578static const char nx_warning[] = KERN_CRIT 578static const char nx_warning[] = KERN_CRIT
579"kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n"; 579"kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n";
580static const char smep_warning[] = KERN_CRIT
581"unable to execute userspace code (SMEP?) (uid: %d)\n";
580 582
581static void 583static void
582show_fault_oops(struct pt_regs *regs, unsigned long error_code, 584show_fault_oops(struct pt_regs *regs, unsigned long error_code,
@@ -597,6 +599,10 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
597 599
598 if (pte && pte_present(*pte) && !pte_exec(*pte)) 600 if (pte && pte_present(*pte) && !pte_exec(*pte))
599 printk(nx_warning, from_kuid(&init_user_ns, current_uid())); 601 printk(nx_warning, from_kuid(&init_user_ns, current_uid()));
602 if (pte && pte_present(*pte) && pte_exec(*pte) &&
603 (pgd_flags(*pgd) & _PAGE_USER) &&
604 (read_cr4() & X86_CR4_SMEP))
605 printk(smep_warning, from_kuid(&init_user_ns, current_uid()));
600 } 606 }
601 607
602 printk(KERN_ALERT "BUG: unable to handle kernel "); 608 printk(KERN_ALERT "BUG: unable to handle kernel ");
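The new smep_warning branch fires only when the faulting page is present and executable but belongs to a user mapping (the PGD carries _PAGE_USER) while CR4.SMEP is set, which distinguishes it from the plain NX case. A self-contained sketch of the two predicates, assuming simplified boolean fields rather than real page-table accessors:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative model of the two warning conditions in show_fault_oops(). */
struct fault_model {
	bool pte_present, pte_exec, pgd_user, cr4_smep;
};

static bool nx_case(const struct fault_model *f)
{
	return f->pte_present && !f->pte_exec;
}

static bool smep_case(const struct fault_model *f)
{
	return f->pte_present && f->pte_exec && f->pgd_user && f->cr4_smep;
}

int main(void)
{
	struct fault_model nx   = { true, false, false, false };
	struct fault_model smep = { true, true,  true,  true  };

	printf("nx fault:   nx=%d smep=%d\n", nx_case(&nx),   smep_case(&nx));   /* 1 0 */
	printf("smep fault: nx=%d smep=%d\n", nx_case(&smep), smep_case(&smep)); /* 0 1 */
	return 0;
}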
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index f97130618113..66dba36f2343 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -18,6 +18,13 @@
18#include <asm/dma.h> /* for MAX_DMA_PFN */ 18#include <asm/dma.h> /* for MAX_DMA_PFN */
19#include <asm/microcode.h> 19#include <asm/microcode.h>
20 20
21/*
22 * We need to define the tracepoints somewhere, and tlb.c
23 * is only compied when SMP=y.
24 */
25#define CREATE_TRACE_POINTS
26#include <trace/events/tlb.h>
27
21#include "mm_internal.h" 28#include "mm_internal.h"
22 29
23static unsigned long __initdata pgt_buf_start; 30static unsigned long __initdata pgt_buf_start;
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index dd8dda167a24..1fe33987de02 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -49,6 +49,7 @@ void leave_mm(int cpu)
49 if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) { 49 if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) {
50 cpumask_clear_cpu(cpu, mm_cpumask(active_mm)); 50 cpumask_clear_cpu(cpu, mm_cpumask(active_mm));
51 load_cr3(swapper_pg_dir); 51 load_cr3(swapper_pg_dir);
52 trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
52 } 53 }
53} 54}
54EXPORT_SYMBOL_GPL(leave_mm); 55EXPORT_SYMBOL_GPL(leave_mm);
@@ -102,20 +103,24 @@ static void flush_tlb_func(void *info)
102 103
103 if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm)) 104 if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm))
104 return; 105 return;
106 if (!f->flush_end)
107 f->flush_end = f->flush_start + PAGE_SIZE;
105 108
106 count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); 109 count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
107 if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { 110 if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
108 if (f->flush_end == TLB_FLUSH_ALL) 111 if (f->flush_end == TLB_FLUSH_ALL) {
109 local_flush_tlb(); 112 local_flush_tlb();
110 else if (!f->flush_end) 113 trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, TLB_FLUSH_ALL);
111 __flush_tlb_single(f->flush_start); 114 } else {
112 else {
113 unsigned long addr; 115 unsigned long addr;
116 unsigned long nr_pages =
 117 (f->flush_end - f->flush_start) / PAGE_SIZE;
114 addr = f->flush_start; 118 addr = f->flush_start;
115 while (addr < f->flush_end) { 119 while (addr < f->flush_end) {
116 __flush_tlb_single(addr); 120 __flush_tlb_single(addr);
117 addr += PAGE_SIZE; 121 addr += PAGE_SIZE;
118 } 122 }
123 trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, nr_pages);
119 } 124 }
120 } else 125 } else
121 leave_mm(smp_processor_id()); 126 leave_mm(smp_processor_id());
@@ -153,46 +158,45 @@ void flush_tlb_current_task(void)
153 158
154 count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); 159 count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
155 local_flush_tlb(); 160 local_flush_tlb();
161 trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL);
156 if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) 162 if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
157 flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL); 163 flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
158 preempt_enable(); 164 preempt_enable();
159} 165}
160 166
167/*
168 * See Documentation/x86/tlb.txt for details. We choose 33
169 * because it is large enough to cover the vast majority (at
170 * least 95%) of allocations, and is small enough that we are
171 * confident it will not cause too much overhead. Each single
172 * flush is about 100 ns, so this caps the maximum overhead at
173 * _about_ 3,000 ns.
174 *
175 * This is in units of pages.
176 */
177unsigned long tlb_single_page_flush_ceiling = 33;
178
161void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, 179void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
162 unsigned long end, unsigned long vmflag) 180 unsigned long end, unsigned long vmflag)
163{ 181{
164 unsigned long addr; 182 unsigned long addr;
165 unsigned act_entries, tlb_entries = 0; 183 /* do a global flush by default */
166 unsigned long nr_base_pages; 184 unsigned long base_pages_to_flush = TLB_FLUSH_ALL;
167 185
168 preempt_disable(); 186 preempt_disable();
169 if (current->active_mm != mm) 187 if (current->active_mm != mm)
170 goto flush_all; 188 goto out;
171 189
172 if (!current->mm) { 190 if (!current->mm) {
173 leave_mm(smp_processor_id()); 191 leave_mm(smp_processor_id());
174 goto flush_all; 192 goto out;
175 } 193 }
176 194
177 if (end == TLB_FLUSH_ALL || tlb_flushall_shift == -1 195 if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB))
178 || vmflag & VM_HUGETLB) { 196 base_pages_to_flush = (end - start) >> PAGE_SHIFT;
179 local_flush_tlb();
180 goto flush_all;
181 }
182
183 /* In modern CPU, last level tlb used for both data/ins */
184 if (vmflag & VM_EXEC)
185 tlb_entries = tlb_lli_4k[ENTRIES];
186 else
187 tlb_entries = tlb_lld_4k[ENTRIES];
188 197
189 /* Assume all of TLB entries was occupied by this task */ 198 if (base_pages_to_flush > tlb_single_page_flush_ceiling) {
190 act_entries = tlb_entries >> tlb_flushall_shift; 199 base_pages_to_flush = TLB_FLUSH_ALL;
191 act_entries = mm->total_vm > act_entries ? act_entries : mm->total_vm;
192 nr_base_pages = (end - start) >> PAGE_SHIFT;
193
194 /* tlb_flushall_shift is on balance point, details in commit log */
195 if (nr_base_pages > act_entries) {
196 count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); 200 count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
197 local_flush_tlb(); 201 local_flush_tlb();
198 } else { 202 } else {
@@ -201,17 +205,15 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
201 count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); 205 count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
202 __flush_tlb_single(addr); 206 __flush_tlb_single(addr);
203 } 207 }
204
205 if (cpumask_any_but(mm_cpumask(mm),
206 smp_processor_id()) < nr_cpu_ids)
207 flush_tlb_others(mm_cpumask(mm), mm, start, end);
208 preempt_enable();
209 return;
210 } 208 }
211 209 trace_tlb_flush(TLB_LOCAL_MM_SHOOTDOWN, base_pages_to_flush);
212flush_all: 210out:
211 if (base_pages_to_flush == TLB_FLUSH_ALL) {
212 start = 0UL;
213 end = TLB_FLUSH_ALL;
214 }
213 if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) 215 if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
214 flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL); 216 flush_tlb_others(mm_cpumask(mm), mm, start, end);
215 preempt_enable(); 217 preempt_enable();
216} 218}
217 219
@@ -260,32 +262,26 @@ static void do_kernel_range_flush(void *info)
260 262
261void flush_tlb_kernel_range(unsigned long start, unsigned long end) 263void flush_tlb_kernel_range(unsigned long start, unsigned long end)
262{ 264{
263 unsigned act_entries;
264 struct flush_tlb_info info;
265
266 /* In modern CPU, last level tlb used for both data/ins */
267 act_entries = tlb_lld_4k[ENTRIES];
268 265
269 /* Balance as user space task's flush, a bit conservative */ 266 /* Balance as user space task's flush, a bit conservative */
270 if (end == TLB_FLUSH_ALL || tlb_flushall_shift == -1 || 267 if (end == TLB_FLUSH_ALL ||
271 (end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift) 268 (end - start) > tlb_single_page_flush_ceiling * PAGE_SIZE) {
272
273 on_each_cpu(do_flush_tlb_all, NULL, 1); 269 on_each_cpu(do_flush_tlb_all, NULL, 1);
274 else { 270 } else {
271 struct flush_tlb_info info;
275 info.flush_start = start; 272 info.flush_start = start;
276 info.flush_end = end; 273 info.flush_end = end;
277 on_each_cpu(do_kernel_range_flush, &info, 1); 274 on_each_cpu(do_kernel_range_flush, &info, 1);
278 } 275 }
279} 276}
280 277
281#ifdef CONFIG_DEBUG_TLBFLUSH
282static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf, 278static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf,
283 size_t count, loff_t *ppos) 279 size_t count, loff_t *ppos)
284{ 280{
285 char buf[32]; 281 char buf[32];
286 unsigned int len; 282 unsigned int len;
287 283
288 len = sprintf(buf, "%hd\n", tlb_flushall_shift); 284 len = sprintf(buf, "%ld\n", tlb_single_page_flush_ceiling);
289 return simple_read_from_buffer(user_buf, count, ppos, buf, len); 285 return simple_read_from_buffer(user_buf, count, ppos, buf, len);
290} 286}
291 287
@@ -294,20 +290,20 @@ static ssize_t tlbflush_write_file(struct file *file,
294{ 290{
295 char buf[32]; 291 char buf[32];
296 ssize_t len; 292 ssize_t len;
297 s8 shift; 293 int ceiling;
298 294
299 len = min(count, sizeof(buf) - 1); 295 len = min(count, sizeof(buf) - 1);
300 if (copy_from_user(buf, user_buf, len)) 296 if (copy_from_user(buf, user_buf, len))
301 return -EFAULT; 297 return -EFAULT;
302 298
303 buf[len] = '\0'; 299 buf[len] = '\0';
304 if (kstrtos8(buf, 0, &shift)) 300 if (kstrtoint(buf, 0, &ceiling))
305 return -EINVAL; 301 return -EINVAL;
306 302
307 if (shift < -1 || shift >= BITS_PER_LONG) 303 if (ceiling < 0)
308 return -EINVAL; 304 return -EINVAL;
309 305
310 tlb_flushall_shift = shift; 306 tlb_single_page_flush_ceiling = ceiling;
311 return count; 307 return count;
312} 308}
313 309
@@ -317,11 +313,10 @@ static const struct file_operations fops_tlbflush = {
317 .llseek = default_llseek, 313 .llseek = default_llseek,
318}; 314};
319 315
320static int __init create_tlb_flushall_shift(void) 316static int __init create_tlb_single_page_flush_ceiling(void)
321{ 317{
322 debugfs_create_file("tlb_flushall_shift", S_IRUSR | S_IWUSR, 318 debugfs_create_file("tlb_single_page_flush_ceiling", S_IRUSR | S_IWUSR,
323 arch_debugfs_dir, NULL, &fops_tlbflush); 319 arch_debugfs_dir, NULL, &fops_tlbflush);
324 return 0; 320 return 0;
325} 321}
326late_initcall(create_tlb_flushall_shift); 322late_initcall(create_tlb_single_page_flush_ceiling);
327#endif
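The rewrite replaces the per-CPU tlb_flushall_shift heuristic with a flat page-count ceiling: ranges of up to tlb_single_page_flush_ceiling pages are flushed one page at a time, anything larger falls back to a full flush (33 pages at roughly 100 ns per single-page flush is about 3,300 ns, in line with the comment's "about 3,000 ns" cap). A small user-space model of just that decision, assuming 4K pages; everything apart from the constant 33 is illustrative, not kernel code.

#include <stdio.h>

#define PAGE_SHIFT	12
#define TLB_FLUSH_ALL	(~0UL)

static unsigned long tlb_single_page_flush_ceiling = 33;

/* Mirrors the flush_tlb_mm_range() decision: count 4K pages in the range
 * and fall back to a full flush once the count exceeds the ceiling. */
static unsigned long pages_to_flush(unsigned long start, unsigned long end)
{
	unsigned long pages = (end - start) >> PAGE_SHIFT;

	if (pages > tlb_single_page_flush_ceiling)
		return TLB_FLUSH_ALL;
	return pages;
}

int main(void)
{
	/* 128K is 32 pages, under the ceiling: flushed page by page. */
	printf("128K range: %lu pages\n", pages_to_flush(0, 128 << 10));
	/* 256K is 64 pages, over the ceiling: a single full flush instead. */
	printf("256K range: full flush? %d\n",
	       pages_to_flush(0, 256 << 10) == TLB_FLUSH_ALL);
	return 0;
}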
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index b5e60268d93f..c61ea57d1ba1 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -326,6 +326,27 @@ static void pci_fixup_video(struct pci_dev *pdev)
326 struct pci_bus *bus; 326 struct pci_bus *bus;
327 u16 config; 327 u16 config;
328 328
329 if (!vga_default_device()) {
330 resource_size_t start, end;
331 int i;
332
333 /* Does firmware framebuffer belong to us? */
334 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
335 if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM))
336 continue;
337
338 start = pci_resource_start(pdev, i);
339 end = pci_resource_end(pdev, i);
340
341 if (!start || !end)
342 continue;
343
344 if (screen_info.lfb_base >= start &&
345 (screen_info.lfb_base + screen_info.lfb_size) < end)
346 vga_set_default_device(pdev);
347 }
348 }
349
329 /* Is VGA routed to us? */ 350 /* Is VGA routed to us? */
330 bus = pdev->bus; 351 bus = pdev->bus;
331 while (bus) { 352 while (bus) {
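The added block claims the device as the default VGA device only when the firmware framebuffer advertised in screen_info lies entirely within one of the device's memory BARs. A self-contained sketch of that containment test with made-up addresses; the function name is illustrative.

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

/* Illustrative model: does [fb_base, fb_base + fb_size) sit inside the
 * resource window [start, end]? Mirrors the comparison added above. */
static bool fb_inside_bar(uint64_t fb_base, uint64_t fb_size,
			  uint64_t start, uint64_t end)
{
	if (!start || !end)
		return false;
	return fb_base >= start && fb_base + fb_size < end;
}

int main(void)
{
	/* Framebuffer inside a 256MB BAR at 0xe0000000: device claims it. */
	printf("%d\n", fb_inside_bar(0xe0010000, 0x300000,
				     0xe0000000, 0xefffffff)); /* 1 */
	/* Framebuffer below the BAR: not ours. */
	printf("%d\n", fb_inside_bar(0xd0000000, 0x300000,
				     0xe0000000, 0xefffffff)); /* 0 */
	return 0;
}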
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index a19ed92e74e4..2ae525e0d8ba 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -162,6 +162,10 @@ pcibios_align_resource(void *data, const struct resource *res,
162 return start; 162 return start;
163 if (start & 0x300) 163 if (start & 0x300)
164 start = (start + 0x3ff) & ~0x3ff; 164 start = (start + 0x3ff) & ~0x3ff;
165 } else if (res->flags & IORESOURCE_MEM) {
166 /* The low 1MB range is reserved for ISA cards */
167 if (start < BIOS_END)
168 start = BIOS_END;
165 } 169 }
166 return start; 170 return start;
167} 171}
diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile
index d51045afcaaf..2846aaab5103 100644
--- a/arch/x86/platform/efi/Makefile
+++ b/arch/x86/platform/efi/Makefile
@@ -1,4 +1,4 @@
1obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o 1obj-$(CONFIG_EFI) += quirks.o efi.o efi_$(BITS).o efi_stub_$(BITS).o
2obj-$(CONFIG_ACPI_BGRT) += efi-bgrt.o 2obj-$(CONFIG_ACPI_BGRT) += efi-bgrt.o
3obj-$(CONFIG_EARLY_PRINTK_EFI) += early_printk.o 3obj-$(CONFIG_EARLY_PRINTK_EFI) += early_printk.o
4obj-$(CONFIG_EFI_MIXED) += efi_thunk_$(BITS).o 4obj-$(CONFIG_EFI_MIXED) += efi_thunk_$(BITS).o
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 87fc96bcc13c..850da94fef30 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -56,13 +56,6 @@
56 56
57#define EFI_DEBUG 57#define EFI_DEBUG
58 58
59#define EFI_MIN_RESERVE 5120
60
61#define EFI_DUMMY_GUID \
62 EFI_GUID(0x4424ac57, 0xbe4b, 0x47dd, 0x9e, 0x97, 0xed, 0x50, 0xf0, 0x9f, 0x92, 0xa9)
63
64static efi_char16_t efi_dummy_name[6] = { 'D', 'U', 'M', 'M', 'Y', 0 };
65
66struct efi_memory_map memmap; 59struct efi_memory_map memmap;
67 60
68static struct efi efi_phys __initdata; 61static struct efi efi_phys __initdata;
@@ -95,139 +88,6 @@ static int __init setup_add_efi_memmap(char *arg)
95} 88}
96early_param("add_efi_memmap", setup_add_efi_memmap); 89early_param("add_efi_memmap", setup_add_efi_memmap);
97 90
98static bool efi_no_storage_paranoia;
99
100static int __init setup_storage_paranoia(char *arg)
101{
102 efi_no_storage_paranoia = true;
103 return 0;
104}
105early_param("efi_no_storage_paranoia", setup_storage_paranoia);
106
107static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
108{
109 unsigned long flags;
110 efi_status_t status;
111
112 spin_lock_irqsave(&rtc_lock, flags);
113 status = efi_call_virt(get_time, tm, tc);
114 spin_unlock_irqrestore(&rtc_lock, flags);
115 return status;
116}
117
118static efi_status_t virt_efi_set_time(efi_time_t *tm)
119{
120 unsigned long flags;
121 efi_status_t status;
122
123 spin_lock_irqsave(&rtc_lock, flags);
124 status = efi_call_virt(set_time, tm);
125 spin_unlock_irqrestore(&rtc_lock, flags);
126 return status;
127}
128
129static efi_status_t virt_efi_get_wakeup_time(efi_bool_t *enabled,
130 efi_bool_t *pending,
131 efi_time_t *tm)
132{
133 unsigned long flags;
134 efi_status_t status;
135
136 spin_lock_irqsave(&rtc_lock, flags);
137 status = efi_call_virt(get_wakeup_time, enabled, pending, tm);
138 spin_unlock_irqrestore(&rtc_lock, flags);
139 return status;
140}
141
142static efi_status_t virt_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
143{
144 unsigned long flags;
145 efi_status_t status;
146
147 spin_lock_irqsave(&rtc_lock, flags);
148 status = efi_call_virt(set_wakeup_time, enabled, tm);
149 spin_unlock_irqrestore(&rtc_lock, flags);
150 return status;
151}
152
153static efi_status_t virt_efi_get_variable(efi_char16_t *name,
154 efi_guid_t *vendor,
155 u32 *attr,
156 unsigned long *data_size,
157 void *data)
158{
159 return efi_call_virt(get_variable,
160 name, vendor, attr,
161 data_size, data);
162}
163
164static efi_status_t virt_efi_get_next_variable(unsigned long *name_size,
165 efi_char16_t *name,
166 efi_guid_t *vendor)
167{
168 return efi_call_virt(get_next_variable,
169 name_size, name, vendor);
170}
171
172static efi_status_t virt_efi_set_variable(efi_char16_t *name,
173 efi_guid_t *vendor,
174 u32 attr,
175 unsigned long data_size,
176 void *data)
177{
178 return efi_call_virt(set_variable,
179 name, vendor, attr,
180 data_size, data);
181}
182
183static efi_status_t virt_efi_query_variable_info(u32 attr,
184 u64 *storage_space,
185 u64 *remaining_space,
186 u64 *max_variable_size)
187{
188 if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
189 return EFI_UNSUPPORTED;
190
191 return efi_call_virt(query_variable_info, attr, storage_space,
192 remaining_space, max_variable_size);
193}
194
195static efi_status_t virt_efi_get_next_high_mono_count(u32 *count)
196{
197 return efi_call_virt(get_next_high_mono_count, count);
198}
199
200static void virt_efi_reset_system(int reset_type,
201 efi_status_t status,
202 unsigned long data_size,
203 efi_char16_t *data)
204{
205 __efi_call_virt(reset_system, reset_type, status,
206 data_size, data);
207}
208
209static efi_status_t virt_efi_update_capsule(efi_capsule_header_t **capsules,
210 unsigned long count,
211 unsigned long sg_list)
212{
213 if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
214 return EFI_UNSUPPORTED;
215
216 return efi_call_virt(update_capsule, capsules, count, sg_list);
217}
218
219static efi_status_t virt_efi_query_capsule_caps(efi_capsule_header_t **capsules,
220 unsigned long count,
221 u64 *max_size,
222 int *reset_type)
223{
224 if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
225 return EFI_UNSUPPORTED;
226
227 return efi_call_virt(query_capsule_caps, capsules, count, max_size,
228 reset_type);
229}
230
231static efi_status_t __init phys_efi_set_virtual_address_map( 91static efi_status_t __init phys_efi_set_virtual_address_map(
232 unsigned long memory_map_size, 92 unsigned long memory_map_size,
233 unsigned long descriptor_size, 93 unsigned long descriptor_size,
@@ -244,42 +104,6 @@ static efi_status_t __init phys_efi_set_virtual_address_map(
244 return status; 104 return status;
245} 105}
246 106
247int efi_set_rtc_mmss(const struct timespec *now)
248{
249 unsigned long nowtime = now->tv_sec;
250 efi_status_t status;
251 efi_time_t eft;
252 efi_time_cap_t cap;
253 struct rtc_time tm;
254
255 status = efi.get_time(&eft, &cap);
256 if (status != EFI_SUCCESS) {
257 pr_err("Oops: efitime: can't read time!\n");
258 return -1;
259 }
260
261 rtc_time_to_tm(nowtime, &tm);
262 if (!rtc_valid_tm(&tm)) {
263 eft.year = tm.tm_year + 1900;
264 eft.month = tm.tm_mon + 1;
265 eft.day = tm.tm_mday;
266 eft.minute = tm.tm_min;
267 eft.second = tm.tm_sec;
268 eft.nanosecond = 0;
269 } else {
270 pr_err("%s: Invalid EFI RTC value: write of %lx to EFI RTC failed\n",
271 __func__, nowtime);
272 return -1;
273 }
274
275 status = efi.set_time(&eft);
276 if (status != EFI_SUCCESS) {
277 pr_err("Oops: efitime: can't write time!\n");
278 return -1;
279 }
280 return 0;
281}
282
283void efi_get_time(struct timespec *now) 107void efi_get_time(struct timespec *now)
284{ 108{
285 efi_status_t status; 109 efi_status_t status;
@@ -350,6 +174,9 @@ int __init efi_memblock_x86_reserve_range(void)
350 struct efi_info *e = &boot_params.efi_info; 174 struct efi_info *e = &boot_params.efi_info;
351 unsigned long pmap; 175 unsigned long pmap;
352 176
177 if (efi_enabled(EFI_PARAVIRT))
178 return 0;
179
353#ifdef CONFIG_X86_32 180#ifdef CONFIG_X86_32
354 /* Can't handle data above 4GB at this time */ 181 /* Can't handle data above 4GB at this time */
355 if (e->efi_memmap_hi) { 182 if (e->efi_memmap_hi) {
@@ -392,69 +219,15 @@ static void __init print_efi_memmap(void)
392#endif /* EFI_DEBUG */ 219#endif /* EFI_DEBUG */
393} 220}
394 221
395void __init efi_reserve_boot_services(void)
396{
397 void *p;
398
399 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
400 efi_memory_desc_t *md = p;
401 u64 start = md->phys_addr;
402 u64 size = md->num_pages << EFI_PAGE_SHIFT;
403
404 if (md->type != EFI_BOOT_SERVICES_CODE &&
405 md->type != EFI_BOOT_SERVICES_DATA)
406 continue;
407 /* Only reserve where possible:
408 * - Not within any already allocated areas
409 * - Not over any memory area (really needed, if above?)
410 * - Not within any part of the kernel
411 * - Not the bios reserved area
412 */
413 if ((start + size > __pa_symbol(_text)
414 && start <= __pa_symbol(_end)) ||
415 !e820_all_mapped(start, start+size, E820_RAM) ||
416 memblock_is_region_reserved(start, size)) {
417 /* Could not reserve, skip it */
418 md->num_pages = 0;
419 memblock_dbg("Could not reserve boot range [0x%010llx-0x%010llx]\n",
420 start, start+size-1);
421 } else
422 memblock_reserve(start, size);
423 }
424}
425
426void __init efi_unmap_memmap(void) 222void __init efi_unmap_memmap(void)
427{ 223{
428 clear_bit(EFI_MEMMAP, &efi.flags); 224 clear_bit(EFI_MEMMAP, &efi.flags);
429 if (memmap.map) { 225 if (memmap.map) {
430 early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size); 226 early_memunmap(memmap.map, memmap.nr_map * memmap.desc_size);
431 memmap.map = NULL; 227 memmap.map = NULL;
432 } 228 }
433} 229}
434 230
435void __init efi_free_boot_services(void)
436{
437 void *p;
438
439 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
440 efi_memory_desc_t *md = p;
441 unsigned long long start = md->phys_addr;
442 unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
443
444 if (md->type != EFI_BOOT_SERVICES_CODE &&
445 md->type != EFI_BOOT_SERVICES_DATA)
446 continue;
447
448 /* Could not reserve boot area */
449 if (!size)
450 continue;
451
452 free_bootmem_late(start, size);
453 }
454
455 efi_unmap_memmap();
456}
457
458static int __init efi_systab_init(void *phys) 231static int __init efi_systab_init(void *phys)
459{ 232{
460 if (efi_enabled(EFI_64BIT)) { 233 if (efi_enabled(EFI_64BIT)) {
@@ -467,12 +240,12 @@ static int __init efi_systab_init(void *phys)
467 if (!data) 240 if (!data)
468 return -ENOMEM; 241 return -ENOMEM;
469 } 242 }
470 systab64 = early_ioremap((unsigned long)phys, 243 systab64 = early_memremap((unsigned long)phys,
471 sizeof(*systab64)); 244 sizeof(*systab64));
472 if (systab64 == NULL) { 245 if (systab64 == NULL) {
473 pr_err("Couldn't map the system table!\n"); 246 pr_err("Couldn't map the system table!\n");
474 if (data) 247 if (data)
475 early_iounmap(data, sizeof(*data)); 248 early_memunmap(data, sizeof(*data));
476 return -ENOMEM; 249 return -ENOMEM;
477 } 250 }
478 251
@@ -504,9 +277,9 @@ static int __init efi_systab_init(void *phys)
504 systab64->tables; 277 systab64->tables;
505 tmp |= data ? data->tables : systab64->tables; 278 tmp |= data ? data->tables : systab64->tables;
506 279
507 early_iounmap(systab64, sizeof(*systab64)); 280 early_memunmap(systab64, sizeof(*systab64));
508 if (data) 281 if (data)
509 early_iounmap(data, sizeof(*data)); 282 early_memunmap(data, sizeof(*data));
510#ifdef CONFIG_X86_32 283#ifdef CONFIG_X86_32
511 if (tmp >> 32) { 284 if (tmp >> 32) {
512 pr_err("EFI data located above 4GB, disabling EFI.\n"); 285 pr_err("EFI data located above 4GB, disabling EFI.\n");
@@ -516,7 +289,7 @@ static int __init efi_systab_init(void *phys)
516 } else { 289 } else {
517 efi_system_table_32_t *systab32; 290 efi_system_table_32_t *systab32;
518 291
519 systab32 = early_ioremap((unsigned long)phys, 292 systab32 = early_memremap((unsigned long)phys,
520 sizeof(*systab32)); 293 sizeof(*systab32));
521 if (systab32 == NULL) { 294 if (systab32 == NULL) {
522 pr_err("Couldn't map the system table!\n"); 295 pr_err("Couldn't map the system table!\n");
@@ -537,7 +310,7 @@ static int __init efi_systab_init(void *phys)
537 efi_systab.nr_tables = systab32->nr_tables; 310 efi_systab.nr_tables = systab32->nr_tables;
538 efi_systab.tables = systab32->tables; 311 efi_systab.tables = systab32->tables;
539 312
540 early_iounmap(systab32, sizeof(*systab32)); 313 early_memunmap(systab32, sizeof(*systab32));
541 } 314 }
542 315
543 efi.systab = &efi_systab; 316 efi.systab = &efi_systab;
@@ -563,7 +336,7 @@ static int __init efi_runtime_init32(void)
563{ 336{
564 efi_runtime_services_32_t *runtime; 337 efi_runtime_services_32_t *runtime;
565 338
566 runtime = early_ioremap((unsigned long)efi.systab->runtime, 339 runtime = early_memremap((unsigned long)efi.systab->runtime,
567 sizeof(efi_runtime_services_32_t)); 340 sizeof(efi_runtime_services_32_t));
568 if (!runtime) { 341 if (!runtime) {
569 pr_err("Could not map the runtime service table!\n"); 342 pr_err("Could not map the runtime service table!\n");
@@ -578,7 +351,7 @@ static int __init efi_runtime_init32(void)
578 efi_phys.set_virtual_address_map = 351 efi_phys.set_virtual_address_map =
579 (efi_set_virtual_address_map_t *) 352 (efi_set_virtual_address_map_t *)
580 (unsigned long)runtime->set_virtual_address_map; 353 (unsigned long)runtime->set_virtual_address_map;
581 early_iounmap(runtime, sizeof(efi_runtime_services_32_t)); 354 early_memunmap(runtime, sizeof(efi_runtime_services_32_t));
582 355
583 return 0; 356 return 0;
584} 357}
@@ -587,7 +360,7 @@ static int __init efi_runtime_init64(void)
587{ 360{
588 efi_runtime_services_64_t *runtime; 361 efi_runtime_services_64_t *runtime;
589 362
590 runtime = early_ioremap((unsigned long)efi.systab->runtime, 363 runtime = early_memremap((unsigned long)efi.systab->runtime,
591 sizeof(efi_runtime_services_64_t)); 364 sizeof(efi_runtime_services_64_t));
592 if (!runtime) { 365 if (!runtime) {
593 pr_err("Could not map the runtime service table!\n"); 366 pr_err("Could not map the runtime service table!\n");
@@ -602,7 +375,7 @@ static int __init efi_runtime_init64(void)
602 efi_phys.set_virtual_address_map = 375 efi_phys.set_virtual_address_map =
603 (efi_set_virtual_address_map_t *) 376 (efi_set_virtual_address_map_t *)
604 (unsigned long)runtime->set_virtual_address_map; 377 (unsigned long)runtime->set_virtual_address_map;
605 early_iounmap(runtime, sizeof(efi_runtime_services_64_t)); 378 early_memunmap(runtime, sizeof(efi_runtime_services_64_t));
606 379
607 return 0; 380 return 0;
608} 381}
@@ -616,14 +389,24 @@ static int __init efi_runtime_init(void)
616 * the runtime services table so that we can grab the physical 389 * the runtime services table so that we can grab the physical
617 * address of several of the EFI runtime functions, needed to 390 * address of several of the EFI runtime functions, needed to
618 * set the firmware into virtual mode. 391 * set the firmware into virtual mode.
392 *
 393 * When EFI_PARAVIRT is in force, the runtime services memory region
 394 * cannot be mapped because we do not have direct access to it.
 395 * However, runtime services are still available through proxy functions
 396 * (e.g. the Xen dom0 EFI implementation issues a special hypercall
 397 * which executes the relevant EFI functions), and that is why they
 398 * are always enabled here.
619 */ 399 */
620 if (efi_enabled(EFI_64BIT))
621 rv = efi_runtime_init64();
622 else
623 rv = efi_runtime_init32();
624 400
625 if (rv) 401 if (!efi_enabled(EFI_PARAVIRT)) {
626 return rv; 402 if (efi_enabled(EFI_64BIT))
403 rv = efi_runtime_init64();
404 else
405 rv = efi_runtime_init32();
406
407 if (rv)
408 return rv;
409 }
627 410
628 set_bit(EFI_RUNTIME_SERVICES, &efi.flags); 411 set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
629 412
@@ -632,8 +415,11 @@ static int __init efi_runtime_init(void)
632 415
633static int __init efi_memmap_init(void) 416static int __init efi_memmap_init(void)
634{ 417{
418 if (efi_enabled(EFI_PARAVIRT))
419 return 0;
420
635 /* Map the EFI memory map */ 421 /* Map the EFI memory map */
636 memmap.map = early_ioremap((unsigned long)memmap.phys_map, 422 memmap.map = early_memremap((unsigned long)memmap.phys_map,
637 memmap.nr_map * memmap.desc_size); 423 memmap.nr_map * memmap.desc_size);
638 if (memmap.map == NULL) { 424 if (memmap.map == NULL) {
639 pr_err("Could not map the memory map!\n"); 425 pr_err("Could not map the memory map!\n");
@@ -649,62 +435,6 @@ static int __init efi_memmap_init(void)
649 return 0; 435 return 0;
650} 436}
651 437
652/*
653 * A number of config table entries get remapped to virtual addresses
654 * after entering EFI virtual mode. However, the kexec kernel requires
655 * their physical addresses therefore we pass them via setup_data and
656 * correct those entries to their respective physical addresses here.
657 *
658 * Currently only handles smbios which is necessary for some firmware
659 * implementation.
660 */
661static int __init efi_reuse_config(u64 tables, int nr_tables)
662{
663 int i, sz, ret = 0;
664 void *p, *tablep;
665 struct efi_setup_data *data;
666
667 if (!efi_setup)
668 return 0;
669
670 if (!efi_enabled(EFI_64BIT))
671 return 0;
672
673 data = early_memremap(efi_setup, sizeof(*data));
674 if (!data) {
675 ret = -ENOMEM;
676 goto out;
677 }
678
679 if (!data->smbios)
680 goto out_memremap;
681
682 sz = sizeof(efi_config_table_64_t);
683
684 p = tablep = early_memremap(tables, nr_tables * sz);
685 if (!p) {
686 pr_err("Could not map Configuration table!\n");
687 ret = -ENOMEM;
688 goto out_memremap;
689 }
690
691 for (i = 0; i < efi.systab->nr_tables; i++) {
692 efi_guid_t guid;
693
694 guid = ((efi_config_table_64_t *)p)->guid;
695
696 if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID))
697 ((efi_config_table_64_t *)p)->table = data->smbios;
698 p += sz;
699 }
700 early_iounmap(tablep, nr_tables * sz);
701
702out_memremap:
703 early_iounmap(data, sizeof(*data));
704out:
705 return ret;
706}
707
708void __init efi_init(void) 438void __init efi_init(void)
709{ 439{
710 efi_char16_t *c16; 440 efi_char16_t *c16;
@@ -728,8 +458,6 @@ void __init efi_init(void)
728 if (efi_systab_init(efi_phys.systab)) 458 if (efi_systab_init(efi_phys.systab))
729 return; 459 return;
730 460
731 set_bit(EFI_SYSTEM_TABLES, &efi.flags);
732
733 efi.config_table = (unsigned long)efi.systab->tables; 461 efi.config_table = (unsigned long)efi.systab->tables;
734 efi.fw_vendor = (unsigned long)efi.systab->fw_vendor; 462 efi.fw_vendor = (unsigned long)efi.systab->fw_vendor;
735 efi.runtime = (unsigned long)efi.systab->runtime; 463 efi.runtime = (unsigned long)efi.systab->runtime;
@@ -737,14 +465,14 @@ void __init efi_init(void)
737 /* 465 /*
738 * Show what we know for posterity 466 * Show what we know for posterity
739 */ 467 */
740 c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2); 468 c16 = tmp = early_memremap(efi.systab->fw_vendor, 2);
741 if (c16) { 469 if (c16) {
742 for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i) 470 for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i)
743 vendor[i] = *c16++; 471 vendor[i] = *c16++;
744 vendor[i] = '\0'; 472 vendor[i] = '\0';
745 } else 473 } else
746 pr_err("Could not map the firmware vendor!\n"); 474 pr_err("Could not map the firmware vendor!\n");
747 early_iounmap(tmp, 2); 475 early_memunmap(tmp, 2);
748 476
749 pr_info("EFI v%u.%.02u by %s\n", 477 pr_info("EFI v%u.%.02u by %s\n",
750 efi.systab->hdr.revision >> 16, 478 efi.systab->hdr.revision >> 16,
@@ -770,8 +498,6 @@ void __init efi_init(void)
770 if (efi_memmap_init()) 498 if (efi_memmap_init())
771 return; 499 return;
772 500
773 set_bit(EFI_MEMMAP, &efi.flags);
774
775 print_efi_memmap(); 501 print_efi_memmap();
776} 502}
777 503
@@ -847,22 +573,6 @@ void __init old_map_region(efi_memory_desc_t *md)
847 (unsigned long long)md->phys_addr); 573 (unsigned long long)md->phys_addr);
848} 574}
849 575
850static void native_runtime_setup(void)
851{
852 efi.get_time = virt_efi_get_time;
853 efi.set_time = virt_efi_set_time;
854 efi.get_wakeup_time = virt_efi_get_wakeup_time;
855 efi.set_wakeup_time = virt_efi_set_wakeup_time;
856 efi.get_variable = virt_efi_get_variable;
857 efi.get_next_variable = virt_efi_get_next_variable;
858 efi.set_variable = virt_efi_set_variable;
859 efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count;
860 efi.reset_system = virt_efi_reset_system;
861 efi.query_variable_info = virt_efi_query_variable_info;
862 efi.update_capsule = virt_efi_update_capsule;
863 efi.query_capsule_caps = virt_efi_query_capsule_caps;
864}
865
866/* Merge contiguous regions of the same type and attribute */ 576/* Merge contiguous regions of the same type and attribute */
867static void __init efi_merge_regions(void) 577static void __init efi_merge_regions(void)
868{ 578{
@@ -1049,7 +759,7 @@ static void __init kexec_enter_virtual_mode(void)
1049 */ 759 */
1050 efi.runtime_version = efi_systab.hdr.revision; 760 efi.runtime_version = efi_systab.hdr.revision;
1051 761
1052 native_runtime_setup(); 762 efi_native_runtime_setup();
1053 763
1054 efi.set_virtual_address_map = NULL; 764 efi.set_virtual_address_map = NULL;
1055 765
@@ -1057,11 +767,7 @@ static void __init kexec_enter_virtual_mode(void)
1057 runtime_code_page_mkexec(); 767 runtime_code_page_mkexec();
1058 768
1059 /* clean DUMMY object */ 769 /* clean DUMMY object */
1060 efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, 770 efi_delete_dummy_variable();
1061 EFI_VARIABLE_NON_VOLATILE |
1062 EFI_VARIABLE_BOOTSERVICE_ACCESS |
1063 EFI_VARIABLE_RUNTIME_ACCESS,
1064 0, NULL);
1065#endif 771#endif
1066} 772}
1067 773
@@ -1142,7 +848,7 @@ static void __init __efi_enter_virtual_mode(void)
1142 efi.runtime_version = efi_systab.hdr.revision; 848 efi.runtime_version = efi_systab.hdr.revision;
1143 849
1144 if (efi_is_native()) 850 if (efi_is_native())
1145 native_runtime_setup(); 851 efi_native_runtime_setup();
1146 else 852 else
1147 efi_thunk_runtime_setup(); 853 efi_thunk_runtime_setup();
1148 854
@@ -1179,15 +885,14 @@ static void __init __efi_enter_virtual_mode(void)
1179 free_pages((unsigned long)new_memmap, pg_shift); 885 free_pages((unsigned long)new_memmap, pg_shift);
1180 886
1181 /* clean DUMMY object */ 887 /* clean DUMMY object */
1182 efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, 888 efi_delete_dummy_variable();
1183 EFI_VARIABLE_NON_VOLATILE |
1184 EFI_VARIABLE_BOOTSERVICE_ACCESS |
1185 EFI_VARIABLE_RUNTIME_ACCESS,
1186 0, NULL);
1187} 889}
1188 890
1189void __init efi_enter_virtual_mode(void) 891void __init efi_enter_virtual_mode(void)
1190{ 892{
893 if (efi_enabled(EFI_PARAVIRT))
894 return;
895
1191 if (efi_setup) 896 if (efi_setup)
1192 kexec_enter_virtual_mode(); 897 kexec_enter_virtual_mode();
1193 else 898 else
@@ -1220,6 +925,9 @@ u64 efi_mem_attributes(unsigned long phys_addr)
1220 efi_memory_desc_t *md; 925 efi_memory_desc_t *md;
1221 void *p; 926 void *p;
1222 927
928 if (!efi_enabled(EFI_MEMMAP))
929 return 0;
930
1223 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { 931 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
1224 md = p; 932 md = p;
1225 if ((md->phys_addr <= phys_addr) && 933 if ((md->phys_addr <= phys_addr) &&
@@ -1230,86 +938,6 @@ u64 efi_mem_attributes(unsigned long phys_addr)
1230 return 0; 938 return 0;
1231} 939}
1232 940
1233/*
1234 * Some firmware implementations refuse to boot if there's insufficient space
1235 * in the variable store. Ensure that we never use more than a safe limit.
1236 *
1237 * Return EFI_SUCCESS if it is safe to write 'size' bytes to the variable
1238 * store.
1239 */
1240efi_status_t efi_query_variable_store(u32 attributes, unsigned long size)
1241{
1242 efi_status_t status;
1243 u64 storage_size, remaining_size, max_size;
1244
1245 if (!(attributes & EFI_VARIABLE_NON_VOLATILE))
1246 return 0;
1247
1248 status = efi.query_variable_info(attributes, &storage_size,
1249 &remaining_size, &max_size);
1250 if (status != EFI_SUCCESS)
1251 return status;
1252
1253 /*
1254 * We account for that by refusing the write if permitting it would
1255 * reduce the available space to under 5KB. This figure was provided by
1256 * Samsung, so should be safe.
1257 */
1258 if ((remaining_size - size < EFI_MIN_RESERVE) &&
1259 !efi_no_storage_paranoia) {
1260
1261 /*
1262 * Triggering garbage collection may require that the firmware
1263 * generate a real EFI_OUT_OF_RESOURCES error. We can force
1264 * that by attempting to use more space than is available.
1265 */
1266 unsigned long dummy_size = remaining_size + 1024;
1267 void *dummy = kzalloc(dummy_size, GFP_ATOMIC);
1268
1269 if (!dummy)
1270 return EFI_OUT_OF_RESOURCES;
1271
1272 status = efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID,
1273 EFI_VARIABLE_NON_VOLATILE |
1274 EFI_VARIABLE_BOOTSERVICE_ACCESS |
1275 EFI_VARIABLE_RUNTIME_ACCESS,
1276 dummy_size, dummy);
1277
1278 if (status == EFI_SUCCESS) {
1279 /*
1280 * This should have failed, so if it didn't make sure
1281 * that we delete it...
1282 */
1283 efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID,
1284 EFI_VARIABLE_NON_VOLATILE |
1285 EFI_VARIABLE_BOOTSERVICE_ACCESS |
1286 EFI_VARIABLE_RUNTIME_ACCESS,
1287 0, dummy);
1288 }
1289
1290 kfree(dummy);
1291
1292 /*
1293 * The runtime code may now have triggered a garbage collection
1294 * run, so check the variable info again
1295 */
1296 status = efi.query_variable_info(attributes, &storage_size,
1297 &remaining_size, &max_size);
1298
1299 if (status != EFI_SUCCESS)
1300 return status;
1301
1302 /*
1303 * There still isn't enough room, so return an error
1304 */
1305 if (remaining_size - size < EFI_MIN_RESERVE)
1306 return EFI_OUT_OF_RESOURCES;
1307 }
1308
1309 return EFI_SUCCESS;
1310}
1311EXPORT_SYMBOL_GPL(efi_query_variable_store);
1312
1313static int __init parse_efi_cmdline(char *str) 941static int __init parse_efi_cmdline(char *str)
1314{ 942{
1315 if (*str == '=') 943 if (*str == '=')
@@ -1321,22 +949,3 @@ static int __init parse_efi_cmdline(char *str)
1321 return 0; 949 return 0;
1322} 950}
1323early_param("efi", parse_efi_cmdline); 951early_param("efi", parse_efi_cmdline);
1324
1325void __init efi_apply_memmap_quirks(void)
1326{
1327 /*
1328 * Once setup is done earlier, unmap the EFI memory map on mismatched
1329 * firmware/kernel architectures since there is no support for runtime
1330 * services.
1331 */
1332 if (!efi_runtime_supported()) {
1333 pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n");
1334 efi_unmap_memmap();
1335 }
1336
1337 /*
1338 * UV doesn't support the new EFI pagetable mapping yet.
1339 */
1340 if (is_uv_system())
1341 set_bit(EFI_OLD_MEMMAP, &efi.flags);
1342}
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
new file mode 100644
index 000000000000..1c7380da65ff
--- /dev/null
+++ b/arch/x86/platform/efi/quirks.c
@@ -0,0 +1,290 @@
1#include <linux/init.h>
2#include <linux/kernel.h>
3#include <linux/string.h>
4#include <linux/time.h>
5#include <linux/types.h>
6#include <linux/efi.h>
7#include <linux/slab.h>
8#include <linux/memblock.h>
9#include <linux/bootmem.h>
10#include <linux/acpi.h>
11#include <asm/efi.h>
12#include <asm/uv/uv.h>
13
14#define EFI_MIN_RESERVE 5120
15
16#define EFI_DUMMY_GUID \
17 EFI_GUID(0x4424ac57, 0xbe4b, 0x47dd, 0x9e, 0x97, 0xed, 0x50, 0xf0, 0x9f, 0x92, 0xa9)
18
19static efi_char16_t efi_dummy_name[6] = { 'D', 'U', 'M', 'M', 'Y', 0 };
20
21static bool efi_no_storage_paranoia;
22
23/*
24 * Some firmware implementations refuse to boot if there's insufficient
25 * space in the variable store. The implementation of garbage collection
26 * in some FW versions causes stale (deleted) variables to take up space
27 * longer than intended and space is only freed once the store becomes
28 * almost completely full.
29 *
30 * Enabling this option disables the space checks in
31 * efi_query_variable_store() and forces garbage collection.
32 *
33 * Only enable this option if deleting EFI variables does not free up
34 * space in your variable store, e.g. if despite deleting variables
35 * you're unable to create new ones.
36 */
37static int __init setup_storage_paranoia(char *arg)
38{
39 efi_no_storage_paranoia = true;
40 return 0;
41}
42early_param("efi_no_storage_paranoia", setup_storage_paranoia);
43
44/*
45 * Deleting the dummy variable which kicks off garbage collection
 46 */
47void efi_delete_dummy_variable(void)
48{
49 efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID,
50 EFI_VARIABLE_NON_VOLATILE |
51 EFI_VARIABLE_BOOTSERVICE_ACCESS |
52 EFI_VARIABLE_RUNTIME_ACCESS,
53 0, NULL);
54}
55
56/*
57 * Some firmware implementations refuse to boot if there's insufficient space
58 * in the variable store. Ensure that we never use more than a safe limit.
59 *
60 * Return EFI_SUCCESS if it is safe to write 'size' bytes to the variable
61 * store.
62 */
63efi_status_t efi_query_variable_store(u32 attributes, unsigned long size)
64{
65 efi_status_t status;
66 u64 storage_size, remaining_size, max_size;
67
68 if (!(attributes & EFI_VARIABLE_NON_VOLATILE))
69 return 0;
70
71 status = efi.query_variable_info(attributes, &storage_size,
72 &remaining_size, &max_size);
73 if (status != EFI_SUCCESS)
74 return status;
75
76 /*
77 * We account for that by refusing the write if permitting it would
78 * reduce the available space to under 5KB. This figure was provided by
79 * Samsung, so should be safe.
80 */
81 if ((remaining_size - size < EFI_MIN_RESERVE) &&
82 !efi_no_storage_paranoia) {
83
84 /*
85 * Triggering garbage collection may require that the firmware
86 * generate a real EFI_OUT_OF_RESOURCES error. We can force
87 * that by attempting to use more space than is available.
88 */
89 unsigned long dummy_size = remaining_size + 1024;
90 void *dummy = kzalloc(dummy_size, GFP_ATOMIC);
91
92 if (!dummy)
93 return EFI_OUT_OF_RESOURCES;
94
95 status = efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID,
96 EFI_VARIABLE_NON_VOLATILE |
97 EFI_VARIABLE_BOOTSERVICE_ACCESS |
98 EFI_VARIABLE_RUNTIME_ACCESS,
99 dummy_size, dummy);
100
101 if (status == EFI_SUCCESS) {
102 /*
103 * This should have failed, so if it didn't make sure
104 * that we delete it...
105 */
106 efi_delete_dummy_variable();
107 }
108
109 kfree(dummy);
110
111 /*
112 * The runtime code may now have triggered a garbage collection
113 * run, so check the variable info again
114 */
115 status = efi.query_variable_info(attributes, &storage_size,
116 &remaining_size, &max_size);
117
118 if (status != EFI_SUCCESS)
119 return status;
120
121 /*
122 * There still isn't enough room, so return an error
123 */
124 if (remaining_size - size < EFI_MIN_RESERVE)
125 return EFI_OUT_OF_RESOURCES;
126 }
127
128 return EFI_SUCCESS;
129}
130EXPORT_SYMBOL_GPL(efi_query_variable_store);
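efi_query_variable_store() above refuses a non-volatile write when it would leave less than EFI_MIN_RESERVE (5 KiB) free, unless efi_no_storage_paranoia was passed on the command line. A small user-space model of just the space check (the garbage-collection retry path is omitted); the function name is illustrative.

#include <stdbool.h>
#include <stdio.h>

#define EFI_MIN_RESERVE 5120 /* 5 KiB floor, as in the code above */

/* Illustrative model of the initial space check only. */
static bool write_would_be_refused(unsigned long remaining, unsigned long size,
				   bool no_storage_paranoia)
{
	return (remaining - size < EFI_MIN_RESERVE) && !no_storage_paranoia;
}

int main(void)
{
	/* 20 KiB free, 16 KiB write: only 4 KiB would remain, so refuse. */
	printf("%d\n", write_would_be_refused(20 * 1024, 16 * 1024, false)); /* 1 */
	/* Same write with efi_no_storage_paranoia set: allowed through. */
	printf("%d\n", write_would_be_refused(20 * 1024, 16 * 1024, true));  /* 0 */
	return 0;
}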
131
132/*
133 * The UEFI specification makes it clear that the operating system is free to do
134 * whatever it wants with boot services code after ExitBootServices() has been
 135 * called. Ignoring this, a significant number of EFI implementations
 136 * continue calling into boot services code (SetVirtualAddressMap). To
 137 * work around such buggy implementations we reserve the boot services
 138 * region during EFI init and make sure it stays executable. Then, after
 139 * SetVirtualAddressMap(), it is discarded.
 140 */
141void __init efi_reserve_boot_services(void)
142{
143 void *p;
144
145 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
146 efi_memory_desc_t *md = p;
147 u64 start = md->phys_addr;
148 u64 size = md->num_pages << EFI_PAGE_SHIFT;
149
150 if (md->type != EFI_BOOT_SERVICES_CODE &&
151 md->type != EFI_BOOT_SERVICES_DATA)
152 continue;
153 /* Only reserve where possible:
154 * - Not within any already allocated areas
155 * - Not over any memory area (really needed, if above?)
156 * - Not within any part of the kernel
157 * - Not the bios reserved area
158 */
159 if ((start + size > __pa_symbol(_text)
160 && start <= __pa_symbol(_end)) ||
161 !e820_all_mapped(start, start+size, E820_RAM) ||
162 memblock_is_region_reserved(start, size)) {
163 /* Could not reserve, skip it */
164 md->num_pages = 0;
165 memblock_dbg("Could not reserve boot range [0x%010llx-0x%010llx]\n",
166 start, start+size-1);
167 } else
168 memblock_reserve(start, size);
169 }
170}
171
172void __init efi_free_boot_services(void)
173{
174 void *p;
175
176 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
177 efi_memory_desc_t *md = p;
178 unsigned long long start = md->phys_addr;
179 unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
180
181 if (md->type != EFI_BOOT_SERVICES_CODE &&
182 md->type != EFI_BOOT_SERVICES_DATA)
183 continue;
184
185 /* Could not reserve boot area */
186 if (!size)
187 continue;
188
189 free_bootmem_late(start, size);
190 }
191
192 efi_unmap_memmap();
193}
194
195/*
196 * A number of config table entries get remapped to virtual addresses
197 * after entering EFI virtual mode. However, the kexec kernel requires
198 * their physical addresses therefore we pass them via setup_data and
199 * correct those entries to their respective physical addresses here.
200 *
201 * Currently only handles smbios which is necessary for some firmware
202 * implementation.
203 */
204int __init efi_reuse_config(u64 tables, int nr_tables)
205{
206 int i, sz, ret = 0;
207 void *p, *tablep;
208 struct efi_setup_data *data;
209
210 if (!efi_setup)
211 return 0;
212
213 if (!efi_enabled(EFI_64BIT))
214 return 0;
215
216 data = early_memremap(efi_setup, sizeof(*data));
217 if (!data) {
218 ret = -ENOMEM;
219 goto out;
220 }
221
222 if (!data->smbios)
223 goto out_memremap;
224
225 sz = sizeof(efi_config_table_64_t);
226
227 p = tablep = early_memremap(tables, nr_tables * sz);
228 if (!p) {
229 pr_err("Could not map Configuration table!\n");
230 ret = -ENOMEM;
231 goto out_memremap;
232 }
233
234 for (i = 0; i < efi.systab->nr_tables; i++) {
235 efi_guid_t guid;
236
237 guid = ((efi_config_table_64_t *)p)->guid;
238
239 if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID))
240 ((efi_config_table_64_t *)p)->table = data->smbios;
241 p += sz;
242 }
243 early_memunmap(tablep, nr_tables * sz);
244
245out_memremap:
246 early_memunmap(data, sizeof(*data));
247out:
248 return ret;
249}
250
251void __init efi_apply_memmap_quirks(void)
252{
253 /*
254 * Once setup is done earlier, unmap the EFI memory map on mismatched
255 * firmware/kernel architectures since there is no support for runtime
256 * services.
257 */
258 if (!efi_runtime_supported()) {
259 pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n");
260 efi_unmap_memmap();
261 }
262
263 /*
264 * UV doesn't support the new EFI pagetable mapping yet.
265 */
266 if (is_uv_system())
267 set_bit(EFI_OLD_MEMMAP, &efi.flags);
268}
269
270/*
271 * For most modern platforms the preferred method of powering off is via
272 * ACPI. However, there are some that are known to require the use of
273 * EFI runtime services and for which ACPI does not work at all.
274 *
275 * Using EFI is a last resort, to be used only if no other option
276 * exists.
277 */
278bool efi_reboot_required(void)
279{
280 if (!acpi_gbl_reduced_hardware)
281 return false;
282
283 efi_reboot_quirk_mode = EFI_RESET_WARM;
284 return true;
285}
286
287bool efi_poweroff_required(void)
288{
289 return !!acpi_gbl_reduced_hardware;
290}
diff --git a/arch/x86/platform/ts5500/ts5500.c b/arch/x86/platform/ts5500/ts5500.c
index 9471b9456f25..baf16e72e668 100644
--- a/arch/x86/platform/ts5500/ts5500.c
+++ b/arch/x86/platform/ts5500/ts5500.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Technologic Systems TS-5500 Single Board Computer support 2 * Technologic Systems TS-5500 Single Board Computer support
3 * 3 *
4 * Copyright (C) 2013 Savoir-faire Linux Inc. 4 * Copyright (C) 2013-2014 Savoir-faire Linux Inc.
5 * Vivien Didelot <vivien.didelot@savoirfairelinux.com> 5 * Vivien Didelot <vivien.didelot@savoirfairelinux.com>
6 * 6 *
7 * This program is free software; you can redistribute it and/or modify it under 7 * This program is free software; you can redistribute it and/or modify it under
@@ -15,8 +15,8 @@
15 * state or available options. For further information about sysfs entries, see 15 * state or available options. For further information about sysfs entries, see
16 * Documentation/ABI/testing/sysfs-platform-ts5500. 16 * Documentation/ABI/testing/sysfs-platform-ts5500.
17 * 17 *
18 * This code actually supports the TS-5500 platform, but it may be extended to 18 * This code may be extended to support similar x86-based platforms.
19 * support similar Technologic Systems x86-based platforms, such as the TS-5600. 19 * Currently, the TS-5500 and TS-5400 are supported.
20 */ 20 */
21 21
22#include <linux/delay.h> 22#include <linux/delay.h>
@@ -32,6 +32,7 @@
32/* Product code register */ 32/* Product code register */
33#define TS5500_PRODUCT_CODE_ADDR 0x74 33#define TS5500_PRODUCT_CODE_ADDR 0x74
34#define TS5500_PRODUCT_CODE 0x60 /* TS-5500 product code */ 34#define TS5500_PRODUCT_CODE 0x60 /* TS-5500 product code */
35#define TS5400_PRODUCT_CODE 0x40 /* TS-5400 product code */
35 36
36/* SRAM/RS-485/ADC options, and RS-485 RTS/Automatic RS-485 flags register */ 37/* SRAM/RS-485/ADC options, and RS-485 RTS/Automatic RS-485 flags register */
37#define TS5500_SRAM_RS485_ADC_ADDR 0x75 38#define TS5500_SRAM_RS485_ADC_ADDR 0x75
@@ -66,6 +67,7 @@
66 67
67/** 68/**
68 * struct ts5500_sbc - TS-5500 board description 69 * struct ts5500_sbc - TS-5500 board description
70 * @name: Board model name.
69 * @id: Board product ID. 71 * @id: Board product ID.
70 * @sram: Flag for SRAM option. 72 * @sram: Flag for SRAM option.
71 * @rs485: Flag for RS-485 option. 73 * @rs485: Flag for RS-485 option.
@@ -75,6 +77,7 @@
75 * @jumpers: Bitfield for jumpers' state. 77 * @jumpers: Bitfield for jumpers' state.
76 */ 78 */
77struct ts5500_sbc { 79struct ts5500_sbc {
80 const char *name;
78 int id; 81 int id;
79 bool sram; 82 bool sram;
80 bool rs485; 83 bool rs485;
@@ -122,13 +125,16 @@ static int __init ts5500_detect_config(struct ts5500_sbc *sbc)
122 if (!request_region(TS5500_PRODUCT_CODE_ADDR, 4, "ts5500")) 125 if (!request_region(TS5500_PRODUCT_CODE_ADDR, 4, "ts5500"))
123 return -EBUSY; 126 return -EBUSY;
124 127
125 tmp = inb(TS5500_PRODUCT_CODE_ADDR); 128 sbc->id = inb(TS5500_PRODUCT_CODE_ADDR);
126 if (tmp != TS5500_PRODUCT_CODE) { 129 if (sbc->id == TS5500_PRODUCT_CODE) {
127 pr_err("This platform is not a TS-5500 (found ID 0x%x)\n", tmp); 130 sbc->name = "TS-5500";
131 } else if (sbc->id == TS5400_PRODUCT_CODE) {
132 sbc->name = "TS-5400";
133 } else {
134 pr_err("ts5500: unknown product code 0x%x\n", sbc->id);
128 ret = -ENODEV; 135 ret = -ENODEV;
129 goto cleanup; 136 goto cleanup;
130 } 137 }
131 sbc->id = tmp;
132 138
133 tmp = inb(TS5500_SRAM_RS485_ADC_ADDR); 139 tmp = inb(TS5500_SRAM_RS485_ADC_ADDR);
134 sbc->sram = tmp & TS5500_SRAM; 140 sbc->sram = tmp & TS5500_SRAM;
@@ -147,48 +153,52 @@ cleanup:
147 return ret; 153 return ret;
148} 154}
149 155
150static ssize_t ts5500_show_id(struct device *dev, 156static ssize_t name_show(struct device *dev, struct device_attribute *attr,
151 struct device_attribute *attr, char *buf) 157 char *buf)
152{ 158{
153 struct ts5500_sbc *sbc = dev_get_drvdata(dev); 159 struct ts5500_sbc *sbc = dev_get_drvdata(dev);
154 160
155 return sprintf(buf, "0x%.2x\n", sbc->id); 161 return sprintf(buf, "%s\n", sbc->name);
156} 162}
163static DEVICE_ATTR_RO(name);
157 164
158static ssize_t ts5500_show_jumpers(struct device *dev, 165static ssize_t id_show(struct device *dev, struct device_attribute *attr,
159 struct device_attribute *attr, 166 char *buf)
160 char *buf)
161{ 167{
162 struct ts5500_sbc *sbc = dev_get_drvdata(dev); 168 struct ts5500_sbc *sbc = dev_get_drvdata(dev);
163 169
164 return sprintf(buf, "0x%.2x\n", sbc->jumpers >> 1); 170 return sprintf(buf, "0x%.2x\n", sbc->id);
165} 171}
172static DEVICE_ATTR_RO(id);
166 173
167#define TS5500_SHOW(field) \ 174static ssize_t jumpers_show(struct device *dev, struct device_attribute *attr,
168 static ssize_t ts5500_show_##field(struct device *dev, \ 175 char *buf)
169 struct device_attribute *attr, \ 176{
170 char *buf) \ 177 struct ts5500_sbc *sbc = dev_get_drvdata(dev);
171 { \
172 struct ts5500_sbc *sbc = dev_get_drvdata(dev); \
173 return sprintf(buf, "%d\n", sbc->field); \
174 }
175
176TS5500_SHOW(sram)
177TS5500_SHOW(rs485)
178TS5500_SHOW(adc)
179TS5500_SHOW(ereset)
180TS5500_SHOW(itr)
181 178
182static DEVICE_ATTR(id, S_IRUGO, ts5500_show_id, NULL); 179 return sprintf(buf, "0x%.2x\n", sbc->jumpers >> 1);
183static DEVICE_ATTR(jumpers, S_IRUGO, ts5500_show_jumpers, NULL); 180}
184static DEVICE_ATTR(sram, S_IRUGO, ts5500_show_sram, NULL); 181static DEVICE_ATTR_RO(jumpers);
185static DEVICE_ATTR(rs485, S_IRUGO, ts5500_show_rs485, NULL); 182
186static DEVICE_ATTR(adc, S_IRUGO, ts5500_show_adc, NULL); 183#define TS5500_ATTR_BOOL(_field) \
187static DEVICE_ATTR(ereset, S_IRUGO, ts5500_show_ereset, NULL); 184 static ssize_t _field##_show(struct device *dev, \
188static DEVICE_ATTR(itr, S_IRUGO, ts5500_show_itr, NULL); 185 struct device_attribute *attr, char *buf) \
186 { \
187 struct ts5500_sbc *sbc = dev_get_drvdata(dev); \
188 \
189 return sprintf(buf, "%d\n", sbc->_field); \
190 } \
191 static DEVICE_ATTR_RO(_field)
192
193TS5500_ATTR_BOOL(sram);
194TS5500_ATTR_BOOL(rs485);
195TS5500_ATTR_BOOL(adc);
196TS5500_ATTR_BOOL(ereset);
197TS5500_ATTR_BOOL(itr);
189 198
190static struct attribute *ts5500_attributes[] = { 199static struct attribute *ts5500_attributes[] = {
191 &dev_attr_id.attr, 200 &dev_attr_id.attr,
201 &dev_attr_name.attr,
192 &dev_attr_jumpers.attr, 202 &dev_attr_jumpers.attr,
193 &dev_attr_sram.attr, 203 &dev_attr_sram.attr,
194 &dev_attr_rs485.attr, 204 &dev_attr_rs485.attr,
@@ -311,12 +321,14 @@ static int __init ts5500_init(void)
311 if (err) 321 if (err)
312 goto error; 322 goto error;
313 323
314 ts5500_dio1_pdev.dev.parent = &pdev->dev; 324 if (sbc->id == TS5500_PRODUCT_CODE) {
315 if (platform_device_register(&ts5500_dio1_pdev)) 325 ts5500_dio1_pdev.dev.parent = &pdev->dev;
316 dev_warn(&pdev->dev, "DIO1 block registration failed\n"); 326 if (platform_device_register(&ts5500_dio1_pdev))
317 ts5500_dio2_pdev.dev.parent = &pdev->dev; 327 dev_warn(&pdev->dev, "DIO1 block registration failed\n");
318 if (platform_device_register(&ts5500_dio2_pdev)) 328 ts5500_dio2_pdev.dev.parent = &pdev->dev;
319 dev_warn(&pdev->dev, "DIO2 block registration failed\n"); 329 if (platform_device_register(&ts5500_dio2_pdev))
330 dev_warn(&pdev->dev, "DIO2 block registration failed\n");
331 }
320 332
321 if (led_classdev_register(&pdev->dev, &ts5500_led_cdev)) 333 if (led_classdev_register(&pdev->dev, &ts5500_led_cdev))
322 dev_warn(&pdev->dev, "LED registration failed\n"); 334 dev_warn(&pdev->dev, "LED registration failed\n");
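
Note: the ts5500 hunks above replace the open-coded DEVICE_ATTR(..., S_IRUGO, show, NULL) declarations with DEVICE_ATTR_RO(), which derives both the sysfs file mode (0444) and the show callback name from the attribute name. A minimal sketch of that pattern, using a hypothetical "demo" attribute and driver data that are not part of the patch:

#include <linux/device.h>
#include <linux/sysfs.h>

struct foo_drvdata {
	int demo;
};

/* DEVICE_ATTR_RO(demo) requires a callback literally named demo_show(). */
static ssize_t demo_show(struct device *dev, struct device_attribute *attr,
			 char *buf)
{
	struct foo_drvdata *p = dev_get_drvdata(dev);

	return sprintf(buf, "%d\n", p->demo);
}
static DEVICE_ATTR_RO(demo);	/* defines dev_attr_demo with mode 0444 */

static struct attribute *foo_attrs[] = {
	&dev_attr_demo.attr,
	NULL
};
static const struct attribute_group foo_group = {
	.attrs = foo_attrs,
};

Registering such a group with sysfs_create_group() exposes the files read-only; the TS5500_ATTR_BOOL() wrapper above generates the same shape of code for each boolean field and plugs the result into ts5500_attributes[].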
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 424f4c97a44d..6ec7910f59bf 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -165,7 +165,7 @@ static void fix_processor_context(void)
165 * by __save_processor_state() 165 * by __save_processor_state()
166 * @ctxt - structure to load the registers contents from 166 * @ctxt - structure to load the registers contents from
167 */ 167 */
168static void __restore_processor_state(struct saved_context *ctxt) 168static void notrace __restore_processor_state(struct saved_context *ctxt)
169{ 169{
170 if (ctxt->misc_enable_saved) 170 if (ctxt->misc_enable_saved)
171 wrmsrl(MSR_IA32_MISC_ENABLE, ctxt->misc_enable); 171 wrmsrl(MSR_IA32_MISC_ENABLE, ctxt->misc_enable);
@@ -239,7 +239,7 @@ static void __restore_processor_state(struct saved_context *ctxt)
239} 239}
240 240
241/* Needed by apm.c */ 241/* Needed by apm.c */
242void restore_processor_state(void) 242void notrace restore_processor_state(void)
243{ 243{
244 __restore_processor_state(&saved_context); 244 __restore_processor_state(&saved_context);
245} 245}
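
Note: the notrace annotations added above keep ftrace out of the resume path; until __restore_processor_state() has rewritten the MSRs, segment and descriptor-table state, the tracer's own bookkeeping cannot safely run. A sketch of what the annotation expands to, assuming the usual definition in <linux/compiler.h>:

/* Illustrative only; the kernel already provides this in <linux/compiler.h>. */
#define notrace __attribute__((no_instrument_function))

/*
 * With profiling enabled (-pg / ftrace), the compiler emits an mcount or
 * __fentry__ call at the start of every traceable function; this attribute
 * suppresses that call, so a notrace function never enters the tracer.
 */
static void notrace early_resume_step(void)
{
	/* restore state that the tracer itself may depend on */
}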
diff --git a/arch/x86/um/asm/processor.h b/arch/x86/um/asm/processor.h
index 04f82e020f2b..2a206d2b14ab 100644
--- a/arch/x86/um/asm/processor.h
+++ b/arch/x86/um/asm/processor.h
@@ -25,7 +25,8 @@ static inline void rep_nop(void)
25 __asm__ __volatile__("rep;nop": : :"memory"); 25 __asm__ __volatile__("rep;nop": : :"memory");
26} 26}
27 27
28#define cpu_relax() rep_nop() 28#define cpu_relax() rep_nop()
29#define cpu_relax_lowlatency() cpu_relax()
29 30
30#include <asm/processor-generic.h> 31#include <asm/processor-generic.h>
31 32
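
Note: cpu_relax_lowlatency() is defined as a plain alias of cpu_relax() here because rep; nop is already cheap on x86/UML; the separate name exists for spin-wait loops that must stay low-latency on architectures where cpu_relax() is comparatively expensive. A hypothetical busy-wait caller, not taken from the patch:

/* Hypothetical caller: spin until *flag becomes nonzero, hinting the CPU
 * that this is a busy-wait loop (rep; nop on x86 and UML). */
static inline void wait_for_flag(volatile int *flag)
{
	while (!*flag)
		cpu_relax_lowlatency();
}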
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 96ab2c09cb68..7322755f337a 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -22,3 +22,4 @@ obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
22obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o 22obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o
23obj-$(CONFIG_XEN_DOM0) += apic.o vga.o 23obj-$(CONFIG_XEN_DOM0) += apic.o vga.o
24obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o 24obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o
25obj-$(CONFIG_XEN_EFI) += efi.o
diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c
new file mode 100644
index 000000000000..a02e09e18f57
--- /dev/null
+++ b/arch/x86/xen/efi.c
@@ -0,0 +1,43 @@
1/*
2 * Copyright (c) 2014 Oracle Co., Daniel Kiper
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License along
15 * with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include <linux/efi.h>
19#include <linux/init.h>
20#include <linux/string.h>
21
22#include <xen/xen-ops.h>
23
24#include <asm/setup.h>
25
26void __init xen_efi_init(void)
27{
28 efi_system_table_t *efi_systab_xen;
29
30 efi_systab_xen = xen_efi_probe();
31
32 if (efi_systab_xen == NULL)
33 return;
34
35 strncpy((char *)&boot_params.efi_info.efi_loader_signature, "Xen",
36 sizeof(boot_params.efi_info.efi_loader_signature));
37 boot_params.efi_info.efi_systab = (__u32)__pa(efi_systab_xen);
38 boot_params.efi_info.efi_systab_hi = (__u32)(__pa(efi_systab_xen) >> 32);
39
40 set_bit(EFI_BOOT, &efi.flags);
41 set_bit(EFI_PARAVIRT, &efi.flags);
42 set_bit(EFI_64BIT, &efi.flags);
43}
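
Note: xen_efi_init() above asks the hypervisor for a synthetic EFI system table via xen_efi_probe(); when one is present it records the table's physical address in boot_params.efi_info and raises EFI_BOOT, EFI_PARAVIRT and EFI_64BIT, so the generic x86 EFI code treats the domain as EFI-booted while routing firmware access through Xen. Consumers test those flags with efi_enabled(); a hedged sketch of such a check (hypothetical caller, not part of the patch):

#include <linux/efi.h>

/* Hypothetical consumer: avoid touching firmware tables directly when EFI
 * services are only reachable through the hypervisor (EFI_PARAVIRT set). */
static bool efi_directly_accessible(void)
{
	return efi_enabled(EFI_BOOT) && !efi_enabled(EFI_PARAVIRT);
}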
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index ffb101e45731..94813515fdd6 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1718,6 +1718,8 @@ asmlinkage __visible void __init xen_start_kernel(void)
1718 1718
1719 xen_setup_runstate_info(0); 1719 xen_setup_runstate_info(0);
1720 1720
1721 xen_efi_init();
1722
1721 /* Start the world */ 1723 /* Start the world */
1722#ifdef CONFIG_X86_32 1724#ifdef CONFIG_X86_32
1723 i386_start_kernel(); 1725 i386_start_kernel();
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
index c98583588580..ebfa9b2c871d 100644
--- a/arch/x86/xen/grant-table.c
+++ b/arch/x86/xen/grant-table.c
@@ -36,99 +36,133 @@
36 36
37#include <linux/sched.h> 37#include <linux/sched.h>
38#include <linux/mm.h> 38#include <linux/mm.h>
39#include <linux/slab.h>
39#include <linux/vmalloc.h> 40#include <linux/vmalloc.h>
40 41
41#include <xen/interface/xen.h> 42#include <xen/interface/xen.h>
42#include <xen/page.h> 43#include <xen/page.h>
43#include <xen/grant_table.h> 44#include <xen/grant_table.h>
45#include <xen/xen.h>
44 46
45#include <asm/pgtable.h> 47#include <asm/pgtable.h>
46 48
47static int map_pte_fn(pte_t *pte, struct page *pmd_page, 49static struct gnttab_vm_area {
48 unsigned long addr, void *data) 50 struct vm_struct *area;
51 pte_t **ptes;
52} gnttab_shared_vm_area, gnttab_status_vm_area;
53
54int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
55 unsigned long max_nr_gframes,
56 void **__shared)
49{ 57{
50 unsigned long **frames = (unsigned long **)data; 58 void *shared = *__shared;
59 unsigned long addr;
60 unsigned long i;
51 61
52 set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL)); 62 if (shared == NULL)
53 (*frames)++; 63 *__shared = shared = gnttab_shared_vm_area.area->addr;
54 return 0;
55}
56 64
57/* 65 addr = (unsigned long)shared;
58 * This function is used to map shared frames to store grant status. It is 66
59 * different from map_pte_fn above, the frames type here is uint64_t. 67 for (i = 0; i < nr_gframes; i++) {
60 */ 68 set_pte_at(&init_mm, addr, gnttab_shared_vm_area.ptes[i],
61static int map_pte_fn_status(pte_t *pte, struct page *pmd_page, 69 mfn_pte(frames[i], PAGE_KERNEL));
62 unsigned long addr, void *data) 70 addr += PAGE_SIZE;
63{ 71 }
64 uint64_t **frames = (uint64_t **)data;
65 72
66 set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL));
67 (*frames)++;
68 return 0; 73 return 0;
69} 74}
70 75
71static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, 76int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes,
72 unsigned long addr, void *data) 77 unsigned long max_nr_gframes,
78 grant_status_t **__shared)
73{ 79{
80 grant_status_t *shared = *__shared;
81 unsigned long addr;
82 unsigned long i;
83
84 if (shared == NULL)
85 *__shared = shared = gnttab_status_vm_area.area->addr;
86
87 addr = (unsigned long)shared;
88
89 for (i = 0; i < nr_gframes; i++) {
90 set_pte_at(&init_mm, addr, gnttab_status_vm_area.ptes[i],
91 mfn_pte(frames[i], PAGE_KERNEL));
92 addr += PAGE_SIZE;
93 }
74 94
75 set_pte_at(&init_mm, addr, pte, __pte(0));
76 return 0; 95 return 0;
77} 96}
78 97
79int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, 98void arch_gnttab_unmap(void *shared, unsigned long nr_gframes)
80 unsigned long max_nr_gframes,
81 void **__shared)
82{ 99{
83 int rc; 100 pte_t **ptes;
84 void *shared = *__shared; 101 unsigned long addr;
102 unsigned long i;
85 103
86 if (shared == NULL) { 104 if (shared == gnttab_status_vm_area.area->addr)
87 struct vm_struct *area = 105 ptes = gnttab_status_vm_area.ptes;
88 alloc_vm_area(PAGE_SIZE * max_nr_gframes, NULL); 106 else
89 BUG_ON(area == NULL); 107 ptes = gnttab_shared_vm_area.ptes;
90 shared = area->addr;
91 *__shared = shared;
92 }
93 108
94 rc = apply_to_page_range(&init_mm, (unsigned long)shared, 109 addr = (unsigned long)shared;
95 PAGE_SIZE * nr_gframes, 110
96 map_pte_fn, &frames); 111 for (i = 0; i < nr_gframes; i++) {
97 return rc; 112 set_pte_at(&init_mm, addr, ptes[i], __pte(0));
113 addr += PAGE_SIZE;
114 }
98} 115}
99 116
100int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes, 117static int arch_gnttab_valloc(struct gnttab_vm_area *area, unsigned nr_frames)
101 unsigned long max_nr_gframes,
102 grant_status_t **__shared)
103{ 118{
104 int rc; 119 area->ptes = kmalloc(sizeof(pte_t *) * nr_frames, GFP_KERNEL);
105 grant_status_t *shared = *__shared; 120 if (area->ptes == NULL)
121 return -ENOMEM;
106 122
107 if (shared == NULL) { 123 area->area = alloc_vm_area(PAGE_SIZE * nr_frames, area->ptes);
108 /* No need to pass in PTE as we are going to do it 124 if (area->area == NULL) {
109 * in apply_to_page_range anyhow. */ 125 kfree(area->ptes);
110 struct vm_struct *area = 126 return -ENOMEM;
111 alloc_vm_area(PAGE_SIZE * max_nr_gframes, NULL);
112 BUG_ON(area == NULL);
113 shared = area->addr;
114 *__shared = shared;
115 } 127 }
116 128
117 rc = apply_to_page_range(&init_mm, (unsigned long)shared, 129 return 0;
118 PAGE_SIZE * nr_gframes,
119 map_pte_fn_status, &frames);
120 return rc;
121} 130}
122 131
123void arch_gnttab_unmap(void *shared, unsigned long nr_gframes) 132static void arch_gnttab_vfree(struct gnttab_vm_area *area)
133{
134 free_vm_area(area->area);
135 kfree(area->ptes);
136}
137
138int arch_gnttab_init(unsigned long nr_shared, unsigned long nr_status)
124{ 139{
125 apply_to_page_range(&init_mm, (unsigned long)shared, 140 int ret;
126 PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL); 141
142 if (!xen_pv_domain())
143 return 0;
144
145 ret = arch_gnttab_valloc(&gnttab_shared_vm_area, nr_shared);
146 if (ret < 0)
147 return ret;
148
149 /*
150 * Always allocate the space for the status frames in case
151 * we're migrated to a host with V2 support.
152 */
153 ret = arch_gnttab_valloc(&gnttab_status_vm_area, nr_status);
154 if (ret < 0)
155 goto err;
156
157 return 0;
158 err:
159 arch_gnttab_vfree(&gnttab_shared_vm_area);
160 return -ENOMEM;
127} 161}
162
128#ifdef CONFIG_XEN_PVH 163#ifdef CONFIG_XEN_PVH
129#include <xen/balloon.h> 164#include <xen/balloon.h>
130#include <xen/events.h> 165#include <xen/events.h>
131#include <xen/xen.h>
132#include <linux/slab.h> 166#include <linux/slab.h>
133static int __init xlated_setup_gnttab_pages(void) 167static int __init xlated_setup_gnttab_pages(void)
134{ 168{
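
Note: the grant-table rework above relies on the two-argument form of alloc_vm_area(), which reserves kernel virtual space and fills a caller-supplied array with the pte_t pointer of every page in the range. arch_gnttab_map_shared() and arch_gnttab_map_status() then only issue set_pte_at() on those pre-fetched PTEs, so the map path no longer allocates memory, which matters when a resuming guest has to remap its grant table. A self-contained sketch of that allocate-then-populate idiom; the prefetched_area names are illustrative, not from the patch:

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <xen/page.h>
#include <asm/pgtable.h>

struct prefetched_area {
	struct vm_struct *area;
	pte_t **ptes;		/* one pointer per page of the area */
};

/* Reserve kernel virtual space and remember the PTE slot of every page. */
static int prefetched_area_alloc(struct prefetched_area *a, unsigned long nr_pages)
{
	a->ptes = kcalloc(nr_pages, sizeof(pte_t *), GFP_KERNEL);
	if (!a->ptes)
		return -ENOMEM;

	a->area = alloc_vm_area(nr_pages * PAGE_SIZE, a->ptes);
	if (!a->area) {
		kfree(a->ptes);
		return -ENOMEM;
	}
	return 0;
}

/* Point page i of the area at machine frame mfn; no allocation on this path. */
static void prefetched_area_map(struct prefetched_area *a, unsigned long i,
				unsigned long mfn)
{
	unsigned long addr = (unsigned long)a->area->addr + i * PAGE_SIZE;

	set_pte_at(&init_mm, addr, a->ptes[i], mfn_pte(mfn, PAGE_KERNEL));
}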
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 97d87659f779..28c7e0be56e4 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -105,6 +105,14 @@ static inline void __init xen_init_apic(void)
105} 105}
106#endif 106#endif
107 107
108#ifdef CONFIG_XEN_EFI
109extern void xen_efi_init(void);
110#else
111static inline void __init xen_efi_init(void)
112{
113}
114#endif
115
108/* Declare an asm function, along with symbols needed to make it 116/* Declare an asm function, along with symbols needed to make it
109 inlineable */ 117 inlineable */
110#define DECL_ASM(ret, name, ...) \ 118#define DECL_ASM(ret, name, ...) \
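
Note: the xen-ops.h hunk follows the usual pattern for Kconfig-gated hooks: a real declaration when CONFIG_XEN_EFI is enabled and an empty static inline stub otherwise, so xen_start_kernel() can call xen_efi_init() unconditionally with no #ifdef at the call site. A generic sketch of the pattern with hypothetical CONFIG_FOO/foo_init() names:

/* Hypothetical header excerpt illustrating the stub-vs-extern idiom. */
#ifdef CONFIG_FOO
extern void foo_init(void);		/* real implementation lives in foo.c */
#else
static inline void foo_init(void)	/* compiles to nothing when CONFIG_FOO=n */
{
}
#endif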