author     H. Peter Anvin <hpa@linux.intel.com>  2014-02-07 14:27:30 -0500
committer  H. Peter Anvin <hpa@linux.intel.com>  2014-02-07 14:27:30 -0500
commit     a3b072cd180c12e8fe0ece9487b9065808327640 (patch)
tree       62b982041be84748852d77cdf6ca5639ef40858f /arch/x86
parent     75a1ba5b2c529db60ca49626bcaf0bddf4548438 (diff)
parent     081cd62a010f97b5bc1d2b0cd123c5abc692b68a (diff)

Merge tag 'efi-urgent' into x86/urgent

* Avoid WARN_ON() when mapping BGRT on Baytrail (EFI 32-bit).

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig | 12
-rw-r--r--  arch/x86/Makefile | 22
-rw-r--r--  arch/x86/boot/Makefile | 15
-rw-r--r--  arch/x86/boot/cpuflags.c | 25
-rw-r--r--  arch/x86/boot/video.h | 2
-rw-r--r--  arch/x86/crypto/Makefile | 1
-rw-r--r--  arch/x86/crypto/aesni-intel_avx-x86_64.S | 2811
-rw-r--r--  arch/x86/crypto/aesni-intel_glue.c | 147
-rw-r--r--  arch/x86/include/asm/dmi.h | 6
-rw-r--r--  arch/x86/include/asm/fixmap.h | 59
-rw-r--r--  arch/x86/include/asm/hash.h | 7
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 3
-rw-r--r--  arch/x86/include/asm/kvm_para.h | 33
-rw-r--r--  arch/x86/include/asm/paravirt.h | 2
-rw-r--r--  arch/x86/include/asm/paravirt_types.h | 9
-rw-r--r--  arch/x86/include/asm/pci.h | 3
-rw-r--r--  arch/x86/include/asm/pgtable_types.h | 3
-rw-r--r--  arch/x86/include/asm/thread_info.h | 8
-rw-r--r--  arch/x86/include/asm/uv/uv.h | 2
-rw-r--r--  arch/x86/include/asm/vmx.h | 1
-rw-r--r--  arch/x86/include/asm/x86_init.h | 2
-rw-r--r--  arch/x86/include/asm/xen/page.h | 13
-rw-r--r--  arch/x86/include/uapi/asm/hyperv.h | 13
-rw-r--r--  arch/x86/include/uapi/asm/msr-index.h | 1
-rw-r--r--  arch/x86/include/uapi/asm/sembuf.h | 10
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 7
-rw-r--r--  arch/x86/kernel/apic/apic_flat_64.c | 4
-rw-r--r--  arch/x86/kernel/apic/io_apic.c | 3
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c | 1
-rw-r--r--  arch/x86/kernel/check.c | 2
-rw-r--r--  arch/x86/kernel/cpu/microcode/amd.c | 2
-rw-r--r--  arch/x86/kernel/cpu/microcode/intel.c | 2
-rw-r--r--  arch/x86/kernel/e820.c | 2
-rw-r--r--  arch/x86/kernel/kvm.c | 34
-rw-r--r--  arch/x86/kernel/tsc.c | 2
-rw-r--r--  arch/x86/kernel/vsmp_64.c | 8
-rw-r--r--  arch/x86/kernel/x86_init.c | 4
-rw-r--r--  arch/x86/kvm/Kconfig | 2
-rw-r--r--  arch/x86/kvm/cpuid.h | 8
-rw-r--r--  arch/x86/kvm/i8254.c | 18
-rw-r--r--  arch/x86/kvm/lapic.c | 9
-rw-r--r--  arch/x86/kvm/lapic.h | 2
-rw-r--r--  arch/x86/kvm/mmu.c | 12
-rw-r--r--  arch/x86/kvm/paging_tmpl.h | 8
-rw-r--r--  arch/x86/kvm/svm.c | 15
-rw-r--r--  arch/x86/kvm/vmx.c | 332
-rw-r--r--  arch/x86/kvm/x86.c | 140
-rw-r--r--  arch/x86/kvm/x86.h | 2
-rw-r--r--  arch/x86/lguest/boot.c | 12
-rw-r--r--  arch/x86/lib/Makefile | 2
-rw-r--r--  arch/x86/lib/hash.c | 88
-rw-r--r--  arch/x86/math-emu/errors.c | 5
-rw-r--r--  arch/x86/mm/gup.c | 8
-rw-r--r--  arch/x86/mm/init_32.c | 2
-rw-r--r--  arch/x86/mm/init_64.c | 2
-rw-r--r--  arch/x86/mm/memtest.c | 2
-rw-r--r--  arch/x86/mm/numa.c | 52
-rw-r--r--  arch/x86/mm/srat.c | 5
-rw-r--r--  arch/x86/pci/mmconfig-shared.c | 1
-rw-r--r--  arch/x86/pci/mmconfig_32.c | 1
-rw-r--r--  arch/x86/pci/xen.c | 2
-rw-r--r--  arch/x86/platform/efi/efi-bgrt.c | 10
-rw-r--r--  arch/x86/platform/intel-mid/device_libs/platform_ipc.h | 5
-rw-r--r--  arch/x86/platform/intel-mid/device_libs/platform_msic.h | 4
-rw-r--r--  arch/x86/platform/intel-mid/intel_mid_weak_decls.h | 6
-rw-r--r--  arch/x86/platform/intel-mid/mfld.c | 6
-rw-r--r--  arch/x86/platform/intel-mid/mrfl.c | 2
-rw-r--r--  arch/x86/platform/olpc/olpc-xo15-sci.c | 3
-rw-r--r--  arch/x86/platform/uv/uv_nmi.c | 65
-rw-r--r--  arch/x86/realmode/rm/Makefile | 17
-rw-r--r--  arch/x86/tools/relocs.c | 30
-rw-r--r--  arch/x86/tools/relocs.h | 7
-rw-r--r--  arch/x86/tools/relocs_common.c | 16
-rw-r--r--  arch/x86/xen/Kconfig | 4
-rw-r--r--  arch/x86/xen/enlighten.c | 126
-rw-r--r--  arch/x86/xen/grant-table.c | 64
-rw-r--r--  arch/x86/xen/irq.c | 13
-rw-r--r--  arch/x86/xen/mmu.c | 182
-rw-r--r--  arch/x86/xen/p2m.c | 32
-rw-r--r--  arch/x86/xen/platform-pci-unplug.c | 79
-rw-r--r--  arch/x86/xen/setup.c | 44
-rw-r--r--  arch/x86/xen/smp.c | 49
-rw-r--r--  arch/x86/xen/spinlock.c | 2
-rw-r--r--  arch/x86/xen/time.c | 1
-rw-r--r--  arch/x86/xen/xen-head.S | 25
-rw-r--r--  arch/x86/xen/xen-ops.h | 1
86 files changed, 4228 insertions(+), 576 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 3b6922ebf170..0af5250d914f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -23,6 +23,7 @@ config X86
 	def_bool y
 	select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
 	select ARCH_MIGHT_HAVE_PC_PARPORT
+	select ARCH_MIGHT_HAVE_PC_SERIO
 	select HAVE_AOUT if X86_32
 	select HAVE_UNSTABLE_SCHED_CLOCK
 	select ARCH_SUPPORTS_NUMA_BALANCING
@@ -279,13 +280,13 @@ config SMP
279 bool "Symmetric multi-processing support" 280 bool "Symmetric multi-processing support"
280 ---help--- 281 ---help---
281 This enables support for systems with more than one CPU. If you have 282 This enables support for systems with more than one CPU. If you have
282 a system with only one CPU, like most personal computers, say N. If 283 a system with only one CPU, say N. If you have a system with more
283 you have a system with more than one CPU, say Y. 284 than one CPU, say Y.
284 285
285 If you say N here, the kernel will run on single and multiprocessor 286 If you say N here, the kernel will run on uni- and multiprocessor
286 machines, but will use only one CPU of a multiprocessor machine. If 287 machines, but will use only one CPU of a multiprocessor machine. If
287 you say Y here, the kernel will run on many, but not all, 288 you say Y here, the kernel will run on many, but not all,
288 singleprocessor machines. On a singleprocessor machine, the kernel 289 uniprocessor machines. On a uniprocessor machine, the kernel
289 will run faster if you say N here. 290 will run faster if you say N here.
290 291
291 Note that if you say Y here and choose architecture "586" or 292 Note that if you say Y here and choose architecture "586" or
@@ -732,6 +733,7 @@ config APB_TIMER
 # The code disables itself when not needed.
 config DMI
 	default y
+	select DMI_SCAN_MACHINE_NON_EFI_FALLBACK
 	bool "Enable DMI scanning" if EXPERT
 	---help---
 	  Enabled scanning of DMI to identify machine quirks. Say Y
@@ -939,7 +941,7 @@ config X86_ANCIENT_MCE
 	depends on X86_32 && X86_MCE
 	---help---
 	  Include support for machine check handling on old Pentium 5 or WinChip
-	  systems. These typically need to be enabled explicitely on the command
+	  systems. These typically need to be enabled explicitly on the command
 	  line.

 config X86_MCE_THRESHOLD
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 13b22e0f681d..eeda43abed6e 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -11,6 +11,28 @@ else
 KBUILD_DEFCONFIG := $(ARCH)_defconfig
 endif

+# How to compile the 16-bit code.  Note we always compile for -march=i386;
+# that way we can complain to the user if the CPU is insufficient.
+#
+# The -m16 option is supported by GCC >= 4.9 and clang >= 3.5.  For
+# older versions of GCC, we need to play evil and unreliable tricks to
+# attempt to ensure that our asm(".code16gcc") is first in the asm
+# output.
+CODE16GCC_CFLAGS := -m32 -include $(srctree)/arch/x86/boot/code16gcc.h \
+		   $(call cc-option, -fno-toplevel-reorder,\
+		   $(call cc-option, -fno-unit-at-a-time))
+M16_CFLAGS	 := $(call cc-option, -m16, $(CODE16GCC_CFLAGS))
+
+REALMODE_CFLAGS	:= $(M16_CFLAGS) -g -Os -D__KERNEL__ \
+		   -DDISABLE_BRANCH_PROFILING \
+		   -Wall -Wstrict-prototypes -march=i386 -mregparm=3 \
+		   -fno-strict-aliasing -fomit-frame-pointer -fno-pic \
+		   -mno-mmx -mno-sse \
+		   $(call cc-option, -ffreestanding) \
+		   $(call cc-option, -fno-stack-protector) \
+		   $(call cc-option, -mpreferred-stack-boundary=2)
+export REALMODE_CFLAGS
+
 # BITS is used as extension for files which are available in a 32 bit
 # and a 64 bit version to simplify shared Makefiles.
 # e.g.: obj-y += foo_$(BITS).o
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index de7066918005..878df7e88cd4 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -51,20 +51,7 @@ $(obj)/cpustr.h: $(obj)/mkcpustr FORCE

 # ---------------------------------------------------------------------------

-# How to compile the 16-bit code.  Note we always compile for -march=i386,
-# that way we can complain to the user if the CPU is insufficient.
-KBUILD_CFLAGS	:= $(USERINCLUDE) -m32 -g -Os -D_SETUP -D__KERNEL__ \
-		   -DDISABLE_BRANCH_PROFILING \
-		   -Wall -Wstrict-prototypes \
-		   -march=i386 -mregparm=3 \
-		   -include $(srctree)/$(src)/code16gcc.h \
-		   -fno-strict-aliasing -fomit-frame-pointer -fno-pic \
-		   -mno-mmx -mno-sse \
-		   $(call cc-option, -ffreestanding) \
-		   $(call cc-option, -fno-toplevel-reorder,\
-		   $(call cc-option, -fno-unit-at-a-time)) \
-		   $(call cc-option, -fno-stack-protector) \
-		   $(call cc-option, -mpreferred-stack-boundary=2)
+KBUILD_CFLAGS	:= $(USERINCLUDE) $(REALMODE_CFLAGS) -D_SETUP
 KBUILD_AFLAGS	:= $(KBUILD_CFLAGS) -D__ASSEMBLY__
 GCOV_PROFILE := n

diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c
index a9fcb7cfb241..431fa5f84537 100644
--- a/arch/x86/boot/cpuflags.c
+++ b/arch/x86/boot/cpuflags.c
@@ -28,20 +28,35 @@ static int has_fpu(void)
 	return fsw == 0 && (fcw & 0x103f) == 0x003f;
 }

+/*
+ * For building the 16-bit code we want to explicitly specify 32-bit
+ * push/pop operations, rather than just saying 'pushf' or 'popf' and
+ * letting the compiler choose. But this is also included from the
+ * compressed/ directory where it may be 64-bit code, and thus needs
+ * to be 'pushfq' or 'popfq' in that case.
+ */
+#ifdef __x86_64__
+#define PUSHF "pushfq"
+#define POPF "popfq"
+#else
+#define PUSHF "pushfl"
+#define POPF "popfl"
+#endif
+
 int has_eflag(unsigned long mask)
 {
 	unsigned long f0, f1;

-	asm volatile("pushf	\n\t"
-		     "pushf	\n\t"
+	asm volatile(PUSHF "	\n\t"
+		     PUSHF "	\n\t"
 		     "pop %0	\n\t"
 		     "mov %0,%1	\n\t"
 		     "xor %2,%1	\n\t"
 		     "push %1	\n\t"
-		     "popf	\n\t"
-		     "pushf	\n\t"
+		     POPF "	\n\t"
+		     PUSHF "	\n\t"
 		     "pop %1	\n\t"
-		     "popf"
+		     POPF
 		     : "=&r" (f0), "=&r" (f1)
 		     : "ri" (mask));

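Aside (not part of the patch): a minimal sketch of how the has_eflag() probe above is typically consumed. The classic CPUID-availability check tries to toggle the ID bit (bit 21) of EFLAGS; if the bit can be flipped, CPUID exists. The constant value matches the usual X86_EFLAGS_ID definition, but the helper name here is illustrative only.

/* Illustrative caller of has_eflag(); not taken from this diff. */
#define X86_EFLAGS_ID	(1UL << 21)	/* EFLAGS.ID: toggleable only if CPUID is implemented */

static int cpu_has_cpuid(void)
{
	/* has_eflag() returns nonzero if the masked EFLAGS bit(s) can be toggled */
	return has_eflag(X86_EFLAGS_ID);
}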
diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h
index ff339c5db311..0bb25491262d 100644
--- a/arch/x86/boot/video.h
+++ b/arch/x86/boot/video.h
@@ -80,7 +80,7 @@ struct card_info {
 	u16 xmode_n;		/* Size of unprobed mode range */
 };

-#define __videocard struct card_info __attribute__((section(".videocards")))
+#define __videocard struct card_info __attribute__((used,section(".videocards")))
 extern struct card_info video_cards[], video_cards_end[];

 int mode_defined(u16 mode);	/* video.c */
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index e0fc24db234a..6ba54d640383 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -76,6 +76,7 @@ ifeq ($(avx2_supported),yes)
 endif

 aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
+aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o
 ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
 sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
 crc32c-intel-y := crc32c-intel_glue.o
diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S
new file mode 100644
index 000000000000..522ab68d1c88
--- /dev/null
+++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S
@@ -0,0 +1,2811 @@
1########################################################################
2# Copyright (c) 2013, Intel Corporation
3#
4# This software is available to you under a choice of one of two
5# licenses. You may choose to be licensed under the terms of the GNU
6# General Public License (GPL) Version 2, available from the file
7# COPYING in the main directory of this source tree, or the
8# OpenIB.org BSD license below:
9#
10# Redistribution and use in source and binary forms, with or without
11# modification, are permitted provided that the following conditions are
12# met:
13#
14# * Redistributions of source code must retain the above copyright
15# notice, this list of conditions and the following disclaimer.
16#
17# * Redistributions in binary form must reproduce the above copyright
18# notice, this list of conditions and the following disclaimer in the
19# documentation and/or other materials provided with the
20# distribution.
21#
22# * Neither the name of the Intel Corporation nor the names of its
23# contributors may be used to endorse or promote products derived from
24# this software without specific prior written permission.
25#
26#
27# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY
28# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
31# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES# LOSS OF USE, DATA, OR
34# PROFITS# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38########################################################################
39##
40## Authors:
41## Erdinc Ozturk <erdinc.ozturk@intel.com>
42## Vinodh Gopal <vinodh.gopal@intel.com>
43## James Guilford <james.guilford@intel.com>
44## Tim Chen <tim.c.chen@linux.intel.com>
45##
46## References:
47## This code was derived and highly optimized from the code described in paper:
48## Vinodh Gopal et. al. Optimized Galois-Counter-Mode Implementation
49## on Intel Architecture Processors. August, 2010
50## The details of the implementation is explained in:
51## Erdinc Ozturk et. al. Enabling High-Performance Galois-Counter-Mode
52## on Intel Architecture Processors. October, 2012.
53##
54## Assumptions:
55##
56##
57##
58## iv:
59## 0 1 2 3
60## 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
61## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
62## | Salt (From the SA) |
63## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
64## | Initialization Vector |
65## | (This is the sequence number from IPSec header) |
66## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
67## | 0x1 |
68## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
69##
70##
71##
72## AAD:
73## AAD padded to 128 bits with 0
74## for example, assume AAD is a u32 vector
75##
76## if AAD is 8 bytes:
77## AAD[3] = {A0, A1}#
78## padded AAD in xmm register = {A1 A0 0 0}
79##
80## 0 1 2 3
81## 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
82## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
83## | SPI (A1) |
84## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
85## | 32-bit Sequence Number (A0) |
86## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
87## | 0x0 |
88## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
89##
90## AAD Format with 32-bit Sequence Number
91##
92## if AAD is 12 bytes:
93## AAD[3] = {A0, A1, A2}#
94## padded AAD in xmm register = {A2 A1 A0 0}
95##
96## 0 1 2 3
97## 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
98## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
99## | SPI (A2) |
100## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
101## | 64-bit Extended Sequence Number {A1,A0} |
102## | |
103## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
104## | 0x0 |
105## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
106##
107## AAD Format with 64-bit Extended Sequence Number
108##
109##
110## aadLen:
111## from the definition of the spec, aadLen can only be 8 or 12 bytes.
112## The code additionally supports aadLen of length 16 bytes.
113##
114## TLen:
115## from the definition of the spec, TLen can only be 8, 12 or 16 bytes.
116##
117## poly = x^128 + x^127 + x^126 + x^121 + 1
118## throughout the code, one tab and two tab indentations are used. one tab is
119## for GHASH part, two tabs is for AES part.
120##
121
122#include <linux/linkage.h>
123#include <asm/inst.h>
124
125.data
126.align 16
127
128POLY: .octa 0xC2000000000000000000000000000001
129POLY2: .octa 0xC20000000000000000000001C2000000
130TWOONE: .octa 0x00000001000000000000000000000001
131
132# order of these constants should not change.
133# more specifically, ALL_F should follow SHIFT_MASK, and ZERO should follow ALL_F
134
135SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F
136SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100
137ALL_F: .octa 0xffffffffffffffffffffffffffffffff
138ZERO: .octa 0x00000000000000000000000000000000
139ONE: .octa 0x00000000000000000000000000000001
140ONEf: .octa 0x01000000000000000000000000000000
141
142.text
143
144
145##define the fields of the gcm aes context
146#{
147# u8 expanded_keys[16*11] store expanded keys
148# u8 shifted_hkey_1[16] store HashKey <<1 mod poly here
149# u8 shifted_hkey_2[16] store HashKey^2 <<1 mod poly here
150# u8 shifted_hkey_3[16] store HashKey^3 <<1 mod poly here
151# u8 shifted_hkey_4[16] store HashKey^4 <<1 mod poly here
152# u8 shifted_hkey_5[16] store HashKey^5 <<1 mod poly here
153# u8 shifted_hkey_6[16] store HashKey^6 <<1 mod poly here
154# u8 shifted_hkey_7[16] store HashKey^7 <<1 mod poly here
155# u8 shifted_hkey_8[16] store HashKey^8 <<1 mod poly here
156# u8 shifted_hkey_1_k[16] store XOR HashKey <<1 mod poly here (for Karatsuba purposes)
157# u8 shifted_hkey_2_k[16] store XOR HashKey^2 <<1 mod poly here (for Karatsuba purposes)
158# u8 shifted_hkey_3_k[16] store XOR HashKey^3 <<1 mod poly here (for Karatsuba purposes)
159# u8 shifted_hkey_4_k[16] store XOR HashKey^4 <<1 mod poly here (for Karatsuba purposes)
160# u8 shifted_hkey_5_k[16] store XOR HashKey^5 <<1 mod poly here (for Karatsuba purposes)
161# u8 shifted_hkey_6_k[16] store XOR HashKey^6 <<1 mod poly here (for Karatsuba purposes)
162# u8 shifted_hkey_7_k[16] store XOR HashKey^7 <<1 mod poly here (for Karatsuba purposes)
163# u8 shifted_hkey_8_k[16] store XOR HashKey^8 <<1 mod poly here (for Karatsuba purposes)
164#} gcm_ctx#
165
166HashKey = 16*11 # store HashKey <<1 mod poly here
167HashKey_2 = 16*12 # store HashKey^2 <<1 mod poly here
168HashKey_3 = 16*13 # store HashKey^3 <<1 mod poly here
169HashKey_4 = 16*14 # store HashKey^4 <<1 mod poly here
170HashKey_5 = 16*15 # store HashKey^5 <<1 mod poly here
171HashKey_6 = 16*16 # store HashKey^6 <<1 mod poly here
172HashKey_7 = 16*17 # store HashKey^7 <<1 mod poly here
173HashKey_8 = 16*18 # store HashKey^8 <<1 mod poly here
174HashKey_k = 16*19 # store XOR of HashKey <<1 mod poly here (for Karatsuba purposes)
175HashKey_2_k = 16*20 # store XOR of HashKey^2 <<1 mod poly here (for Karatsuba purposes)
176HashKey_3_k = 16*21 # store XOR of HashKey^3 <<1 mod poly here (for Karatsuba purposes)
177HashKey_4_k = 16*22 # store XOR of HashKey^4 <<1 mod poly here (for Karatsuba purposes)
178HashKey_5_k = 16*23 # store XOR of HashKey^5 <<1 mod poly here (for Karatsuba purposes)
179HashKey_6_k = 16*24 # store XOR of HashKey^6 <<1 mod poly here (for Karatsuba purposes)
180HashKey_7_k = 16*25 # store XOR of HashKey^7 <<1 mod poly here (for Karatsuba purposes)
181HashKey_8_k = 16*26 # store XOR of HashKey^8 <<1 mod poly here (for Karatsuba purposes)
182
183#define arg1 %rdi
184#define arg2 %rsi
185#define arg3 %rdx
186#define arg4 %rcx
187#define arg5 %r8
188#define arg6 %r9
189#define arg7 STACK_OFFSET+8*1(%r14)
190#define arg8 STACK_OFFSET+8*2(%r14)
191#define arg9 STACK_OFFSET+8*3(%r14)
192
193i = 0
194j = 0
195
196out_order = 0
197in_order = 1
198DEC = 0
199ENC = 1
200
201.macro define_reg r n
202reg_\r = %xmm\n
203.endm
204
205.macro setreg
206.altmacro
207define_reg i %i
208define_reg j %j
209.noaltmacro
210.endm
211
212# need to push 4 registers into stack to maintain
213STACK_OFFSET = 8*4
214
215TMP1 = 16*0 # Temporary storage for AAD
216TMP2 = 16*1 # Temporary storage for AES State 2 (State 1 is stored in an XMM register)
217TMP3 = 16*2 # Temporary storage for AES State 3
218TMP4 = 16*3 # Temporary storage for AES State 4
219TMP5 = 16*4 # Temporary storage for AES State 5
220TMP6 = 16*5 # Temporary storage for AES State 6
221TMP7 = 16*6 # Temporary storage for AES State 7
222TMP8 = 16*7 # Temporary storage for AES State 8
223
224VARIABLE_OFFSET = 16*8
225
226################################
227# Utility Macros
228################################
229
230# Encryption of a single block
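# (Reader's note, not part of the original patch: this macro assumes an
#  AES-128 key schedule laid out at (arg1) -- the whitening key at offset 0,
#  nine middle round keys at 16*1(arg1)..16*9(arg1), and the final round key
#  at 16*10(arg1), matching the vpxor/vaesenc/vaesenclast sequence below.)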
231.macro ENCRYPT_SINGLE_BLOCK XMM0
232 vpxor (arg1), \XMM0, \XMM0
233 i = 1
234 setreg
235.rep 9
236 vaesenc 16*i(arg1), \XMM0, \XMM0
237 i = (i+1)
238 setreg
239.endr
240 vaesenclast 16*10(arg1), \XMM0, \XMM0
241.endm
242
243#ifdef CONFIG_AS_AVX
244###############################################################################
245# GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0)
246# Input: A and B (128-bits each, bit-reflected)
247# Output: C = A*B*x mod poly, (i.e. >>1 )
248# To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input
249# GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly.
250###############################################################################
251.macro GHASH_MUL_AVX GH HK T1 T2 T3 T4 T5
252
253 vpshufd $0b01001110, \GH, \T2
254 vpshufd $0b01001110, \HK, \T3
255 vpxor \GH , \T2, \T2 # T2 = (a1+a0)
256 vpxor \HK , \T3, \T3 # T3 = (b1+b0)
257
258 vpclmulqdq $0x11, \HK, \GH, \T1 # T1 = a1*b1
259 vpclmulqdq $0x00, \HK, \GH, \GH # GH = a0*b0
260 vpclmulqdq $0x00, \T3, \T2, \T2 # T2 = (a1+a0)*(b1+b0)
261 vpxor \GH, \T2,\T2
262 vpxor \T1, \T2,\T2 # T2 = a0*b1+a1*b0
263
264 vpslldq $8, \T2,\T3 # shift-L T3 2 DWs
265 vpsrldq $8, \T2,\T2 # shift-R T2 2 DWs
266 vpxor \T3, \GH, \GH
267 vpxor \T2, \T1, \T1 # <T1:GH> = GH x HK
268
269 #first phase of the reduction
270 vpslld $31, \GH, \T2 # packed right shifting << 31
271 vpslld $30, \GH, \T3 # packed right shifting shift << 30
272 vpslld $25, \GH, \T4 # packed right shifting shift << 25
273
274 vpxor \T3, \T2, \T2 # xor the shifted versions
275 vpxor \T4, \T2, \T2
276
277 vpsrldq $4, \T2, \T5 # shift-R T5 1 DW
278
279 vpslldq $12, \T2, \T2 # shift-L T2 3 DWs
280 vpxor \T2, \GH, \GH # first phase of the reduction complete
281
282 #second phase of the reduction
283
284 vpsrld $1,\GH, \T2 # packed left shifting >> 1
285 vpsrld $2,\GH, \T3 # packed left shifting >> 2
286 vpsrld $7,\GH, \T4 # packed left shifting >> 7
287 vpxor \T3, \T2, \T2 # xor the shifted versions
288 vpxor \T4, \T2, \T2
289
290 vpxor \T5, \T2, \T2
291 vpxor \T2, \GH, \GH
292 vpxor \T1, \GH, \GH # the result is in GH
293
294
295.endm
296
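# (Reader's note, not part of the original patch: the three-multiply
#  Karatsuba step used by GHASH_MUL_AVX above, written out.  With
#  A = a1*x^64 + a0 and B = b1*x^64 + b0, carry-less in GF(2)[x]:
#
#      A*B = a1*b1*x^128
#            + ((a1+a0)*(b1+b0) + a1*b1 + a0*b0)*x^64
#            + a0*b0
#
#  so three VPCLMULQDQ products suffice instead of four; the two "phase"
#  blocks then fold the 256-bit product back modulo
#  x^128 + x^127 + x^126 + x^121 + 1.)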
297.macro PRECOMPUTE_AVX HK T1 T2 T3 T4 T5 T6
298
299 # Haskey_i_k holds XORed values of the low and high parts of the Haskey_i
300 vmovdqa \HK, \T5
301
302 vpshufd $0b01001110, \T5, \T1
303 vpxor \T5, \T1, \T1
304 vmovdqa \T1, HashKey_k(arg1)
305
306 GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^2<<1 mod poly
307 vmovdqa \T5, HashKey_2(arg1) # [HashKey_2] = HashKey^2<<1 mod poly
308 vpshufd $0b01001110, \T5, \T1
309 vpxor \T5, \T1, \T1
310 vmovdqa \T1, HashKey_2_k(arg1)
311
312 GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^3<<1 mod poly
313 vmovdqa \T5, HashKey_3(arg1)
314 vpshufd $0b01001110, \T5, \T1
315 vpxor \T5, \T1, \T1
316 vmovdqa \T1, HashKey_3_k(arg1)
317
318 GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^4<<1 mod poly
319 vmovdqa \T5, HashKey_4(arg1)
320 vpshufd $0b01001110, \T5, \T1
321 vpxor \T5, \T1, \T1
322 vmovdqa \T1, HashKey_4_k(arg1)
323
324 GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^5<<1 mod poly
325 vmovdqa \T5, HashKey_5(arg1)
326 vpshufd $0b01001110, \T5, \T1
327 vpxor \T5, \T1, \T1
328 vmovdqa \T1, HashKey_5_k(arg1)
329
330 GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^6<<1 mod poly
331 vmovdqa \T5, HashKey_6(arg1)
332 vpshufd $0b01001110, \T5, \T1
333 vpxor \T5, \T1, \T1
334 vmovdqa \T1, HashKey_6_k(arg1)
335
336 GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^7<<1 mod poly
337 vmovdqa \T5, HashKey_7(arg1)
338 vpshufd $0b01001110, \T5, \T1
339 vpxor \T5, \T1, \T1
340 vmovdqa \T1, HashKey_7_k(arg1)
341
342 GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^8<<1 mod poly
343 vmovdqa \T5, HashKey_8(arg1)
344 vpshufd $0b01001110, \T5, \T1
345 vpxor \T5, \T1, \T1
346 vmovdqa \T1, HashKey_8_k(arg1)
347
348.endm
349
350## if a = number of total plaintext bytes
351## b = floor(a/16)
352## num_initial_blocks = b mod 4#
353## encrypt the initial num_initial_blocks blocks and apply ghash on the ciphertext
354## r10, r11, r12, rax are clobbered
355## arg1, arg2, arg3, r14 are used as a pointer only, not modified
356
357.macro INITIAL_BLOCKS_AVX num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC
358 i = (8-\num_initial_blocks)
359 setreg
360
361 mov arg6, %r10 # r10 = AAD
362 mov arg7, %r12 # r12 = aadLen
363
364
365 mov %r12, %r11
366
367 vpxor reg_i, reg_i, reg_i
368_get_AAD_loop\@:
369 vmovd (%r10), \T1
370 vpslldq $12, \T1, \T1
371 vpsrldq $4, reg_i, reg_i
372 vpxor \T1, reg_i, reg_i
373
374 add $4, %r10
375 sub $4, %r12
376 jg _get_AAD_loop\@
377
378
379 cmp $16, %r11
380 je _get_AAD_loop2_done\@
381 mov $16, %r12
382
383_get_AAD_loop2\@:
384 vpsrldq $4, reg_i, reg_i
385 sub $4, %r12
386 cmp %r11, %r12
387 jg _get_AAD_loop2\@
388
389_get_AAD_loop2_done\@:
390
391 #byte-reflect the AAD data
392 vpshufb SHUF_MASK(%rip), reg_i, reg_i
393
394 # initialize the data pointer offset as zero
395 xor %r11, %r11
396
397 # start AES for num_initial_blocks blocks
398 mov arg5, %rax # rax = *Y0
399 vmovdqu (%rax), \CTR # CTR = Y0
400 vpshufb SHUF_MASK(%rip), \CTR, \CTR
401
402
403 i = (9-\num_initial_blocks)
404 setreg
405.rep \num_initial_blocks
406 vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
407 vmovdqa \CTR, reg_i
408 vpshufb SHUF_MASK(%rip), reg_i, reg_i # perform a 16Byte swap
409 i = (i+1)
410 setreg
411.endr
412
413 vmovdqa (arg1), \T_key
414 i = (9-\num_initial_blocks)
415 setreg
416.rep \num_initial_blocks
417 vpxor \T_key, reg_i, reg_i
418 i = (i+1)
419 setreg
420.endr
421
422 j = 1
423 setreg
424.rep 9
425 vmovdqa 16*j(arg1), \T_key
426 i = (9-\num_initial_blocks)
427 setreg
428.rep \num_initial_blocks
429 vaesenc \T_key, reg_i, reg_i
430 i = (i+1)
431 setreg
432.endr
433
434 j = (j+1)
435 setreg
436.endr
437
438
439 vmovdqa 16*10(arg1), \T_key
440 i = (9-\num_initial_blocks)
441 setreg
442.rep \num_initial_blocks
443 vaesenclast \T_key, reg_i, reg_i
444 i = (i+1)
445 setreg
446.endr
447
448 i = (9-\num_initial_blocks)
449 setreg
450.rep \num_initial_blocks
451 vmovdqu (arg3, %r11), \T1
452 vpxor \T1, reg_i, reg_i
453 vmovdqu reg_i, (arg2 , %r11) # write back ciphertext for num_initial_blocks blocks
454 add $16, %r11
455.if \ENC_DEC == DEC
456 vmovdqa \T1, reg_i
457.endif
458 vpshufb SHUF_MASK(%rip), reg_i, reg_i # prepare ciphertext for GHASH computations
459 i = (i+1)
460 setreg
461.endr
462
463
464 i = (8-\num_initial_blocks)
465 j = (9-\num_initial_blocks)
466 setreg
467 GHASH_MUL_AVX reg_i, \T2, \T1, \T3, \T4, \T5, \T6
468
469.rep \num_initial_blocks
470 vpxor reg_i, reg_j, reg_j
471 GHASH_MUL_AVX reg_j, \T2, \T1, \T3, \T4, \T5, \T6 # apply GHASH on num_initial_blocks blocks
472 i = (i+1)
473 j = (j+1)
474 setreg
475.endr
476 # XMM8 has the combined result here
477
478 vmovdqa \XMM8, TMP1(%rsp)
479 vmovdqa \XMM8, \T3
480
481 cmp $128, %r13
482 jl _initial_blocks_done\@ # no need for precomputed constants
483
484###############################################################################
485# Haskey_i_k holds XORed values of the low and high parts of the Haskey_i
486 vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
487 vmovdqa \CTR, \XMM1
488 vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap
489
490 vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
491 vmovdqa \CTR, \XMM2
492 vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap
493
494 vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
495 vmovdqa \CTR, \XMM3
496 vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap
497
498 vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
499 vmovdqa \CTR, \XMM4
500 vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap
501
502 vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
503 vmovdqa \CTR, \XMM5
504 vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap
505
506 vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
507 vmovdqa \CTR, \XMM6
508 vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap
509
510 vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
511 vmovdqa \CTR, \XMM7
512 vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap
513
514 vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
515 vmovdqa \CTR, \XMM8
516 vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap
517
518 vmovdqa (arg1), \T_key
519 vpxor \T_key, \XMM1, \XMM1
520 vpxor \T_key, \XMM2, \XMM2
521 vpxor \T_key, \XMM3, \XMM3
522 vpxor \T_key, \XMM4, \XMM4
523 vpxor \T_key, \XMM5, \XMM5
524 vpxor \T_key, \XMM6, \XMM6
525 vpxor \T_key, \XMM7, \XMM7
526 vpxor \T_key, \XMM8, \XMM8
527
528 i = 1
529 setreg
530.rep 9 # do 9 rounds
531 vmovdqa 16*i(arg1), \T_key
532 vaesenc \T_key, \XMM1, \XMM1
533 vaesenc \T_key, \XMM2, \XMM2
534 vaesenc \T_key, \XMM3, \XMM3
535 vaesenc \T_key, \XMM4, \XMM4
536 vaesenc \T_key, \XMM5, \XMM5
537 vaesenc \T_key, \XMM6, \XMM6
538 vaesenc \T_key, \XMM7, \XMM7
539 vaesenc \T_key, \XMM8, \XMM8
540 i = (i+1)
541 setreg
542.endr
543
544
545 vmovdqa 16*i(arg1), \T_key
546 vaesenclast \T_key, \XMM1, \XMM1
547 vaesenclast \T_key, \XMM2, \XMM2
548 vaesenclast \T_key, \XMM3, \XMM3
549 vaesenclast \T_key, \XMM4, \XMM4
550 vaesenclast \T_key, \XMM5, \XMM5
551 vaesenclast \T_key, \XMM6, \XMM6
552 vaesenclast \T_key, \XMM7, \XMM7
553 vaesenclast \T_key, \XMM8, \XMM8
554
555 vmovdqu (arg3, %r11), \T1
556 vpxor \T1, \XMM1, \XMM1
557 vmovdqu \XMM1, (arg2 , %r11)
558 .if \ENC_DEC == DEC
559 vmovdqa \T1, \XMM1
560 .endif
561
562 vmovdqu 16*1(arg3, %r11), \T1
563 vpxor \T1, \XMM2, \XMM2
564 vmovdqu \XMM2, 16*1(arg2 , %r11)
565 .if \ENC_DEC == DEC
566 vmovdqa \T1, \XMM2
567 .endif
568
569 vmovdqu 16*2(arg3, %r11), \T1
570 vpxor \T1, \XMM3, \XMM3
571 vmovdqu \XMM3, 16*2(arg2 , %r11)
572 .if \ENC_DEC == DEC
573 vmovdqa \T1, \XMM3
574 .endif
575
576 vmovdqu 16*3(arg3, %r11), \T1
577 vpxor \T1, \XMM4, \XMM4
578 vmovdqu \XMM4, 16*3(arg2 , %r11)
579 .if \ENC_DEC == DEC
580 vmovdqa \T1, \XMM4
581 .endif
582
583 vmovdqu 16*4(arg3, %r11), \T1
584 vpxor \T1, \XMM5, \XMM5
585 vmovdqu \XMM5, 16*4(arg2 , %r11)
586 .if \ENC_DEC == DEC
587 vmovdqa \T1, \XMM5
588 .endif
589
590 vmovdqu 16*5(arg3, %r11), \T1
591 vpxor \T1, \XMM6, \XMM6
592 vmovdqu \XMM6, 16*5(arg2 , %r11)
593 .if \ENC_DEC == DEC
594 vmovdqa \T1, \XMM6
595 .endif
596
597 vmovdqu 16*6(arg3, %r11), \T1
598 vpxor \T1, \XMM7, \XMM7
599 vmovdqu \XMM7, 16*6(arg2 , %r11)
600 .if \ENC_DEC == DEC
601 vmovdqa \T1, \XMM7
602 .endif
603
604 vmovdqu 16*7(arg3, %r11), \T1
605 vpxor \T1, \XMM8, \XMM8
606 vmovdqu \XMM8, 16*7(arg2 , %r11)
607 .if \ENC_DEC == DEC
608 vmovdqa \T1, \XMM8
609 .endif
610
611 add $128, %r11
612
613 vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap
614 vpxor TMP1(%rsp), \XMM1, \XMM1 # combine GHASHed value with the corresponding ciphertext
615 vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap
616 vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap
617 vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap
618 vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap
619 vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap
620 vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap
621 vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap
622
623###############################################################################
624
625_initial_blocks_done\@:
626
627.endm
628
629# encrypt 8 blocks at a time
630# ghash the 8 previously encrypted ciphertext blocks
631# arg1, arg2, arg3 are used as pointers only, not modified
632# r11 is the data offset value
633.macro GHASH_8_ENCRYPT_8_PARALLEL_AVX T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC
634
635 vmovdqa \XMM1, \T2
636 vmovdqa \XMM2, TMP2(%rsp)
637 vmovdqa \XMM3, TMP3(%rsp)
638 vmovdqa \XMM4, TMP4(%rsp)
639 vmovdqa \XMM5, TMP5(%rsp)
640 vmovdqa \XMM6, TMP6(%rsp)
641 vmovdqa \XMM7, TMP7(%rsp)
642 vmovdqa \XMM8, TMP8(%rsp)
643
644.if \loop_idx == in_order
645 vpaddd ONE(%rip), \CTR, \XMM1 # INCR CNT
646 vpaddd ONE(%rip), \XMM1, \XMM2
647 vpaddd ONE(%rip), \XMM2, \XMM3
648 vpaddd ONE(%rip), \XMM3, \XMM4
649 vpaddd ONE(%rip), \XMM4, \XMM5
650 vpaddd ONE(%rip), \XMM5, \XMM6
651 vpaddd ONE(%rip), \XMM6, \XMM7
652 vpaddd ONE(%rip), \XMM7, \XMM8
653 vmovdqa \XMM8, \CTR
654
655 vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap
656 vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap
657 vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap
658 vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap
659 vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap
660 vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap
661 vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap
662 vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap
663.else
664 vpaddd ONEf(%rip), \CTR, \XMM1 # INCR CNT
665 vpaddd ONEf(%rip), \XMM1, \XMM2
666 vpaddd ONEf(%rip), \XMM2, \XMM3
667 vpaddd ONEf(%rip), \XMM3, \XMM4
668 vpaddd ONEf(%rip), \XMM4, \XMM5
669 vpaddd ONEf(%rip), \XMM5, \XMM6
670 vpaddd ONEf(%rip), \XMM6, \XMM7
671 vpaddd ONEf(%rip), \XMM7, \XMM8
672 vmovdqa \XMM8, \CTR
673.endif
674
675
676 #######################################################################
677
678 vmovdqu (arg1), \T1
679 vpxor \T1, \XMM1, \XMM1
680 vpxor \T1, \XMM2, \XMM2
681 vpxor \T1, \XMM3, \XMM3
682 vpxor \T1, \XMM4, \XMM4
683 vpxor \T1, \XMM5, \XMM5
684 vpxor \T1, \XMM6, \XMM6
685 vpxor \T1, \XMM7, \XMM7
686 vpxor \T1, \XMM8, \XMM8
687
688 #######################################################################
689
690
691
692
693
694 vmovdqu 16*1(arg1), \T1
695 vaesenc \T1, \XMM1, \XMM1
696 vaesenc \T1, \XMM2, \XMM2
697 vaesenc \T1, \XMM3, \XMM3
698 vaesenc \T1, \XMM4, \XMM4
699 vaesenc \T1, \XMM5, \XMM5
700 vaesenc \T1, \XMM6, \XMM6
701 vaesenc \T1, \XMM7, \XMM7
702 vaesenc \T1, \XMM8, \XMM8
703
704 vmovdqu 16*2(arg1), \T1
705 vaesenc \T1, \XMM1, \XMM1
706 vaesenc \T1, \XMM2, \XMM2
707 vaesenc \T1, \XMM3, \XMM3
708 vaesenc \T1, \XMM4, \XMM4
709 vaesenc \T1, \XMM5, \XMM5
710 vaesenc \T1, \XMM6, \XMM6
711 vaesenc \T1, \XMM7, \XMM7
712 vaesenc \T1, \XMM8, \XMM8
713
714
715 #######################################################################
716
717 vmovdqa HashKey_8(arg1), \T5
718 vpclmulqdq $0x11, \T5, \T2, \T4 # T4 = a1*b1
719 vpclmulqdq $0x00, \T5, \T2, \T7 # T7 = a0*b0
720
721 vpshufd $0b01001110, \T2, \T6
722 vpxor \T2, \T6, \T6
723
724 vmovdqa HashKey_8_k(arg1), \T5
725 vpclmulqdq $0x00, \T5, \T6, \T6
726
727 vmovdqu 16*3(arg1), \T1
728 vaesenc \T1, \XMM1, \XMM1
729 vaesenc \T1, \XMM2, \XMM2
730 vaesenc \T1, \XMM3, \XMM3
731 vaesenc \T1, \XMM4, \XMM4
732 vaesenc \T1, \XMM5, \XMM5
733 vaesenc \T1, \XMM6, \XMM6
734 vaesenc \T1, \XMM7, \XMM7
735 vaesenc \T1, \XMM8, \XMM8
736
737 vmovdqa TMP2(%rsp), \T1
738 vmovdqa HashKey_7(arg1), \T5
739 vpclmulqdq $0x11, \T5, \T1, \T3
740 vpxor \T3, \T4, \T4
741 vpclmulqdq $0x00, \T5, \T1, \T3
742 vpxor \T3, \T7, \T7
743
744 vpshufd $0b01001110, \T1, \T3
745 vpxor \T1, \T3, \T3
746 vmovdqa HashKey_7_k(arg1), \T5
747 vpclmulqdq $0x10, \T5, \T3, \T3
748 vpxor \T3, \T6, \T6
749
750 vmovdqu 16*4(arg1), \T1
751 vaesenc \T1, \XMM1, \XMM1
752 vaesenc \T1, \XMM2, \XMM2
753 vaesenc \T1, \XMM3, \XMM3
754 vaesenc \T1, \XMM4, \XMM4
755 vaesenc \T1, \XMM5, \XMM5
756 vaesenc \T1, \XMM6, \XMM6
757 vaesenc \T1, \XMM7, \XMM7
758 vaesenc \T1, \XMM8, \XMM8
759
760 #######################################################################
761
762 vmovdqa TMP3(%rsp), \T1
763 vmovdqa HashKey_6(arg1), \T5
764 vpclmulqdq $0x11, \T5, \T1, \T3
765 vpxor \T3, \T4, \T4
766 vpclmulqdq $0x00, \T5, \T1, \T3
767 vpxor \T3, \T7, \T7
768
769 vpshufd $0b01001110, \T1, \T3
770 vpxor \T1, \T3, \T3
771 vmovdqa HashKey_6_k(arg1), \T5
772 vpclmulqdq $0x10, \T5, \T3, \T3
773 vpxor \T3, \T6, \T6
774
775 vmovdqu 16*5(arg1), \T1
776 vaesenc \T1, \XMM1, \XMM1
777 vaesenc \T1, \XMM2, \XMM2
778 vaesenc \T1, \XMM3, \XMM3
779 vaesenc \T1, \XMM4, \XMM4
780 vaesenc \T1, \XMM5, \XMM5
781 vaesenc \T1, \XMM6, \XMM6
782 vaesenc \T1, \XMM7, \XMM7
783 vaesenc \T1, \XMM8, \XMM8
784
785 vmovdqa TMP4(%rsp), \T1
786 vmovdqa HashKey_5(arg1), \T5
787 vpclmulqdq $0x11, \T5, \T1, \T3
788 vpxor \T3, \T4, \T4
789 vpclmulqdq $0x00, \T5, \T1, \T3
790 vpxor \T3, \T7, \T7
791
792 vpshufd $0b01001110, \T1, \T3
793 vpxor \T1, \T3, \T3
794 vmovdqa HashKey_5_k(arg1), \T5
795 vpclmulqdq $0x10, \T5, \T3, \T3
796 vpxor \T3, \T6, \T6
797
798 vmovdqu 16*6(arg1), \T1
799 vaesenc \T1, \XMM1, \XMM1
800 vaesenc \T1, \XMM2, \XMM2
801 vaesenc \T1, \XMM3, \XMM3
802 vaesenc \T1, \XMM4, \XMM4
803 vaesenc \T1, \XMM5, \XMM5
804 vaesenc \T1, \XMM6, \XMM6
805 vaesenc \T1, \XMM7, \XMM7
806 vaesenc \T1, \XMM8, \XMM8
807
808
809 vmovdqa TMP5(%rsp), \T1
810 vmovdqa HashKey_4(arg1), \T5
811 vpclmulqdq $0x11, \T5, \T1, \T3
812 vpxor \T3, \T4, \T4
813 vpclmulqdq $0x00, \T5, \T1, \T3
814 vpxor \T3, \T7, \T7
815
816 vpshufd $0b01001110, \T1, \T3
817 vpxor \T1, \T3, \T3
818 vmovdqa HashKey_4_k(arg1), \T5
819 vpclmulqdq $0x10, \T5, \T3, \T3
820 vpxor \T3, \T6, \T6
821
822 vmovdqu 16*7(arg1), \T1
823 vaesenc \T1, \XMM1, \XMM1
824 vaesenc \T1, \XMM2, \XMM2
825 vaesenc \T1, \XMM3, \XMM3
826 vaesenc \T1, \XMM4, \XMM4
827 vaesenc \T1, \XMM5, \XMM5
828 vaesenc \T1, \XMM6, \XMM6
829 vaesenc \T1, \XMM7, \XMM7
830 vaesenc \T1, \XMM8, \XMM8
831
832 vmovdqa TMP6(%rsp), \T1
833 vmovdqa HashKey_3(arg1), \T5
834 vpclmulqdq $0x11, \T5, \T1, \T3
835 vpxor \T3, \T4, \T4
836 vpclmulqdq $0x00, \T5, \T1, \T3
837 vpxor \T3, \T7, \T7
838
839 vpshufd $0b01001110, \T1, \T3
840 vpxor \T1, \T3, \T3
841 vmovdqa HashKey_3_k(arg1), \T5
842 vpclmulqdq $0x10, \T5, \T3, \T3
843 vpxor \T3, \T6, \T6
844
845
846 vmovdqu 16*8(arg1), \T1
847 vaesenc \T1, \XMM1, \XMM1
848 vaesenc \T1, \XMM2, \XMM2
849 vaesenc \T1, \XMM3, \XMM3
850 vaesenc \T1, \XMM4, \XMM4
851 vaesenc \T1, \XMM5, \XMM5
852 vaesenc \T1, \XMM6, \XMM6
853 vaesenc \T1, \XMM7, \XMM7
854 vaesenc \T1, \XMM8, \XMM8
855
856 vmovdqa TMP7(%rsp), \T1
857 vmovdqa HashKey_2(arg1), \T5
858 vpclmulqdq $0x11, \T5, \T1, \T3
859 vpxor \T3, \T4, \T4
860 vpclmulqdq $0x00, \T5, \T1, \T3
861 vpxor \T3, \T7, \T7
862
863 vpshufd $0b01001110, \T1, \T3
864 vpxor \T1, \T3, \T3
865 vmovdqa HashKey_2_k(arg1), \T5
866 vpclmulqdq $0x10, \T5, \T3, \T3
867 vpxor \T3, \T6, \T6
868
869 #######################################################################
870
871 vmovdqu 16*9(arg1), \T5
872 vaesenc \T5, \XMM1, \XMM1
873 vaesenc \T5, \XMM2, \XMM2
874 vaesenc \T5, \XMM3, \XMM3
875 vaesenc \T5, \XMM4, \XMM4
876 vaesenc \T5, \XMM5, \XMM5
877 vaesenc \T5, \XMM6, \XMM6
878 vaesenc \T5, \XMM7, \XMM7
879 vaesenc \T5, \XMM8, \XMM8
880
881 vmovdqa TMP8(%rsp), \T1
882 vmovdqa HashKey(arg1), \T5
883 vpclmulqdq $0x11, \T5, \T1, \T3
884 vpxor \T3, \T4, \T4
885 vpclmulqdq $0x00, \T5, \T1, \T3
886 vpxor \T3, \T7, \T7
887
888 vpshufd $0b01001110, \T1, \T3
889 vpxor \T1, \T3, \T3
890 vmovdqa HashKey_k(arg1), \T5
891 vpclmulqdq $0x10, \T5, \T3, \T3
892 vpxor \T3, \T6, \T6
893
894 vpxor \T4, \T6, \T6
895 vpxor \T7, \T6, \T6
896
897 vmovdqu 16*10(arg1), \T5
898
899 i = 0
900 j = 1
901 setreg
902.rep 8
903 vpxor 16*i(arg3, %r11), \T5, \T2
904 .if \ENC_DEC == ENC
905 vaesenclast \T2, reg_j, reg_j
906 .else
907 vaesenclast \T2, reg_j, \T3
908 vmovdqu 16*i(arg3, %r11), reg_j
909 vmovdqu \T3, 16*i(arg2, %r11)
910 .endif
911 i = (i+1)
912 j = (j+1)
913 setreg
914.endr
915 #######################################################################
916
917
918 vpslldq $8, \T6, \T3 # shift-L T3 2 DWs
919 vpsrldq $8, \T6, \T6 # shift-R T2 2 DWs
920 vpxor \T3, \T7, \T7
921 vpxor \T4, \T6, \T6 # accumulate the results in T6:T7
922
923
924
925 #######################################################################
926 #first phase of the reduction
927 #######################################################################
928 vpslld $31, \T7, \T2 # packed right shifting << 31
929 vpslld $30, \T7, \T3 # packed right shifting shift << 30
930 vpslld $25, \T7, \T4 # packed right shifting shift << 25
931
932 vpxor \T3, \T2, \T2 # xor the shifted versions
933 vpxor \T4, \T2, \T2
934
935 vpsrldq $4, \T2, \T1 # shift-R T1 1 DW
936
937 vpslldq $12, \T2, \T2 # shift-L T2 3 DWs
938 vpxor \T2, \T7, \T7 # first phase of the reduction complete
939 #######################################################################
940 .if \ENC_DEC == ENC
941 vmovdqu \XMM1, 16*0(arg2,%r11) # Write to the Ciphertext buffer
942 vmovdqu \XMM2, 16*1(arg2,%r11) # Write to the Ciphertext buffer
943 vmovdqu \XMM3, 16*2(arg2,%r11) # Write to the Ciphertext buffer
944 vmovdqu \XMM4, 16*3(arg2,%r11) # Write to the Ciphertext buffer
945 vmovdqu \XMM5, 16*4(arg2,%r11) # Write to the Ciphertext buffer
946 vmovdqu \XMM6, 16*5(arg2,%r11) # Write to the Ciphertext buffer
947 vmovdqu \XMM7, 16*6(arg2,%r11) # Write to the Ciphertext buffer
948 vmovdqu \XMM8, 16*7(arg2,%r11) # Write to the Ciphertext buffer
949 .endif
950
951 #######################################################################
952 #second phase of the reduction
953 vpsrld $1, \T7, \T2 # packed left shifting >> 1
954 vpsrld $2, \T7, \T3 # packed left shifting >> 2
955 vpsrld $7, \T7, \T4 # packed left shifting >> 7
956 vpxor \T3, \T2, \T2 # xor the shifted versions
957 vpxor \T4, \T2, \T2
958
959 vpxor \T1, \T2, \T2
960 vpxor \T2, \T7, \T7
961 vpxor \T7, \T6, \T6 # the result is in T6
962 #######################################################################
963
964 vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap
965 vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap
966 vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap
967 vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap
968 vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap
969 vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap
970 vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap
971 vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap
972
973
974 vpxor \T6, \XMM1, \XMM1
975
976
977
978.endm
979
980
981# GHASH the last 4 ciphertext blocks.
982.macro GHASH_LAST_8_AVX T1 T2 T3 T4 T5 T6 T7 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8
983
984 ## Karatsuba Method
985
986
987 vpshufd $0b01001110, \XMM1, \T2
988 vpxor \XMM1, \T2, \T2
989 vmovdqa HashKey_8(arg1), \T5
990 vpclmulqdq $0x11, \T5, \XMM1, \T6
991 vpclmulqdq $0x00, \T5, \XMM1, \T7
992
993 vmovdqa HashKey_8_k(arg1), \T3
994 vpclmulqdq $0x00, \T3, \T2, \XMM1
995
996 ######################
997
998 vpshufd $0b01001110, \XMM2, \T2
999 vpxor \XMM2, \T2, \T2
1000 vmovdqa HashKey_7(arg1), \T5
1001 vpclmulqdq $0x11, \T5, \XMM2, \T4
1002 vpxor \T4, \T6, \T6
1003
1004 vpclmulqdq $0x00, \T5, \XMM2, \T4
1005 vpxor \T4, \T7, \T7
1006
1007 vmovdqa HashKey_7_k(arg1), \T3
1008 vpclmulqdq $0x00, \T3, \T2, \T2
1009 vpxor \T2, \XMM1, \XMM1
1010
1011 ######################
1012
1013 vpshufd $0b01001110, \XMM3, \T2
1014 vpxor \XMM3, \T2, \T2
1015 vmovdqa HashKey_6(arg1), \T5
1016 vpclmulqdq $0x11, \T5, \XMM3, \T4
1017 vpxor \T4, \T6, \T6
1018
1019 vpclmulqdq $0x00, \T5, \XMM3, \T4
1020 vpxor \T4, \T7, \T7
1021
1022 vmovdqa HashKey_6_k(arg1), \T3
1023 vpclmulqdq $0x00, \T3, \T2, \T2
1024 vpxor \T2, \XMM1, \XMM1
1025
1026 ######################
1027
1028 vpshufd $0b01001110, \XMM4, \T2
1029 vpxor \XMM4, \T2, \T2
1030 vmovdqa HashKey_5(arg1), \T5
1031 vpclmulqdq $0x11, \T5, \XMM4, \T4
1032 vpxor \T4, \T6, \T6
1033
1034 vpclmulqdq $0x00, \T5, \XMM4, \T4
1035 vpxor \T4, \T7, \T7
1036
1037 vmovdqa HashKey_5_k(arg1), \T3
1038 vpclmulqdq $0x00, \T3, \T2, \T2
1039 vpxor \T2, \XMM1, \XMM1
1040
1041 ######################
1042
1043 vpshufd $0b01001110, \XMM5, \T2
1044 vpxor \XMM5, \T2, \T2
1045 vmovdqa HashKey_4(arg1), \T5
1046 vpclmulqdq $0x11, \T5, \XMM5, \T4
1047 vpxor \T4, \T6, \T6
1048
1049 vpclmulqdq $0x00, \T5, \XMM5, \T4
1050 vpxor \T4, \T7, \T7
1051
1052 vmovdqa HashKey_4_k(arg1), \T3
1053 vpclmulqdq $0x00, \T3, \T2, \T2
1054 vpxor \T2, \XMM1, \XMM1
1055
1056 ######################
1057
1058 vpshufd $0b01001110, \XMM6, \T2
1059 vpxor \XMM6, \T2, \T2
1060 vmovdqa HashKey_3(arg1), \T5
1061 vpclmulqdq $0x11, \T5, \XMM6, \T4
1062 vpxor \T4, \T6, \T6
1063
1064 vpclmulqdq $0x00, \T5, \XMM6, \T4
1065 vpxor \T4, \T7, \T7
1066
1067 vmovdqa HashKey_3_k(arg1), \T3
1068 vpclmulqdq $0x00, \T3, \T2, \T2
1069 vpxor \T2, \XMM1, \XMM1
1070
1071 ######################
1072
1073 vpshufd $0b01001110, \XMM7, \T2
1074 vpxor \XMM7, \T2, \T2
1075 vmovdqa HashKey_2(arg1), \T5
1076 vpclmulqdq $0x11, \T5, \XMM7, \T4
1077 vpxor \T4, \T6, \T6
1078
1079 vpclmulqdq $0x00, \T5, \XMM7, \T4
1080 vpxor \T4, \T7, \T7
1081
1082 vmovdqa HashKey_2_k(arg1), \T3
1083 vpclmulqdq $0x00, \T3, \T2, \T2
1084 vpxor \T2, \XMM1, \XMM1
1085
1086 ######################
1087
1088 vpshufd $0b01001110, \XMM8, \T2
1089 vpxor \XMM8, \T2, \T2
1090 vmovdqa HashKey(arg1), \T5
1091 vpclmulqdq $0x11, \T5, \XMM8, \T4
1092 vpxor \T4, \T6, \T6
1093
1094 vpclmulqdq $0x00, \T5, \XMM8, \T4
1095 vpxor \T4, \T7, \T7
1096
1097 vmovdqa HashKey_k(arg1), \T3
1098 vpclmulqdq $0x00, \T3, \T2, \T2
1099
1100 vpxor \T2, \XMM1, \XMM1
1101 vpxor \T6, \XMM1, \XMM1
1102 vpxor \T7, \XMM1, \T2
1103
1104
1105
1106
1107 vpslldq $8, \T2, \T4
1108 vpsrldq $8, \T2, \T2
1109
1110 vpxor \T4, \T7, \T7
1111 vpxor \T2, \T6, \T6 # <T6:T7> holds the result of
1112 # the accumulated carry-less multiplications
1113
1114 #######################################################################
1115 #first phase of the reduction
1116 vpslld $31, \T7, \T2 # packed right shifting << 31
1117 vpslld $30, \T7, \T3 # packed right shifting shift << 30
1118 vpslld $25, \T7, \T4 # packed right shifting shift << 25
1119
1120 vpxor \T3, \T2, \T2 # xor the shifted versions
1121 vpxor \T4, \T2, \T2
1122
1123 vpsrldq $4, \T2, \T1 # shift-R T1 1 DW
1124
1125 vpslldq $12, \T2, \T2 # shift-L T2 3 DWs
1126 vpxor \T2, \T7, \T7 # first phase of the reduction complete
1127 #######################################################################
1128
1129
1130 #second phase of the reduction
1131 vpsrld $1, \T7, \T2 # packed left shifting >> 1
1132 vpsrld $2, \T7, \T3 # packed left shifting >> 2
1133 vpsrld $7, \T7, \T4 # packed left shifting >> 7
1134 vpxor \T3, \T2, \T2 # xor the shifted versions
1135 vpxor \T4, \T2, \T2
1136
1137 vpxor \T1, \T2, \T2
1138 vpxor \T2, \T7, \T7
1139 vpxor \T7, \T6, \T6 # the result is in T6
1140
1141.endm
1142
1143
1144# combined for GCM encrypt and decrypt functions
1145# clobbering all xmm registers
1146# clobbering r10, r11, r12, r13, r14, r15
1147.macro GCM_ENC_DEC_AVX ENC_DEC
1148
1149 #the number of pushes must equal STACK_OFFSET
1150 push %r12
1151 push %r13
1152 push %r14
1153 push %r15
1154
1155 mov %rsp, %r14
1156
1157
1158
1159
1160 sub $VARIABLE_OFFSET, %rsp
1161 and $~63, %rsp # align rsp to 64 bytes
1162
1163
1164 vmovdqu HashKey(arg1), %xmm13 # xmm13 = HashKey
1165
1166 mov arg4, %r13 # save the number of bytes of plaintext/ciphertext
1167 and $-16, %r13 # r13 = r13 - (r13 mod 16)
1168
1169 mov %r13, %r12
1170 shr $4, %r12
1171 and $7, %r12
1172 jz _initial_num_blocks_is_0\@
1173
1174 cmp $7, %r12
1175 je _initial_num_blocks_is_7\@
1176 cmp $6, %r12
1177 je _initial_num_blocks_is_6\@
1178 cmp $5, %r12
1179 je _initial_num_blocks_is_5\@
1180 cmp $4, %r12
1181 je _initial_num_blocks_is_4\@
1182 cmp $3, %r12
1183 je _initial_num_blocks_is_3\@
1184 cmp $2, %r12
1185 je _initial_num_blocks_is_2\@
1186
1187 jmp _initial_num_blocks_is_1\@
1188
1189_initial_num_blocks_is_7\@:
1190 INITIAL_BLOCKS_AVX 7, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
1191 sub $16*7, %r13
1192 jmp _initial_blocks_encrypted\@
1193
1194_initial_num_blocks_is_6\@:
1195 INITIAL_BLOCKS_AVX 6, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
1196 sub $16*6, %r13
1197 jmp _initial_blocks_encrypted\@
1198
1199_initial_num_blocks_is_5\@:
1200 INITIAL_BLOCKS_AVX 5, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
1201 sub $16*5, %r13
1202 jmp _initial_blocks_encrypted\@
1203
1204_initial_num_blocks_is_4\@:
1205 INITIAL_BLOCKS_AVX 4, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
1206 sub $16*4, %r13
1207 jmp _initial_blocks_encrypted\@
1208
1209_initial_num_blocks_is_3\@:
1210 INITIAL_BLOCKS_AVX 3, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
1211 sub $16*3, %r13
1212 jmp _initial_blocks_encrypted\@
1213
1214_initial_num_blocks_is_2\@:
1215 INITIAL_BLOCKS_AVX 2, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
1216 sub $16*2, %r13
1217 jmp _initial_blocks_encrypted\@
1218
1219_initial_num_blocks_is_1\@:
1220 INITIAL_BLOCKS_AVX 1, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
1221 sub $16*1, %r13
1222 jmp _initial_blocks_encrypted\@
1223
1224_initial_num_blocks_is_0\@:
1225 INITIAL_BLOCKS_AVX 0, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
1226
1227
1228_initial_blocks_encrypted\@:
1229 cmp $0, %r13
1230 je _zero_cipher_left\@
1231
1232 sub $128, %r13
1233 je _eight_cipher_left\@
1234
1235
1236
1237
1238 vmovd %xmm9, %r15d
1239 and $255, %r15d
1240 vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
1241
1242
1243_encrypt_by_8_new\@:
1244 cmp $(255-8), %r15d
1245 jg _encrypt_by_8\@
1246
1247
1248
1249 add $8, %r15b
1250 GHASH_8_ENCRYPT_8_PARALLEL_AVX %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, out_order, \ENC_DEC
1251 add $128, %r11
1252 sub $128, %r13
1253 jne _encrypt_by_8_new\@
1254
1255 vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
1256 jmp _eight_cipher_left\@
1257
1258_encrypt_by_8\@:
1259 vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
1260 add $8, %r15b
1261 GHASH_8_ENCRYPT_8_PARALLEL_AVX %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, in_order, \ENC_DEC
1262 vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
1263 add $128, %r11
1264 sub $128, %r13
1265 jne _encrypt_by_8_new\@
1266
1267 vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
1268
1269
1270
1271
1272_eight_cipher_left\@:
1273 GHASH_LAST_8_AVX %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8
1274
1275
1276_zero_cipher_left\@:
1277 cmp $16, arg4
1278 jl _only_less_than_16\@
1279
1280 mov arg4, %r13
1281 and $15, %r13 # r13 = (arg4 mod 16)
1282
1283 je _multiple_of_16_bytes\@
1284
1285 # handle the last <16 Byte block seperately
1286
1287
1288 vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn
1289 vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
1290 ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Yn)
1291
1292 sub $16, %r11
1293 add %r13, %r11
1294 vmovdqu (arg3, %r11), %xmm1 # receive the last <16 Byte block
1295
1296 lea SHIFT_MASK+16(%rip), %r12
1297 sub %r13, %r12 # adjust the shuffle mask pointer to be
1298 # able to shift 16-r13 bytes (r13 is the
1299 # number of bytes in plaintext mod 16)
1300 vmovdqu (%r12), %xmm2 # get the appropriate shuffle mask
1301 vpshufb %xmm2, %xmm1, %xmm1 # shift right 16-r13 bytes
1302 jmp _final_ghash_mul\@
1303
1304_only_less_than_16\@:
1305 # check for 0 length
1306 mov arg4, %r13
1307 and $15, %r13 # r13 = (arg4 mod 16)
1308
1309 je _multiple_of_16_bytes\@
1310
1311 # handle the last <16 Byte block seperately
1312
1313
1314 vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn
1315 vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
1316 ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Yn)
1317
1318
1319 lea SHIFT_MASK+16(%rip), %r12
1320 sub %r13, %r12 # adjust the shuffle mask pointer to be
1321 # able to shift 16-r13 bytes (r13 is the
1322 # number of bytes in plaintext mod 16)
1323
1324_get_last_16_byte_loop\@:
1325 movb (arg3, %r11), %al
1326 movb %al, TMP1 (%rsp , %r11)
1327 add $1, %r11
1328 cmp %r13, %r11
1329 jne _get_last_16_byte_loop\@
1330
1331 vmovdqu TMP1(%rsp), %xmm1
1332
1333 sub $16, %r11
1334
1335_final_ghash_mul\@:
1336 .if \ENC_DEC == DEC
1337 vmovdqa %xmm1, %xmm2
1338 vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn)
1339 vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to
1340 # mask out top 16-r13 bytes of xmm9
1341 vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9
1342 vpand %xmm1, %xmm2, %xmm2
1343 vpshufb SHUF_MASK(%rip), %xmm2, %xmm2
1344 vpxor %xmm2, %xmm14, %xmm14
1345 #GHASH computation for the last <16 Byte block
1346 GHASH_MUL_AVX %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
1347 sub %r13, %r11
1348 add $16, %r11
1349 .else
1350 vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn)
1351 vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to
1352 # mask out top 16-r13 bytes of xmm9
1353 vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9
1354 vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
1355 vpxor %xmm9, %xmm14, %xmm14
1356 #GHASH computation for the last <16 Byte block
1357 GHASH_MUL_AVX %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
1358 sub %r13, %r11
1359 add $16, %r11
1360 vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 # shuffle xmm9 back to output as ciphertext
1361 .endif
1362
1363
1364 #############################
1365 # output r13 Bytes
1366 vmovq %xmm9, %rax
1367 cmp $8, %r13
1368 jle _less_than_8_bytes_left\@
1369
1370 mov %rax, (arg2 , %r11)
1371 add $8, %r11
1372 vpsrldq $8, %xmm9, %xmm9
1373 vmovq %xmm9, %rax
1374 sub $8, %r13
1375
1376_less_than_8_bytes_left\@:
1377 movb %al, (arg2 , %r11)
1378 add $1, %r11
1379 shr $8, %rax
1380 sub $1, %r13
1381 jne _less_than_8_bytes_left\@
1382 #############################
1383
1384_multiple_of_16_bytes\@:
1385 mov arg7, %r12 # r12 = aadLen (number of bytes)
1386 shl $3, %r12 # convert into number of bits
1387 vmovd %r12d, %xmm15 # len(A) in xmm15
1388
1389 shl $3, arg4 # len(C) in bits (*128)
1390 vmovq arg4, %xmm1
1391 vpslldq $8, %xmm15, %xmm15 # xmm15 = len(A)|| 0x0000000000000000
1392 vpxor %xmm1, %xmm15, %xmm15 # xmm15 = len(A)||len(C)
1393
1394 vpxor %xmm15, %xmm14, %xmm14
1395 GHASH_MUL_AVX %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 # final GHASH computation
1396 vpshufb SHUF_MASK(%rip), %xmm14, %xmm14 # perform a 16Byte swap
1397
1398 mov arg5, %rax # rax = *Y0
1399 vmovdqu (%rax), %xmm9 # xmm9 = Y0
1400
1401 ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Y0)
1402
1403 vpxor %xmm14, %xmm9, %xmm9
1404
1405
1406
1407_return_T\@:
1408 mov arg8, %r10 # r10 = authTag
1409 mov arg9, %r11 # r11 = auth_tag_len
1410
1411 cmp $16, %r11
1412 je _T_16\@
1413
1414 cmp $12, %r11
1415 je _T_12\@
1416
1417_T_8\@:
1418 vmovq %xmm9, %rax
1419 mov %rax, (%r10)
1420 jmp _return_T_done\@
1421_T_12\@:
1422 vmovq %xmm9, %rax
1423 mov %rax, (%r10)
1424 vpsrldq $8, %xmm9, %xmm9
1425 vmovd %xmm9, %eax
1426 mov %eax, 8(%r10)
1427 jmp _return_T_done\@
1428
1429_T_16\@:
1430 vmovdqu %xmm9, (%r10)
1431
1432_return_T_done\@:
1433 mov %r14, %rsp
1434
1435 pop %r15
1436 pop %r14
1437 pop %r13
1438 pop %r12
1439.endm
1440
1441
1442#############################################################
1443#void aesni_gcm_precomp_avx_gen2
1444# (gcm_data *my_ctx_data,
1445# u8 *hash_subkey)# /* H, the Hash sub key input. Data starts on a 16-byte boundary. */
1446#############################################################
1447ENTRY(aesni_gcm_precomp_avx_gen2)
1448 #the number of pushes must equal STACK_OFFSET
1449 push %r12
1450 push %r13
1451 push %r14
1452 push %r15
1453
1454 mov %rsp, %r14
1455
1456
1457
1458 sub $VARIABLE_OFFSET, %rsp
1459 and $~63, %rsp # align rsp to 64 bytes
1460
1461 vmovdqu (arg2), %xmm6 # xmm6 = HashKey
1462
1463 vpshufb SHUF_MASK(%rip), %xmm6, %xmm6
1464 ############### PRECOMPUTATION of HashKey<<1 mod poly from the HashKey
1465 vmovdqa %xmm6, %xmm2
1466 vpsllq $1, %xmm6, %xmm6
1467 vpsrlq $63, %xmm2, %xmm2
1468 vmovdqa %xmm2, %xmm1
1469 vpslldq $8, %xmm2, %xmm2
1470 vpsrldq $8, %xmm1, %xmm1
1471 vpor %xmm2, %xmm6, %xmm6
1472 #reduction
1473 vpshufd $0b00100100, %xmm1, %xmm2
1474 vpcmpeqd TWOONE(%rip), %xmm2, %xmm2
1475 vpand POLY(%rip), %xmm2, %xmm2
1476 vpxor %xmm2, %xmm6, %xmm6 # xmm6 holds the HashKey<<1 mod poly
1477 #######################################################################
1478 vmovdqa %xmm6, HashKey(arg1) # store HashKey<<1 mod poly
1479
1480
1481 PRECOMPUTE_AVX %xmm6, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5
1482
1483 mov %r14, %rsp
1484
1485 pop %r15
1486 pop %r14
1487 pop %r13
1488 pop %r12
1489 ret
1490ENDPROC(aesni_gcm_precomp_avx_gen2)
1491
1492###############################################################################
1493#void aesni_gcm_enc_avx_gen2(
1494# gcm_data *my_ctx_data, /* aligned to 16 Bytes */
1495# u8 *out, /* Ciphertext output. Encrypt in-place is allowed. */
1496# const u8 *in, /* Plaintext input */
1497# u64 plaintext_len, /* Length of data in Bytes for encryption. */
1498# u8 *iv, /* Pre-counter block j0: 4 byte salt
1499# (from Security Association) concatenated with 8 byte
1500# Initialisation Vector (from IPSec ESP Payload)
1501# concatenated with 0x00000001. 16-byte aligned pointer. */
1502# const u8 *aad, /* Additional Authentication Data (AAD)*/
1503# u64 aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */
1504# u8 *auth_tag, /* Authenticated Tag output. */
1505# u64 auth_tag_len)# /* Authenticated Tag Length in bytes.
1506# Valid values are 16 (most likely), 12 or 8. */
1507###############################################################################
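# A typical call sequence, as used by the aesni-intel_glue.c hunk later in this
# patch, is roughly:
#	aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
#	aesni_gcm_enc_avx_gen2(ctx, out, in, plaintext_len, iv,
#			       aad, aad_len, auth_tag, auth_tag_len);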
1508ENTRY(aesni_gcm_enc_avx_gen2)
1509 GCM_ENC_DEC_AVX ENC
1510 ret
1511ENDPROC(aesni_gcm_enc_avx_gen2)
1512
1513###############################################################################
1514#void aesni_gcm_dec_avx_gen2(
1515# gcm_data *my_ctx_data, /* aligned to 16 Bytes */
1516# u8 *out, /* Plaintext output. Decrypt in-place is allowed. */
1517# const u8 *in, /* Ciphertext input */
1518#        u64     plaintext_len, /* Length of data in Bytes for decryption. */
1519# u8 *iv, /* Pre-counter block j0: 4 byte salt
1520# (from Security Association) concatenated with 8 byte
1521# Initialisation Vector (from IPSec ESP Payload)
1522# concatenated with 0x00000001. 16-byte aligned pointer. */
1523# const u8 *aad, /* Additional Authentication Data (AAD)*/
1524# u64 aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */
1525# u8 *auth_tag, /* Authenticated Tag output. */
1526# u64 auth_tag_len)# /* Authenticated Tag Length in bytes.
1527# Valid values are 16 (most likely), 12 or 8. */
1528###############################################################################
1529ENTRY(aesni_gcm_dec_avx_gen2)
1530 GCM_ENC_DEC_AVX DEC
1531 ret
1532ENDPROC(aesni_gcm_dec_avx_gen2)
1533#endif /* CONFIG_AS_AVX */
1534
1535#ifdef CONFIG_AS_AVX2
1536###############################################################################
1537# GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0)
1538# Input: A and B (128-bits each, bit-reflected)
1539# Output: C = A*B*x mod poly, (i.e. >>1 )
1540# To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input
1541# GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly.
1542###############################################################################
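# A typical invocation (see the tail of GCM_ENC_DEC_AVX2 below) is
#	GHASH_MUL_AVX2  %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
# where %xmm14 is the running GHASH accumulator and %xmm13 holds
# HashKey<<1 mod poly.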
1543.macro GHASH_MUL_AVX2 GH HK T1 T2 T3 T4 T5
1544
1545 vpclmulqdq $0x11,\HK,\GH,\T1 # T1 = a1*b1
1546 vpclmulqdq $0x00,\HK,\GH,\T2 # T2 = a0*b0
1547 vpclmulqdq $0x01,\HK,\GH,\T3 # T3 = a1*b0
1548 vpclmulqdq $0x10,\HK,\GH,\GH # GH = a0*b1
1549 vpxor \T3, \GH, \GH
1550
1551
1552 vpsrldq $8 , \GH, \T3 # shift-R GH 2 DWs
1553 vpslldq $8 , \GH, \GH # shift-L GH 2 DWs
1554
1555 vpxor \T3, \T1, \T1
1556 vpxor \T2, \GH, \GH
1557
1558 #######################################################################
1559 #first phase of the reduction
1560 vmovdqa POLY2(%rip), \T3
1561
1562 vpclmulqdq $0x01, \GH, \T3, \T2
1563 vpslldq $8, \T2, \T2 # shift-L T2 2 DWs
1564
1565 vpxor \T2, \GH, \GH # first phase of the reduction complete
1566 #######################################################################
1567 #second phase of the reduction
1568 vpclmulqdq $0x00, \GH, \T3, \T2
1569 vpsrldq $4, \T2, \T2 # shift-R T2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R)
1570
1571 vpclmulqdq $0x10, \GH, \T3, \GH
1572 vpslldq $4, \GH, \GH # shift-L GH 1 DW (Shift-L 1-DW to obtain result with no shifts)
1573
1574 vpxor \T2, \GH, \GH # second phase of the reduction complete
1575 #######################################################################
1576 vpxor \T1, \GH, \GH # the result is in GH
1577
1578
1579.endm
1580
1581.macro PRECOMPUTE_AVX2 HK T1 T2 T3 T4 T5 T6
1582
1583        # precompute HashKey^2 .. HashKey^8, each <<1 mod poly, and store them in the context
1584 vmovdqa \HK, \T5
1585 GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^2<<1 mod poly
1586 vmovdqa \T5, HashKey_2(arg1) # [HashKey_2] = HashKey^2<<1 mod poly
1587
1588 GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^3<<1 mod poly
1589 vmovdqa \T5, HashKey_3(arg1)
1590
1591 GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^4<<1 mod poly
1592 vmovdqa \T5, HashKey_4(arg1)
1593
1594 GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^5<<1 mod poly
1595 vmovdqa \T5, HashKey_5(arg1)
1596
1597 GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^6<<1 mod poly
1598 vmovdqa \T5, HashKey_6(arg1)
1599
1600 GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^7<<1 mod poly
1601 vmovdqa \T5, HashKey_7(arg1)
1602
1603 GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^8<<1 mod poly
1604 vmovdqa \T5, HashKey_8(arg1)
1605
1606.endm
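# The HashKey_2 .. HashKey_8 table built above lets GHASH_8_ENCRYPT_8_PARALLEL_AVX2
# multiply eight ciphertext blocks by independent key powers and perform a single
# reduction at the end of each 8-block step.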
1607
1608
1609## if a = number of total plaintext bytes
1610## b = floor(a/16)
1611## num_initial_blocks = b mod 8
1612## encrypt the initial num_initial_blocks blocks and apply ghash on the ciphertext
1613## r10, r11, r12, rax are clobbered
1614## arg1, arg2, arg3, r14 are used as pointers only, not modified
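## e.g. for a = 100 bytes: b = 6, so num_initial_blocks = 6; the 8-wide main
## loop then sees no full group and the remaining 4 bytes are handled as a
## partial block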
1615
1616.macro INITIAL_BLOCKS_AVX2 num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC VER
1617 i = (8-\num_initial_blocks)
1618 setreg
1619
1620 mov arg6, %r10 # r10 = AAD
1621 mov arg7, %r12 # r12 = aadLen
1622
1623
1624 mov %r12, %r11
1625
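	# fold the AAD into reg_i: load it 4 bytes at a time, shift it down into
	# the low aadLen bytes when aadLen < 16, then byte-reflect it for GHASH
	# (with RFC4106 the AAD is 8 or 12 bytes)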
1626 vpxor reg_i, reg_i, reg_i
1627_get_AAD_loop\@:
1628 vmovd (%r10), \T1
1629 vpslldq $12, \T1, \T1
1630 vpsrldq $4, reg_i, reg_i
1631 vpxor \T1, reg_i, reg_i
1632
1633 add $4, %r10
1634 sub $4, %r12
1635 jg _get_AAD_loop\@
1636
1637
1638 cmp $16, %r11
1639 je _get_AAD_loop2_done\@
1640 mov $16, %r12
1641
1642_get_AAD_loop2\@:
1643 vpsrldq $4, reg_i, reg_i
1644 sub $4, %r12
1645 cmp %r11, %r12
1646 jg _get_AAD_loop2\@
1647
1648_get_AAD_loop2_done\@:
1649
1650 #byte-reflect the AAD data
1651 vpshufb SHUF_MASK(%rip), reg_i, reg_i
1652
1653 # initialize the data pointer offset as zero
1654 xor %r11, %r11
1655
1656 # start AES for num_initial_blocks blocks
1657 mov arg5, %rax # rax = *Y0
1658 vmovdqu (%rax), \CTR # CTR = Y0
1659 vpshufb SHUF_MASK(%rip), \CTR, \CTR
1660
1661
1662 i = (9-\num_initial_blocks)
1663 setreg
1664.rep \num_initial_blocks
1665 vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
1666 vmovdqa \CTR, reg_i
1667 vpshufb SHUF_MASK(%rip), reg_i, reg_i # perform a 16Byte swap
1668 i = (i+1)
1669 setreg
1670.endr
1671
1672 vmovdqa (arg1), \T_key
1673 i = (9-\num_initial_blocks)
1674 setreg
1675.rep \num_initial_blocks
1676 vpxor \T_key, reg_i, reg_i
1677 i = (i+1)
1678 setreg
1679.endr
1680
1681 j = 1
1682 setreg
1683.rep 9
1684 vmovdqa 16*j(arg1), \T_key
1685 i = (9-\num_initial_blocks)
1686 setreg
1687.rep \num_initial_blocks
1688 vaesenc \T_key, reg_i, reg_i
1689 i = (i+1)
1690 setreg
1691.endr
1692
1693 j = (j+1)
1694 setreg
1695.endr
1696
1697
1698 vmovdqa 16*10(arg1), \T_key
1699 i = (9-\num_initial_blocks)
1700 setreg
1701.rep \num_initial_blocks
1702 vaesenclast \T_key, reg_i, reg_i
1703 i = (i+1)
1704 setreg
1705.endr
1706
1707 i = (9-\num_initial_blocks)
1708 setreg
1709.rep \num_initial_blocks
1710 vmovdqu (arg3, %r11), \T1
1711 vpxor \T1, reg_i, reg_i
1712 vmovdqu reg_i, (arg2 , %r11) # write back ciphertext for
1713 # num_initial_blocks blocks
1714 add $16, %r11
1715.if \ENC_DEC == DEC
1716 vmovdqa \T1, reg_i
1717.endif
1718 vpshufb SHUF_MASK(%rip), reg_i, reg_i # prepare ciphertext for GHASH computations
1719 i = (i+1)
1720 setreg
1721.endr
1722
1723
1724 i = (8-\num_initial_blocks)
1725 j = (9-\num_initial_blocks)
1726 setreg
1727 GHASH_MUL_AVX2 reg_i, \T2, \T1, \T3, \T4, \T5, \T6
1728
1729.rep \num_initial_blocks
1730 vpxor reg_i, reg_j, reg_j
1731 GHASH_MUL_AVX2 reg_j, \T2, \T1, \T3, \T4, \T5, \T6 # apply GHASH on num_initial_blocks blocks
1732 i = (i+1)
1733 j = (j+1)
1734 setreg
1735.endr
1736 # XMM8 has the combined result here
1737
1738 vmovdqa \XMM8, TMP1(%rsp)
1739 vmovdqa \XMM8, \T3
1740
1741 cmp $128, %r13
1742 jl _initial_blocks_done\@ # no need for precomputed constants
1743
1744###############################################################################
1745# encrypt the first full group of 8 counter blocks (reached only when at least 128 bytes remain)
1746 vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
1747 vmovdqa \CTR, \XMM1
1748 vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap
1749
1750 vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
1751 vmovdqa \CTR, \XMM2
1752 vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap
1753
1754 vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
1755 vmovdqa \CTR, \XMM3
1756 vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap
1757
1758 vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
1759 vmovdqa \CTR, \XMM4
1760 vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap
1761
1762 vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
1763 vmovdqa \CTR, \XMM5
1764 vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap
1765
1766 vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
1767 vmovdqa \CTR, \XMM6
1768 vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap
1769
1770 vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
1771 vmovdqa \CTR, \XMM7
1772 vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap
1773
1774 vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
1775 vmovdqa \CTR, \XMM8
1776 vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap
1777
1778 vmovdqa (arg1), \T_key
1779 vpxor \T_key, \XMM1, \XMM1
1780 vpxor \T_key, \XMM2, \XMM2
1781 vpxor \T_key, \XMM3, \XMM3
1782 vpxor \T_key, \XMM4, \XMM4
1783 vpxor \T_key, \XMM5, \XMM5
1784 vpxor \T_key, \XMM6, \XMM6
1785 vpxor \T_key, \XMM7, \XMM7
1786 vpxor \T_key, \XMM8, \XMM8
1787
1788 i = 1
1789 setreg
1790.rep 9 # do 9 rounds
1791 vmovdqa 16*i(arg1), \T_key
1792 vaesenc \T_key, \XMM1, \XMM1
1793 vaesenc \T_key, \XMM2, \XMM2
1794 vaesenc \T_key, \XMM3, \XMM3
1795 vaesenc \T_key, \XMM4, \XMM4
1796 vaesenc \T_key, \XMM5, \XMM5
1797 vaesenc \T_key, \XMM6, \XMM6
1798 vaesenc \T_key, \XMM7, \XMM7
1799 vaesenc \T_key, \XMM8, \XMM8
1800 i = (i+1)
1801 setreg
1802.endr
1803
1804
1805 vmovdqa 16*i(arg1), \T_key
1806 vaesenclast \T_key, \XMM1, \XMM1
1807 vaesenclast \T_key, \XMM2, \XMM2
1808 vaesenclast \T_key, \XMM3, \XMM3
1809 vaesenclast \T_key, \XMM4, \XMM4
1810 vaesenclast \T_key, \XMM5, \XMM5
1811 vaesenclast \T_key, \XMM6, \XMM6
1812 vaesenclast \T_key, \XMM7, \XMM7
1813 vaesenclast \T_key, \XMM8, \XMM8
1814
1815 vmovdqu (arg3, %r11), \T1
1816 vpxor \T1, \XMM1, \XMM1
1817 vmovdqu \XMM1, (arg2 , %r11)
1818 .if \ENC_DEC == DEC
1819 vmovdqa \T1, \XMM1
1820 .endif
1821
1822 vmovdqu 16*1(arg3, %r11), \T1
1823 vpxor \T1, \XMM2, \XMM2
1824 vmovdqu \XMM2, 16*1(arg2 , %r11)
1825 .if \ENC_DEC == DEC
1826 vmovdqa \T1, \XMM2
1827 .endif
1828
1829 vmovdqu 16*2(arg3, %r11), \T1
1830 vpxor \T1, \XMM3, \XMM3
1831 vmovdqu \XMM3, 16*2(arg2 , %r11)
1832 .if \ENC_DEC == DEC
1833 vmovdqa \T1, \XMM3
1834 .endif
1835
1836 vmovdqu 16*3(arg3, %r11), \T1
1837 vpxor \T1, \XMM4, \XMM4
1838 vmovdqu \XMM4, 16*3(arg2 , %r11)
1839 .if \ENC_DEC == DEC
1840 vmovdqa \T1, \XMM4
1841 .endif
1842
1843 vmovdqu 16*4(arg3, %r11), \T1
1844 vpxor \T1, \XMM5, \XMM5
1845 vmovdqu \XMM5, 16*4(arg2 , %r11)
1846 .if \ENC_DEC == DEC
1847 vmovdqa \T1, \XMM5
1848 .endif
1849
1850 vmovdqu 16*5(arg3, %r11), \T1
1851 vpxor \T1, \XMM6, \XMM6
1852 vmovdqu \XMM6, 16*5(arg2 , %r11)
1853 .if \ENC_DEC == DEC
1854 vmovdqa \T1, \XMM6
1855 .endif
1856
1857 vmovdqu 16*6(arg3, %r11), \T1
1858 vpxor \T1, \XMM7, \XMM7
1859 vmovdqu \XMM7, 16*6(arg2 , %r11)
1860 .if \ENC_DEC == DEC
1861 vmovdqa \T1, \XMM7
1862 .endif
1863
1864 vmovdqu 16*7(arg3, %r11), \T1
1865 vpxor \T1, \XMM8, \XMM8
1866 vmovdqu \XMM8, 16*7(arg2 , %r11)
1867 .if \ENC_DEC == DEC
1868 vmovdqa \T1, \XMM8
1869 .endif
1870
1871 add $128, %r11
1872
1873 vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap
1874 vpxor TMP1(%rsp), \XMM1, \XMM1 # combine GHASHed value with
1875 # the corresponding ciphertext
1876 vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap
1877 vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap
1878 vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap
1879 vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap
1880 vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap
1881 vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap
1882 vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap
1883
1884###############################################################################
1885
1886_initial_blocks_done\@:
1887
1888
1889.endm
1890
1891
1892
1893# encrypt 8 blocks at a time
1894# ghash the 8 previously encrypted ciphertext blocks
1895# arg1, arg2, arg3 are used as pointers only, not modified
1896# r11 is the data offset value
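# loop_idx selects how the eight counters are incremented: out_order adds ONEf
# to the already byte-swapped counters, while in_order adds ONE in native order
# and then byte-swaps; the in_order form is used when the low counter byte is
# about to wrap (see _encrypt_by_8 in GCM_ENC_DEC_AVX2 below)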
1897.macro GHASH_8_ENCRYPT_8_PARALLEL_AVX2 T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC
1898
1899 vmovdqa \XMM1, \T2
1900 vmovdqa \XMM2, TMP2(%rsp)
1901 vmovdqa \XMM3, TMP3(%rsp)
1902 vmovdqa \XMM4, TMP4(%rsp)
1903 vmovdqa \XMM5, TMP5(%rsp)
1904 vmovdqa \XMM6, TMP6(%rsp)
1905 vmovdqa \XMM7, TMP7(%rsp)
1906 vmovdqa \XMM8, TMP8(%rsp)
1907
1908.if \loop_idx == in_order
1909 vpaddd ONE(%rip), \CTR, \XMM1 # INCR CNT
1910 vpaddd ONE(%rip), \XMM1, \XMM2
1911 vpaddd ONE(%rip), \XMM2, \XMM3
1912 vpaddd ONE(%rip), \XMM3, \XMM4
1913 vpaddd ONE(%rip), \XMM4, \XMM5
1914 vpaddd ONE(%rip), \XMM5, \XMM6
1915 vpaddd ONE(%rip), \XMM6, \XMM7
1916 vpaddd ONE(%rip), \XMM7, \XMM8
1917 vmovdqa \XMM8, \CTR
1918
1919 vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap
1920 vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap
1921 vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap
1922 vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap
1923 vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap
1924 vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap
1925 vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap
1926 vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap
1927.else
1928 vpaddd ONEf(%rip), \CTR, \XMM1 # INCR CNT
1929 vpaddd ONEf(%rip), \XMM1, \XMM2
1930 vpaddd ONEf(%rip), \XMM2, \XMM3
1931 vpaddd ONEf(%rip), \XMM3, \XMM4
1932 vpaddd ONEf(%rip), \XMM4, \XMM5
1933 vpaddd ONEf(%rip), \XMM5, \XMM6
1934 vpaddd ONEf(%rip), \XMM6, \XMM7
1935 vpaddd ONEf(%rip), \XMM7, \XMM8
1936 vmovdqa \XMM8, \CTR
1937.endif
1938
1939
1940 #######################################################################
1941
1942 vmovdqu (arg1), \T1
1943 vpxor \T1, \XMM1, \XMM1
1944 vpxor \T1, \XMM2, \XMM2
1945 vpxor \T1, \XMM3, \XMM3
1946 vpxor \T1, \XMM4, \XMM4
1947 vpxor \T1, \XMM5, \XMM5
1948 vpxor \T1, \XMM6, \XMM6
1949 vpxor \T1, \XMM7, \XMM7
1950 vpxor \T1, \XMM8, \XMM8
1951
1952 #######################################################################
1953
1954
1955
1956
1957
1958 vmovdqu 16*1(arg1), \T1
1959 vaesenc \T1, \XMM1, \XMM1
1960 vaesenc \T1, \XMM2, \XMM2
1961 vaesenc \T1, \XMM3, \XMM3
1962 vaesenc \T1, \XMM4, \XMM4
1963 vaesenc \T1, \XMM5, \XMM5
1964 vaesenc \T1, \XMM6, \XMM6
1965 vaesenc \T1, \XMM7, \XMM7
1966 vaesenc \T1, \XMM8, \XMM8
1967
1968 vmovdqu 16*2(arg1), \T1
1969 vaesenc \T1, \XMM1, \XMM1
1970 vaesenc \T1, \XMM2, \XMM2
1971 vaesenc \T1, \XMM3, \XMM3
1972 vaesenc \T1, \XMM4, \XMM4
1973 vaesenc \T1, \XMM5, \XMM5
1974 vaesenc \T1, \XMM6, \XMM6
1975 vaesenc \T1, \XMM7, \XMM7
1976 vaesenc \T1, \XMM8, \XMM8
1977
1978
1979 #######################################################################
1980
1981 vmovdqa HashKey_8(arg1), \T5
1982 vpclmulqdq $0x11, \T5, \T2, \T4 # T4 = a1*b1
1983 vpclmulqdq $0x00, \T5, \T2, \T7 # T7 = a0*b0
1984 vpclmulqdq $0x01, \T5, \T2, \T6 # T6 = a1*b0
1985 vpclmulqdq $0x10, \T5, \T2, \T5 # T5 = a0*b1
1986 vpxor \T5, \T6, \T6
1987
1988 vmovdqu 16*3(arg1), \T1
1989 vaesenc \T1, \XMM1, \XMM1
1990 vaesenc \T1, \XMM2, \XMM2
1991 vaesenc \T1, \XMM3, \XMM3
1992 vaesenc \T1, \XMM4, \XMM4
1993 vaesenc \T1, \XMM5, \XMM5
1994 vaesenc \T1, \XMM6, \XMM6
1995 vaesenc \T1, \XMM7, \XMM7
1996 vaesenc \T1, \XMM8, \XMM8
1997
1998 vmovdqa TMP2(%rsp), \T1
1999 vmovdqa HashKey_7(arg1), \T5
2000 vpclmulqdq $0x11, \T5, \T1, \T3
2001 vpxor \T3, \T4, \T4
2002
2003 vpclmulqdq $0x00, \T5, \T1, \T3
2004 vpxor \T3, \T7, \T7
2005
2006 vpclmulqdq $0x01, \T5, \T1, \T3
2007 vpxor \T3, \T6, \T6
2008
2009 vpclmulqdq $0x10, \T5, \T1, \T3
2010 vpxor \T3, \T6, \T6
2011
2012 vmovdqu 16*4(arg1), \T1
2013 vaesenc \T1, \XMM1, \XMM1
2014 vaesenc \T1, \XMM2, \XMM2
2015 vaesenc \T1, \XMM3, \XMM3
2016 vaesenc \T1, \XMM4, \XMM4
2017 vaesenc \T1, \XMM5, \XMM5
2018 vaesenc \T1, \XMM6, \XMM6
2019 vaesenc \T1, \XMM7, \XMM7
2020 vaesenc \T1, \XMM8, \XMM8
2021
2022 #######################################################################
2023
2024 vmovdqa TMP3(%rsp), \T1
2025 vmovdqa HashKey_6(arg1), \T5
2026 vpclmulqdq $0x11, \T5, \T1, \T3
2027 vpxor \T3, \T4, \T4
2028
2029 vpclmulqdq $0x00, \T5, \T1, \T3
2030 vpxor \T3, \T7, \T7
2031
2032 vpclmulqdq $0x01, \T5, \T1, \T3
2033 vpxor \T3, \T6, \T6
2034
2035 vpclmulqdq $0x10, \T5, \T1, \T3
2036 vpxor \T3, \T6, \T6
2037
2038 vmovdqu 16*5(arg1), \T1
2039 vaesenc \T1, \XMM1, \XMM1
2040 vaesenc \T1, \XMM2, \XMM2
2041 vaesenc \T1, \XMM3, \XMM3
2042 vaesenc \T1, \XMM4, \XMM4
2043 vaesenc \T1, \XMM5, \XMM5
2044 vaesenc \T1, \XMM6, \XMM6
2045 vaesenc \T1, \XMM7, \XMM7
2046 vaesenc \T1, \XMM8, \XMM8
2047
2048 vmovdqa TMP4(%rsp), \T1
2049 vmovdqa HashKey_5(arg1), \T5
2050 vpclmulqdq $0x11, \T5, \T1, \T3
2051 vpxor \T3, \T4, \T4
2052
2053 vpclmulqdq $0x00, \T5, \T1, \T3
2054 vpxor \T3, \T7, \T7
2055
2056 vpclmulqdq $0x01, \T5, \T1, \T3
2057 vpxor \T3, \T6, \T6
2058
2059 vpclmulqdq $0x10, \T5, \T1, \T3
2060 vpxor \T3, \T6, \T6
2061
2062 vmovdqu 16*6(arg1), \T1
2063 vaesenc \T1, \XMM1, \XMM1
2064 vaesenc \T1, \XMM2, \XMM2
2065 vaesenc \T1, \XMM3, \XMM3
2066 vaesenc \T1, \XMM4, \XMM4
2067 vaesenc \T1, \XMM5, \XMM5
2068 vaesenc \T1, \XMM6, \XMM6
2069 vaesenc \T1, \XMM7, \XMM7
2070 vaesenc \T1, \XMM8, \XMM8
2071
2072
2073 vmovdqa TMP5(%rsp), \T1
2074 vmovdqa HashKey_4(arg1), \T5
2075 vpclmulqdq $0x11, \T5, \T1, \T3
2076 vpxor \T3, \T4, \T4
2077
2078 vpclmulqdq $0x00, \T5, \T1, \T3
2079 vpxor \T3, \T7, \T7
2080
2081 vpclmulqdq $0x01, \T5, \T1, \T3
2082 vpxor \T3, \T6, \T6
2083
2084 vpclmulqdq $0x10, \T5, \T1, \T3
2085 vpxor \T3, \T6, \T6
2086
2087 vmovdqu 16*7(arg1), \T1
2088 vaesenc \T1, \XMM1, \XMM1
2089 vaesenc \T1, \XMM2, \XMM2
2090 vaesenc \T1, \XMM3, \XMM3
2091 vaesenc \T1, \XMM4, \XMM4
2092 vaesenc \T1, \XMM5, \XMM5
2093 vaesenc \T1, \XMM6, \XMM6
2094 vaesenc \T1, \XMM7, \XMM7
2095 vaesenc \T1, \XMM8, \XMM8
2096
2097 vmovdqa TMP6(%rsp), \T1
2098 vmovdqa HashKey_3(arg1), \T5
2099 vpclmulqdq $0x11, \T5, \T1, \T3
2100 vpxor \T3, \T4, \T4
2101
2102 vpclmulqdq $0x00, \T5, \T1, \T3
2103 vpxor \T3, \T7, \T7
2104
2105 vpclmulqdq $0x01, \T5, \T1, \T3
2106 vpxor \T3, \T6, \T6
2107
2108 vpclmulqdq $0x10, \T5, \T1, \T3
2109 vpxor \T3, \T6, \T6
2110
2111 vmovdqu 16*8(arg1), \T1
2112 vaesenc \T1, \XMM1, \XMM1
2113 vaesenc \T1, \XMM2, \XMM2
2114 vaesenc \T1, \XMM3, \XMM3
2115 vaesenc \T1, \XMM4, \XMM4
2116 vaesenc \T1, \XMM5, \XMM5
2117 vaesenc \T1, \XMM6, \XMM6
2118 vaesenc \T1, \XMM7, \XMM7
2119 vaesenc \T1, \XMM8, \XMM8
2120
2121 vmovdqa TMP7(%rsp), \T1
2122 vmovdqa HashKey_2(arg1), \T5
2123 vpclmulqdq $0x11, \T5, \T1, \T3
2124 vpxor \T3, \T4, \T4
2125
2126 vpclmulqdq $0x00, \T5, \T1, \T3
2127 vpxor \T3, \T7, \T7
2128
2129 vpclmulqdq $0x01, \T5, \T1, \T3
2130 vpxor \T3, \T6, \T6
2131
2132 vpclmulqdq $0x10, \T5, \T1, \T3
2133 vpxor \T3, \T6, \T6
2134
2135
2136 #######################################################################
2137
2138 vmovdqu 16*9(arg1), \T5
2139 vaesenc \T5, \XMM1, \XMM1
2140 vaesenc \T5, \XMM2, \XMM2
2141 vaesenc \T5, \XMM3, \XMM3
2142 vaesenc \T5, \XMM4, \XMM4
2143 vaesenc \T5, \XMM5, \XMM5
2144 vaesenc \T5, \XMM6, \XMM6
2145 vaesenc \T5, \XMM7, \XMM7
2146 vaesenc \T5, \XMM8, \XMM8
2147
2148 vmovdqa TMP8(%rsp), \T1
2149 vmovdqa HashKey(arg1), \T5
2150
2151 vpclmulqdq $0x00, \T5, \T1, \T3
2152 vpxor \T3, \T7, \T7
2153
2154 vpclmulqdq $0x01, \T5, \T1, \T3
2155 vpxor \T3, \T6, \T6
2156
2157 vpclmulqdq $0x10, \T5, \T1, \T3
2158 vpxor \T3, \T6, \T6
2159
2160 vpclmulqdq $0x11, \T5, \T1, \T3
2161 vpxor \T3, \T4, \T1
2162
2163
2164 vmovdqu 16*10(arg1), \T5
2165
2166 i = 0
2167 j = 1
2168 setreg
2169.rep 8
2170 vpxor 16*i(arg3, %r11), \T5, \T2
2171 .if \ENC_DEC == ENC
2172 vaesenclast \T2, reg_j, reg_j
2173 .else
2174 vaesenclast \T2, reg_j, \T3
2175 vmovdqu 16*i(arg3, %r11), reg_j
2176 vmovdqu \T3, 16*i(arg2, %r11)
2177 .endif
2178 i = (i+1)
2179 j = (j+1)
2180 setreg
2181.endr
2182 #######################################################################
2183
2184
2185        vpslldq $8, \T6, \T3                            # shift-L T6 2 DWs
2186        vpsrldq $8, \T6, \T6                            # shift-R T6 2 DWs
2187 vpxor \T3, \T7, \T7
2188 vpxor \T6, \T1, \T1 # accumulate the results in T1:T7
2189
2190
2191
2192 #######################################################################
2193 #first phase of the reduction
2194 vmovdqa POLY2(%rip), \T3
2195
2196 vpclmulqdq $0x01, \T7, \T3, \T2
2197        vpslldq $8, \T2, \T2                    # shift-L T2 2 DWs
2198
2199 vpxor \T2, \T7, \T7 # first phase of the reduction complete
2200 #######################################################################
2201 .if \ENC_DEC == ENC
2202 vmovdqu \XMM1, 16*0(arg2,%r11) # Write to the Ciphertext buffer
2203 vmovdqu \XMM2, 16*1(arg2,%r11) # Write to the Ciphertext buffer
2204 vmovdqu \XMM3, 16*2(arg2,%r11) # Write to the Ciphertext buffer
2205 vmovdqu \XMM4, 16*3(arg2,%r11) # Write to the Ciphertext buffer
2206 vmovdqu \XMM5, 16*4(arg2,%r11) # Write to the Ciphertext buffer
2207 vmovdqu \XMM6, 16*5(arg2,%r11) # Write to the Ciphertext buffer
2208 vmovdqu \XMM7, 16*6(arg2,%r11) # Write to the Ciphertext buffer
2209 vmovdqu \XMM8, 16*7(arg2,%r11) # Write to the Ciphertext buffer
2210 .endif
2211
2212 #######################################################################
2213 #second phase of the reduction
2214 vpclmulqdq $0x00, \T7, \T3, \T2
2215        vpsrldq $4, \T2, \T2                    # shift-R T2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R)
2216
2217 vpclmulqdq $0x10, \T7, \T3, \T4
2218        vpslldq $4, \T4, \T4                    # shift-L T4 1 DW (Shift-L 1-DW to obtain result with no shifts)
2219
2220 vpxor \T2, \T4, \T4 # second phase of the reduction complete
2221 #######################################################################
2222 vpxor \T4, \T1, \T1 # the result is in T1
2223
2224 vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap
2225 vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap
2226 vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap
2227 vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap
2228 vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap
2229 vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap
2230 vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap
2231 vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap
2232
2233
2234 vpxor \T1, \XMM1, \XMM1
2235
2236
2237
2238.endm
2239
2240
2241# GHASH the last 8 ciphertext blocks.
2242.macro GHASH_LAST_8_AVX2 T1 T2 T3 T4 T5 T6 T7 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8
2243
2244 ## Karatsuba Method
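	## each block computes a1*b1, a0*b0 and (a1^a0)*(b1^b0); the middle
	## product is recovered later as mid ^ hi ^ lo, saving one vpclmulqdq
	## per block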
2245
2246 vmovdqa HashKey_8(arg1), \T5
2247
2248 vpshufd $0b01001110, \XMM1, \T2
2249 vpshufd $0b01001110, \T5, \T3
2250 vpxor \XMM1, \T2, \T2
2251 vpxor \T5, \T3, \T3
2252
2253 vpclmulqdq $0x11, \T5, \XMM1, \T6
2254 vpclmulqdq $0x00, \T5, \XMM1, \T7
2255
2256 vpclmulqdq $0x00, \T3, \T2, \XMM1
2257
2258 ######################
2259
2260 vmovdqa HashKey_7(arg1), \T5
2261 vpshufd $0b01001110, \XMM2, \T2
2262 vpshufd $0b01001110, \T5, \T3
2263 vpxor \XMM2, \T2, \T2
2264 vpxor \T5, \T3, \T3
2265
2266 vpclmulqdq $0x11, \T5, \XMM2, \T4
2267 vpxor \T4, \T6, \T6
2268
2269 vpclmulqdq $0x00, \T5, \XMM2, \T4
2270 vpxor \T4, \T7, \T7
2271
2272 vpclmulqdq $0x00, \T3, \T2, \T2
2273
2274 vpxor \T2, \XMM1, \XMM1
2275
2276 ######################
2277
2278 vmovdqa HashKey_6(arg1), \T5
2279 vpshufd $0b01001110, \XMM3, \T2
2280 vpshufd $0b01001110, \T5, \T3
2281 vpxor \XMM3, \T2, \T2
2282 vpxor \T5, \T3, \T3
2283
2284 vpclmulqdq $0x11, \T5, \XMM3, \T4
2285 vpxor \T4, \T6, \T6
2286
2287 vpclmulqdq $0x00, \T5, \XMM3, \T4
2288 vpxor \T4, \T7, \T7
2289
2290 vpclmulqdq $0x00, \T3, \T2, \T2
2291
2292 vpxor \T2, \XMM1, \XMM1
2293
2294 ######################
2295
2296 vmovdqa HashKey_5(arg1), \T5
2297 vpshufd $0b01001110, \XMM4, \T2
2298 vpshufd $0b01001110, \T5, \T3
2299 vpxor \XMM4, \T2, \T2
2300 vpxor \T5, \T3, \T3
2301
2302 vpclmulqdq $0x11, \T5, \XMM4, \T4
2303 vpxor \T4, \T6, \T6
2304
2305 vpclmulqdq $0x00, \T5, \XMM4, \T4
2306 vpxor \T4, \T7, \T7
2307
2308 vpclmulqdq $0x00, \T3, \T2, \T2
2309
2310 vpxor \T2, \XMM1, \XMM1
2311
2312 ######################
2313
2314 vmovdqa HashKey_4(arg1), \T5
2315 vpshufd $0b01001110, \XMM5, \T2
2316 vpshufd $0b01001110, \T5, \T3
2317 vpxor \XMM5, \T2, \T2
2318 vpxor \T5, \T3, \T3
2319
2320 vpclmulqdq $0x11, \T5, \XMM5, \T4
2321 vpxor \T4, \T6, \T6
2322
2323 vpclmulqdq $0x00, \T5, \XMM5, \T4
2324 vpxor \T4, \T7, \T7
2325
2326 vpclmulqdq $0x00, \T3, \T2, \T2
2327
2328 vpxor \T2, \XMM1, \XMM1
2329
2330 ######################
2331
2332 vmovdqa HashKey_3(arg1), \T5
2333 vpshufd $0b01001110, \XMM6, \T2
2334 vpshufd $0b01001110, \T5, \T3
2335 vpxor \XMM6, \T2, \T2
2336 vpxor \T5, \T3, \T3
2337
2338 vpclmulqdq $0x11, \T5, \XMM6, \T4
2339 vpxor \T4, \T6, \T6
2340
2341 vpclmulqdq $0x00, \T5, \XMM6, \T4
2342 vpxor \T4, \T7, \T7
2343
2344 vpclmulqdq $0x00, \T3, \T2, \T2
2345
2346 vpxor \T2, \XMM1, \XMM1
2347
2348 ######################
2349
2350 vmovdqa HashKey_2(arg1), \T5
2351 vpshufd $0b01001110, \XMM7, \T2
2352 vpshufd $0b01001110, \T5, \T3
2353 vpxor \XMM7, \T2, \T2
2354 vpxor \T5, \T3, \T3
2355
2356 vpclmulqdq $0x11, \T5, \XMM7, \T4
2357 vpxor \T4, \T6, \T6
2358
2359 vpclmulqdq $0x00, \T5, \XMM7, \T4
2360 vpxor \T4, \T7, \T7
2361
2362 vpclmulqdq $0x00, \T3, \T2, \T2
2363
2364 vpxor \T2, \XMM1, \XMM1
2365
2366 ######################
2367
2368 vmovdqa HashKey(arg1), \T5
2369 vpshufd $0b01001110, \XMM8, \T2
2370 vpshufd $0b01001110, \T5, \T3
2371 vpxor \XMM8, \T2, \T2
2372 vpxor \T5, \T3, \T3
2373
2374 vpclmulqdq $0x11, \T5, \XMM8, \T4
2375 vpxor \T4, \T6, \T6
2376
2377 vpclmulqdq $0x00, \T5, \XMM8, \T4
2378 vpxor \T4, \T7, \T7
2379
2380 vpclmulqdq $0x00, \T3, \T2, \T2
2381
2382 vpxor \T2, \XMM1, \XMM1
2383 vpxor \T6, \XMM1, \XMM1
2384 vpxor \T7, \XMM1, \T2
2385
2386
2387
2388
2389 vpslldq $8, \T2, \T4
2390 vpsrldq $8, \T2, \T2
2391
2392 vpxor \T4, \T7, \T7
2393 vpxor \T2, \T6, \T6 # <T6:T7> holds the result of the
2394 # accumulated carry-less multiplications
2395
2396 #######################################################################
2397 #first phase of the reduction
2398 vmovdqa POLY2(%rip), \T3
2399
2400 vpclmulqdq $0x01, \T7, \T3, \T2
2401        vpslldq $8, \T2, \T2                    # shift-L T2 2 DWs
2402
2403 vpxor \T2, \T7, \T7 # first phase of the reduction complete
2404 #######################################################################
2405
2406
2407 #second phase of the reduction
2408 vpclmulqdq $0x00, \T7, \T3, \T2
2409 vpsrldq $4, \T2, \T2 # shift-R T2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R)
2410
2411 vpclmulqdq $0x10, \T7, \T3, \T4
2412 vpslldq $4, \T4, \T4 # shift-L T4 1 DW (Shift-L 1-DW to obtain result with no shifts)
2413
2414 vpxor \T2, \T4, \T4 # second phase of the reduction complete
2415 #######################################################################
2416 vpxor \T4, \T6, \T6 # the result is in T6
2417.endm
2418
2419
2420
2421# combined for GCM encrypt and decrypt functions
2422# clobbering all xmm registers
2423# clobbering r10, r11, r12, r13, r14, r15
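# Flow: hash the AAD and the first (blocks mod 8) blocks in INITIAL_BLOCKS_AVX2,
# run the 8-blocks-per-iteration main loop (GHASH_8_ENCRYPT_8_PARALLEL_AVX2),
# fold the final 8 ciphertext blocks with GHASH_LAST_8_AVX2, handle a trailing
# partial block, then hash len(A)||len(C) and form the tag as E(K,Y0) xor GHASH.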
2424.macro GCM_ENC_DEC_AVX2 ENC_DEC
2425
2426 #the number of pushes must equal STACK_OFFSET
2427 push %r12
2428 push %r13
2429 push %r14
2430 push %r15
2431
2432 mov %rsp, %r14
2433
2434
2435
2436
2437 sub $VARIABLE_OFFSET, %rsp
2438 and $~63, %rsp # align rsp to 64 bytes
2439
2440
2441 vmovdqu HashKey(arg1), %xmm13 # xmm13 = HashKey
2442
2443 mov arg4, %r13 # save the number of bytes of plaintext/ciphertext
2444 and $-16, %r13 # r13 = r13 - (r13 mod 16)
2445
2446 mov %r13, %r12
2447 shr $4, %r12
2448 and $7, %r12
2449 jz _initial_num_blocks_is_0\@
2450
2451 cmp $7, %r12
2452 je _initial_num_blocks_is_7\@
2453 cmp $6, %r12
2454 je _initial_num_blocks_is_6\@
2455 cmp $5, %r12
2456 je _initial_num_blocks_is_5\@
2457 cmp $4, %r12
2458 je _initial_num_blocks_is_4\@
2459 cmp $3, %r12
2460 je _initial_num_blocks_is_3\@
2461 cmp $2, %r12
2462 je _initial_num_blocks_is_2\@
2463
2464 jmp _initial_num_blocks_is_1\@
2465
2466_initial_num_blocks_is_7\@:
2467 INITIAL_BLOCKS_AVX2 7, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
2468 sub $16*7, %r13
2469 jmp _initial_blocks_encrypted\@
2470
2471_initial_num_blocks_is_6\@:
2472 INITIAL_BLOCKS_AVX2 6, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
2473 sub $16*6, %r13
2474 jmp _initial_blocks_encrypted\@
2475
2476_initial_num_blocks_is_5\@:
2477 INITIAL_BLOCKS_AVX2 5, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
2478 sub $16*5, %r13
2479 jmp _initial_blocks_encrypted\@
2480
2481_initial_num_blocks_is_4\@:
2482 INITIAL_BLOCKS_AVX2 4, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
2483 sub $16*4, %r13
2484 jmp _initial_blocks_encrypted\@
2485
2486_initial_num_blocks_is_3\@:
2487 INITIAL_BLOCKS_AVX2 3, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
2488 sub $16*3, %r13
2489 jmp _initial_blocks_encrypted\@
2490
2491_initial_num_blocks_is_2\@:
2492 INITIAL_BLOCKS_AVX2 2, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
2493 sub $16*2, %r13
2494 jmp _initial_blocks_encrypted\@
2495
2496_initial_num_blocks_is_1\@:
2497 INITIAL_BLOCKS_AVX2 1, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
2498 sub $16*1, %r13
2499 jmp _initial_blocks_encrypted\@
2500
2501_initial_num_blocks_is_0\@:
2502 INITIAL_BLOCKS_AVX2 0, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
2503
2504
2505_initial_blocks_encrypted\@:
2506 cmp $0, %r13
2507 je _zero_cipher_left\@
2508
2509 sub $128, %r13
2510 je _eight_cipher_left\@
2511
2512
2513
2514
2515 vmovd %xmm9, %r15d
2516 and $255, %r15d
2517 vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
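        # r15d tracks the low byte of the counter so the loop below can switch
        # to the in_order path before that byte wraps (see cmp $(255-8))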
2518
2519
2520_encrypt_by_8_new\@:
2521 cmp $(255-8), %r15d
2522 jg _encrypt_by_8\@
2523
2524
2525
2526 add $8, %r15b
2527 GHASH_8_ENCRYPT_8_PARALLEL_AVX2 %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, out_order, \ENC_DEC
2528 add $128, %r11
2529 sub $128, %r13
2530 jne _encrypt_by_8_new\@
2531
2532 vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
2533 jmp _eight_cipher_left\@
2534
2535_encrypt_by_8\@:
2536 vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
2537 add $8, %r15b
2538 GHASH_8_ENCRYPT_8_PARALLEL_AVX2 %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, in_order, \ENC_DEC
2539 vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
2540 add $128, %r11
2541 sub $128, %r13
2542 jne _encrypt_by_8_new\@
2543
2544 vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
2545
2546
2547
2548
2549_eight_cipher_left\@:
2550 GHASH_LAST_8_AVX2 %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8
2551
2552
2553_zero_cipher_left\@:
2554 cmp $16, arg4
2555 jl _only_less_than_16\@
2556
2557 mov arg4, %r13
2558 and $15, %r13 # r13 = (arg4 mod 16)
2559
2560 je _multiple_of_16_bytes\@
2561
2562        # handle the last <16 Byte block separately
2563
2564
2565 vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn
2566 vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
2567 ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Yn)
2568
2569 sub $16, %r11
2570 add %r13, %r11
2571 vmovdqu (arg3, %r11), %xmm1 # receive the last <16 Byte block
2572
2573 lea SHIFT_MASK+16(%rip), %r12
2574 sub %r13, %r12 # adjust the shuffle mask pointer
2575 # to be able to shift 16-r13 bytes
2576 # (r13 is the number of bytes in plaintext mod 16)
2577 vmovdqu (%r12), %xmm2 # get the appropriate shuffle mask
2578 vpshufb %xmm2, %xmm1, %xmm1 # shift right 16-r13 bytes
2579 jmp _final_ghash_mul\@
2580
2581_only_less_than_16\@:
2582 # check for 0 length
2583 mov arg4, %r13
2584 and $15, %r13 # r13 = (arg4 mod 16)
2585
2586 je _multiple_of_16_bytes\@
2587
2588        # handle the last <16 Byte block separately
2589
2590
2591 vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn
2592 vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
2593 ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Yn)
2594
2595
2596 lea SHIFT_MASK+16(%rip), %r12
2597 sub %r13, %r12 # adjust the shuffle mask pointer to be
2598 # able to shift 16-r13 bytes (r13 is the
2599 # number of bytes in plaintext mod 16)
2600
2601_get_last_16_byte_loop\@:
2602 movb (arg3, %r11), %al
2603 movb %al, TMP1 (%rsp , %r11)
2604 add $1, %r11
2605 cmp %r13, %r11
2606 jne _get_last_16_byte_loop\@
2607
2608 vmovdqu TMP1(%rsp), %xmm1
2609
2610 sub $16, %r11
2611
2612_final_ghash_mul\@:
2613 .if \ENC_DEC == DEC
2614 vmovdqa %xmm1, %xmm2
2615 vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn)
2616 vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to mask out top 16-r13 bytes of xmm9
2617 vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9
2618 vpand %xmm1, %xmm2, %xmm2
2619 vpshufb SHUF_MASK(%rip), %xmm2, %xmm2
2620 vpxor %xmm2, %xmm14, %xmm14
2621 #GHASH computation for the last <16 Byte block
2622 GHASH_MUL_AVX2 %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
2623 sub %r13, %r11
2624 add $16, %r11
2625 .else
2626 vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn)
2627 vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to mask out top 16-r13 bytes of xmm9
2628 vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9
2629 vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
2630 vpxor %xmm9, %xmm14, %xmm14
2631 #GHASH computation for the last <16 Byte block
2632 GHASH_MUL_AVX2 %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
2633 sub %r13, %r11
2634 add $16, %r11
2635 vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 # shuffle xmm9 back to output as ciphertext
2636 .endif
2637
2638
2639 #############################
2640 # output r13 Bytes
2641 vmovq %xmm9, %rax
2642 cmp $8, %r13
2643 jle _less_than_8_bytes_left\@
2644
2645 mov %rax, (arg2 , %r11)
2646 add $8, %r11
2647 vpsrldq $8, %xmm9, %xmm9
2648 vmovq %xmm9, %rax
2649 sub $8, %r13
2650
2651_less_than_8_bytes_left\@:
2652 movb %al, (arg2 , %r11)
2653 add $1, %r11
2654 shr $8, %rax
2655 sub $1, %r13
2656 jne _less_than_8_bytes_left\@
2657 #############################
2658
2659_multiple_of_16_bytes\@:
2660 mov arg7, %r12 # r12 = aadLen (number of bytes)
2661 shl $3, %r12 # convert into number of bits
2662 vmovd %r12d, %xmm15 # len(A) in xmm15
2663
2664        shl     $3, arg4                             # len(C) in bits (*8)
2665 vmovq arg4, %xmm1
2666 vpslldq $8, %xmm15, %xmm15 # xmm15 = len(A)|| 0x0000000000000000
2667 vpxor %xmm1, %xmm15, %xmm15 # xmm15 = len(A)||len(C)
2668
2669 vpxor %xmm15, %xmm14, %xmm14
2670 GHASH_MUL_AVX2 %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 # final GHASH computation
2671 vpshufb SHUF_MASK(%rip), %xmm14, %xmm14 # perform a 16Byte swap
2672
2673 mov arg5, %rax # rax = *Y0
2674 vmovdqu (%rax), %xmm9 # xmm9 = Y0
2675
2676 ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Y0)
2677
2678 vpxor %xmm14, %xmm9, %xmm9
2679
2680
2681
2682_return_T\@:
2683 mov arg8, %r10 # r10 = authTag
2684 mov arg9, %r11 # r11 = auth_tag_len
2685
2686 cmp $16, %r11
2687 je _T_16\@
2688
2689 cmp $12, %r11
2690 je _T_12\@
2691
2692_T_8\@:
2693 vmovq %xmm9, %rax
2694 mov %rax, (%r10)
2695 jmp _return_T_done\@
2696_T_12\@:
2697 vmovq %xmm9, %rax
2698 mov %rax, (%r10)
2699 vpsrldq $8, %xmm9, %xmm9
2700 vmovd %xmm9, %eax
2701 mov %eax, 8(%r10)
2702 jmp _return_T_done\@
2703
2704_T_16\@:
2705 vmovdqu %xmm9, (%r10)
2706
2707_return_T_done\@:
2708 mov %r14, %rsp
2709
2710 pop %r15
2711 pop %r14
2712 pop %r13
2713 pop %r12
2714.endm
2715
2716
2717#############################################################
2718#void aesni_gcm_precomp_avx_gen4
2719# (gcm_data *my_ctx_data,
2720# u8 *hash_subkey)# /* H, the Hash sub key input.
2721# Data starts on a 16-byte boundary. */
2722#############################################################
2723ENTRY(aesni_gcm_precomp_avx_gen4)
2724 #the number of pushes must equal STACK_OFFSET
2725 push %r12
2726 push %r13
2727 push %r14
2728 push %r15
2729
2730 mov %rsp, %r14
2731
2732
2733
2734 sub $VARIABLE_OFFSET, %rsp
2735 and $~63, %rsp # align rsp to 64 bytes
2736
2737 vmovdqu (arg2), %xmm6 # xmm6 = HashKey
2738
2739 vpshufb SHUF_MASK(%rip), %xmm6, %xmm6
2740 ############### PRECOMPUTATION of HashKey<<1 mod poly from the HashKey
2741 vmovdqa %xmm6, %xmm2
2742 vpsllq $1, %xmm6, %xmm6
2743 vpsrlq $63, %xmm2, %xmm2
2744 vmovdqa %xmm2, %xmm1
2745 vpslldq $8, %xmm2, %xmm2
2746 vpsrldq $8, %xmm1, %xmm1
2747 vpor %xmm2, %xmm6, %xmm6
2748 #reduction
2749 vpshufd $0b00100100, %xmm1, %xmm2
2750 vpcmpeqd TWOONE(%rip), %xmm2, %xmm2
2751 vpand POLY(%rip), %xmm2, %xmm2
2752 vpxor %xmm2, %xmm6, %xmm6 # xmm6 holds the HashKey<<1 mod poly
2753 #######################################################################
2754 vmovdqa %xmm6, HashKey(arg1) # store HashKey<<1 mod poly
2755
2756
2757 PRECOMPUTE_AVX2 %xmm6, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5
2758
2759 mov %r14, %rsp
2760
2761 pop %r15
2762 pop %r14
2763 pop %r13
2764 pop %r12
2765 ret
2766ENDPROC(aesni_gcm_precomp_avx_gen4)
2767
2768
2769###############################################################################
2770#void aesni_gcm_enc_avx_gen4(
2771# gcm_data *my_ctx_data, /* aligned to 16 Bytes */
2772# u8 *out, /* Ciphertext output. Encrypt in-place is allowed. */
2773# const u8 *in, /* Plaintext input */
2774# u64 plaintext_len, /* Length of data in Bytes for encryption. */
2775# u8 *iv, /* Pre-counter block j0: 4 byte salt
2776# (from Security Association) concatenated with 8 byte
2777# Initialisation Vector (from IPSec ESP Payload)
2778# concatenated with 0x00000001. 16-byte aligned pointer. */
2779# const u8 *aad, /* Additional Authentication Data (AAD)*/
2780# u64 aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */
2781# u8 *auth_tag, /* Authenticated Tag output. */
2782# u64 auth_tag_len)# /* Authenticated Tag Length in bytes.
2783# Valid values are 16 (most likely), 12 or 8. */
2784###############################################################################
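# Note: when the CPU has AVX2, the glue code added in this patch routes requests
# of at least AVX_GEN4_OPTSIZE (4096) bytes here; smaller requests fall back to
# the gen2 or SSE implementations.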
2785ENTRY(aesni_gcm_enc_avx_gen4)
2786 GCM_ENC_DEC_AVX2 ENC
2787 ret
2788ENDPROC(aesni_gcm_enc_avx_gen4)
2789
2790###############################################################################
2791#void aesni_gcm_dec_avx_gen4(
2792# gcm_data *my_ctx_data, /* aligned to 16 Bytes */
2793# u8 *out, /* Plaintext output. Decrypt in-place is allowed. */
2794# const u8 *in, /* Ciphertext input */
2795#        u64     plaintext_len, /* Length of data in Bytes for decryption. */
2796# u8 *iv, /* Pre-counter block j0: 4 byte salt
2797# (from Security Association) concatenated with 8 byte
2798# Initialisation Vector (from IPSec ESP Payload)
2799# concatenated with 0x00000001. 16-byte aligned pointer. */
2800# const u8 *aad, /* Additional Authentication Data (AAD)*/
2801# u64 aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */
2802# u8 *auth_tag, /* Authenticated Tag output. */
2803# u64 auth_tag_len)# /* Authenticated Tag Length in bytes.
2804# Valid values are 16 (most likely), 12 or 8. */
2805###############################################################################
2806ENTRY(aesni_gcm_dec_avx_gen4)
2807 GCM_ENC_DEC_AVX2 DEC
2808 ret
2809ENDPROC(aesni_gcm_dec_avx_gen4)
2810
2811#endif /* CONFIG_AS_AVX2 */
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 835488b745ee..948ad0e77741 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -101,6 +101,9 @@ asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
101int crypto_fpu_init(void); 101int crypto_fpu_init(void);
102void crypto_fpu_exit(void); 102void crypto_fpu_exit(void);
103 103
104#define AVX_GEN2_OPTSIZE 640
105#define AVX_GEN4_OPTSIZE 4096
106
104#ifdef CONFIG_X86_64 107#ifdef CONFIG_X86_64
105asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, 108asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
106 const u8 *in, unsigned int len, u8 *iv); 109 const u8 *in, unsigned int len, u8 *iv);
@@ -150,6 +153,123 @@ asmlinkage void aesni_gcm_dec(void *ctx, u8 *out,
150 u8 *hash_subkey, const u8 *aad, unsigned long aad_len, 153 u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
151 u8 *auth_tag, unsigned long auth_tag_len); 154 u8 *auth_tag, unsigned long auth_tag_len);
152 155
156
157#ifdef CONFIG_AS_AVX
158/*
159 * asmlinkage void aesni_gcm_precomp_avx_gen2()
160 * gcm_data *my_ctx_data, context data
161 * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary.
162 */
163asmlinkage void aesni_gcm_precomp_avx_gen2(void *my_ctx_data, u8 *hash_subkey);
164
165asmlinkage void aesni_gcm_enc_avx_gen2(void *ctx, u8 *out,
166 const u8 *in, unsigned long plaintext_len, u8 *iv,
167 const u8 *aad, unsigned long aad_len,
168 u8 *auth_tag, unsigned long auth_tag_len);
169
170asmlinkage void aesni_gcm_dec_avx_gen2(void *ctx, u8 *out,
171 const u8 *in, unsigned long ciphertext_len, u8 *iv,
172 const u8 *aad, unsigned long aad_len,
173 u8 *auth_tag, unsigned long auth_tag_len);
174
175static void aesni_gcm_enc_avx(void *ctx, u8 *out,
176 const u8 *in, unsigned long plaintext_len, u8 *iv,
177 u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
178 u8 *auth_tag, unsigned long auth_tag_len)
179{
180 if (plaintext_len < AVX_GEN2_OPTSIZE) {
181 aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad,
182 aad_len, auth_tag, auth_tag_len);
183 } else {
184 aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
185 aesni_gcm_enc_avx_gen2(ctx, out, in, plaintext_len, iv, aad,
186 aad_len, auth_tag, auth_tag_len);
187 }
188}
189
190static void aesni_gcm_dec_avx(void *ctx, u8 *out,
191 const u8 *in, unsigned long ciphertext_len, u8 *iv,
192 u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
193 u8 *auth_tag, unsigned long auth_tag_len)
194{
195 if (ciphertext_len < AVX_GEN2_OPTSIZE) {
196 aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey, aad,
197 aad_len, auth_tag, auth_tag_len);
198 } else {
199 aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
200 aesni_gcm_dec_avx_gen2(ctx, out, in, ciphertext_len, iv, aad,
201 aad_len, auth_tag, auth_tag_len);
202 }
203}
204#endif
205
206#ifdef CONFIG_AS_AVX2
207/*
208 * asmlinkage void aesni_gcm_precomp_avx_gen4()
209 * gcm_data *my_ctx_data, context data
210 * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary.
211 */
212asmlinkage void aesni_gcm_precomp_avx_gen4(void *my_ctx_data, u8 *hash_subkey);
213
214asmlinkage void aesni_gcm_enc_avx_gen4(void *ctx, u8 *out,
215 const u8 *in, unsigned long plaintext_len, u8 *iv,
216 const u8 *aad, unsigned long aad_len,
217 u8 *auth_tag, unsigned long auth_tag_len);
218
219asmlinkage void aesni_gcm_dec_avx_gen4(void *ctx, u8 *out,
220 const u8 *in, unsigned long ciphertext_len, u8 *iv,
221 const u8 *aad, unsigned long aad_len,
222 u8 *auth_tag, unsigned long auth_tag_len);
223
224static void aesni_gcm_enc_avx2(void *ctx, u8 *out,
225 const u8 *in, unsigned long plaintext_len, u8 *iv,
226 u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
227 u8 *auth_tag, unsigned long auth_tag_len)
228{
229 if (plaintext_len < AVX_GEN2_OPTSIZE) {
230 aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad,
231 aad_len, auth_tag, auth_tag_len);
232 } else if (plaintext_len < AVX_GEN4_OPTSIZE) {
233 aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
234 aesni_gcm_enc_avx_gen2(ctx, out, in, plaintext_len, iv, aad,
235 aad_len, auth_tag, auth_tag_len);
236 } else {
237 aesni_gcm_precomp_avx_gen4(ctx, hash_subkey);
238 aesni_gcm_enc_avx_gen4(ctx, out, in, plaintext_len, iv, aad,
239 aad_len, auth_tag, auth_tag_len);
240 }
241}
242
243static void aesni_gcm_dec_avx2(void *ctx, u8 *out,
244 const u8 *in, unsigned long ciphertext_len, u8 *iv,
245 u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
246 u8 *auth_tag, unsigned long auth_tag_len)
247{
248 if (ciphertext_len < AVX_GEN2_OPTSIZE) {
249 aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey,
250 aad, aad_len, auth_tag, auth_tag_len);
251 } else if (ciphertext_len < AVX_GEN4_OPTSIZE) {
252 aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
253 aesni_gcm_dec_avx_gen2(ctx, out, in, ciphertext_len, iv, aad,
254 aad_len, auth_tag, auth_tag_len);
255 } else {
256 aesni_gcm_precomp_avx_gen4(ctx, hash_subkey);
257 aesni_gcm_dec_avx_gen4(ctx, out, in, ciphertext_len, iv, aad,
258 aad_len, auth_tag, auth_tag_len);
259 }
260}
261#endif
262
263static void (*aesni_gcm_enc_tfm)(void *ctx, u8 *out,
264 const u8 *in, unsigned long plaintext_len, u8 *iv,
265 u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
266 u8 *auth_tag, unsigned long auth_tag_len);
267
268static void (*aesni_gcm_dec_tfm)(void *ctx, u8 *out,
269 const u8 *in, unsigned long ciphertext_len, u8 *iv,
270 u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
271 u8 *auth_tag, unsigned long auth_tag_len);
272
153static inline struct 273static inline struct
154aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm) 274aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm)
155{ 275{
@@ -915,7 +1035,7 @@ static int __driver_rfc4106_encrypt(struct aead_request *req)
915 dst = src; 1035 dst = src;
916 } 1036 }
917 1037
918 aesni_gcm_enc(aes_ctx, dst, src, (unsigned long)req->cryptlen, iv, 1038 aesni_gcm_enc_tfm(aes_ctx, dst, src, (unsigned long)req->cryptlen, iv,
919 ctx->hash_subkey, assoc, (unsigned long)req->assoclen, dst 1039 ctx->hash_subkey, assoc, (unsigned long)req->assoclen, dst
920 + ((unsigned long)req->cryptlen), auth_tag_len); 1040 + ((unsigned long)req->cryptlen), auth_tag_len);
921 1041
@@ -996,12 +1116,12 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
996 dst = src; 1116 dst = src;
997 } 1117 }
998 1118
999 aesni_gcm_dec(aes_ctx, dst, src, tempCipherLen, iv, 1119 aesni_gcm_dec_tfm(aes_ctx, dst, src, tempCipherLen, iv,
1000 ctx->hash_subkey, assoc, (unsigned long)req->assoclen, 1120 ctx->hash_subkey, assoc, (unsigned long)req->assoclen,
1001 authTag, auth_tag_len); 1121 authTag, auth_tag_len);
1002 1122
1003 /* Compare generated tag with passed in tag. */ 1123 /* Compare generated tag with passed in tag. */
1004 retval = memcmp(src + tempCipherLen, authTag, auth_tag_len) ? 1124 retval = crypto_memneq(src + tempCipherLen, authTag, auth_tag_len) ?
1005 -EBADMSG : 0; 1125 -EBADMSG : 0;
1006 1126
1007 if (one_entry_in_sg) { 1127 if (one_entry_in_sg) {
@@ -1353,6 +1473,27 @@ static int __init aesni_init(void)
1353 1473
1354 if (!x86_match_cpu(aesni_cpu_id)) 1474 if (!x86_match_cpu(aesni_cpu_id))
1355 return -ENODEV; 1475 return -ENODEV;
1476#ifdef CONFIG_X86_64
1477#ifdef CONFIG_AS_AVX2
1478 if (boot_cpu_has(X86_FEATURE_AVX2)) {
1479 pr_info("AVX2 version of gcm_enc/dec engaged.\n");
1480 aesni_gcm_enc_tfm = aesni_gcm_enc_avx2;
1481 aesni_gcm_dec_tfm = aesni_gcm_dec_avx2;
1482 } else
1483#endif
1484#ifdef CONFIG_AS_AVX
1485 if (boot_cpu_has(X86_FEATURE_AVX)) {
1486 pr_info("AVX version of gcm_enc/dec engaged.\n");
1487 aesni_gcm_enc_tfm = aesni_gcm_enc_avx;
1488 aesni_gcm_dec_tfm = aesni_gcm_dec_avx;
1489 } else
1490#endif
1491 {
1492 pr_info("SSE version of gcm_enc/dec engaged.\n");
1493 aesni_gcm_enc_tfm = aesni_gcm_enc;
1494 aesni_gcm_dec_tfm = aesni_gcm_dec;
1495 }
1496#endif
1356 1497
1357 err = crypto_fpu_init(); 1498 err = crypto_fpu_init();
1358 if (err) 1499 if (err)
diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h
index fd8f9e2ca35f..535192f6bfad 100644
--- a/arch/x86/include/asm/dmi.h
+++ b/arch/x86/include/asm/dmi.h
@@ -13,7 +13,9 @@ static __always_inline __init void *dmi_alloc(unsigned len)
13} 13}
14 14
15/* Use early IO mappings for DMI because it's initialized early */ 15/* Use early IO mappings for DMI because it's initialized early */
16#define dmi_ioremap early_ioremap 16#define dmi_early_remap early_ioremap
17#define dmi_iounmap early_iounmap 17#define dmi_early_unmap early_iounmap
18#define dmi_remap ioremap
19#define dmi_unmap iounmap
18 20
19#endif /* _ASM_X86_DMI_H */ 21#endif /* _ASM_X86_DMI_H */
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index e846225265ed..7252cd339175 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -175,64 +175,7 @@ static inline void __set_fixmap(enum fixed_addresses idx,
175} 175}
176#endif 176#endif
177 177
178#define set_fixmap(idx, phys) \ 178#include <asm-generic/fixmap.h>
179 __set_fixmap(idx, phys, PAGE_KERNEL)
180
181/*
182 * Some hardware wants to get fixmapped without caching.
183 */
184#define set_fixmap_nocache(idx, phys) \
185 __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
186
187#define clear_fixmap(idx) \
188 __set_fixmap(idx, 0, __pgprot(0))
189
190#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
191#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
192
193extern void __this_fixmap_does_not_exist(void);
194
195/*
196 * 'index to address' translation. If anyone tries to use the idx
197 * directly without translation, we catch the bug with a NULL-deference
198 * kernel oops. Illegal ranges of incoming indices are caught too.
199 */
200static __always_inline unsigned long fix_to_virt(const unsigned int idx)
201{
202 /*
203 * this branch gets completely eliminated after inlining,
204 * except when someone tries to use fixaddr indices in an
205 * illegal way. (such as mixing up address types or using
206 * out-of-range indices).
207 *
208 * If it doesn't get removed, the linker will complain
209 * loudly with a reasonably clear error message..
210 */
211 if (idx >= __end_of_fixed_addresses)
212 __this_fixmap_does_not_exist();
213
214 return __fix_to_virt(idx);
215}
216
217static inline unsigned long virt_to_fix(const unsigned long vaddr)
218{
219 BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
220 return __virt_to_fix(vaddr);
221}
222
223/* Return an pointer with offset calculated */
224static __always_inline unsigned long
225__set_fixmap_offset(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags)
226{
227 __set_fixmap(idx, phys, flags);
228 return fix_to_virt(idx) + (phys & (PAGE_SIZE - 1));
229}
230
231#define set_fixmap_offset(idx, phys) \
232 __set_fixmap_offset(idx, phys, PAGE_KERNEL)
233
234#define set_fixmap_offset_nocache(idx, phys) \
235 __set_fixmap_offset(idx, phys, PAGE_KERNEL_NOCACHE)
236 179
237#endif /* !__ASSEMBLY__ */ 180#endif /* !__ASSEMBLY__ */
238#endif /* _ASM_X86_FIXMAP_H */ 181#endif /* _ASM_X86_FIXMAP_H */
diff --git a/arch/x86/include/asm/hash.h b/arch/x86/include/asm/hash.h
new file mode 100644
index 000000000000..e8c58f88b1d4
--- /dev/null
+++ b/arch/x86/include/asm/hash.h
@@ -0,0 +1,7 @@
1#ifndef _ASM_X86_HASH_H
2#define _ASM_X86_HASH_H
3
4struct fast_hash_ops;
5extern void setup_arch_fast_hash(struct fast_hash_ops *ops);
6
7#endif /* _ASM_X86_HASH_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ae5d7830855c..fdf83afbb7d9 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -605,6 +605,7 @@ struct kvm_arch {
605 /* fields used by HYPER-V emulation */ 605 /* fields used by HYPER-V emulation */
606 u64 hv_guest_os_id; 606 u64 hv_guest_os_id;
607 u64 hv_hypercall; 607 u64 hv_hypercall;
608 u64 hv_tsc_page;
608 609
609 #ifdef CONFIG_KVM_MMU_AUDIT 610 #ifdef CONFIG_KVM_MMU_AUDIT
610 int audit_point; 611 int audit_point;
@@ -699,6 +700,8 @@ struct kvm_x86_ops {
699 void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); 700 void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
700 void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); 701 void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
701 void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); 702 void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
703 u64 (*get_dr6)(struct kvm_vcpu *vcpu);
704 void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value);
702 void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value); 705 void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value);
703 void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); 706 void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
704 unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); 707 unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 1df115909758..c7678e43465b 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -85,28 +85,9 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
85 return ret; 85 return ret;
86} 86}
87 87
88static inline uint32_t kvm_cpuid_base(void)
89{
90 if (boot_cpu_data.cpuid_level < 0)
91 return 0; /* So we don't blow up on old processors */
92
93 if (cpu_has_hypervisor)
94 return hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0);
95
96 return 0;
97}
98
99static inline bool kvm_para_available(void)
100{
101 return kvm_cpuid_base() != 0;
102}
103
104static inline unsigned int kvm_arch_para_features(void)
105{
106 return cpuid_eax(KVM_CPUID_FEATURES);
107}
108
109#ifdef CONFIG_KVM_GUEST 88#ifdef CONFIG_KVM_GUEST
89bool kvm_para_available(void);
90unsigned int kvm_arch_para_features(void);
110void __init kvm_guest_init(void); 91void __init kvm_guest_init(void);
111void kvm_async_pf_task_wait(u32 token); 92void kvm_async_pf_task_wait(u32 token);
112void kvm_async_pf_task_wake(u32 token); 93void kvm_async_pf_task_wake(u32 token);
@@ -126,6 +107,16 @@ static inline void kvm_spinlock_init(void)
126#define kvm_async_pf_task_wait(T) do {} while(0) 107#define kvm_async_pf_task_wait(T) do {} while(0)
127#define kvm_async_pf_task_wake(T) do {} while(0) 108#define kvm_async_pf_task_wake(T) do {} while(0)
128 109
110static inline bool kvm_para_available(void)
111{
112 return 0;
113}
114
115static inline unsigned int kvm_arch_para_features(void)
116{
117 return 0;
118}
119
129static inline u32 kvm_read_and_reset_pf_reason(void) 120static inline u32 kvm_read_and_reset_pf_reason(void)
130{ 121{
131 return 0; 122 return 0;
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 401f350ef71b..cd6e1610e29e 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -781,9 +781,9 @@ static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock,
781 */ 781 */
782#define PV_CALLEE_SAVE_REGS_THUNK(func) \ 782#define PV_CALLEE_SAVE_REGS_THUNK(func) \
783 extern typeof(func) __raw_callee_save_##func; \ 783 extern typeof(func) __raw_callee_save_##func; \
784 static void *__##func##__ __used = func; \
785 \ 784 \
786 asm(".pushsection .text;" \ 785 asm(".pushsection .text;" \
786 ".globl __raw_callee_save_" #func " ; " \
787 "__raw_callee_save_" #func ": " \ 787 "__raw_callee_save_" #func ": " \
788 PV_SAVE_ALL_CALLER_REGS \ 788 PV_SAVE_ALL_CALLER_REGS \
789 "call " #func ";" \ 789 "call " #func ";" \
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index aab8f671b523..7549b8b369e4 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -388,10 +388,11 @@ extern struct pv_lock_ops pv_lock_ops;
388 _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") 388 _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]")
389 389
390/* Simple instruction patching code. */ 390/* Simple instruction patching code. */
391#define DEF_NATIVE(ops, name, code) \ 391#define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t"
392 extern const char start_##ops##_##name[] __visible, \ 392
393 end_##ops##_##name[] __visible; \ 393#define DEF_NATIVE(ops, name, code) \
394 asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") 394 __visible extern const char start_##ops##_##name[], end_##ops##_##name[]; \
395 asm(NATIVE_LABEL("start_", ops, name) code NATIVE_LABEL("end_", ops, name))
395 396
396unsigned paravirt_patch_nop(void); 397unsigned paravirt_patch_nop(void);
397unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len); 398unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len);
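NATIVE_LABEL() builds a global assembler label out of stringified macro arguments, so DEF_NATIVE() can bracket each native instruction sequence with start_/end_ symbols the patching code can locate. A stand-alone user-space sketch of the same trick (single-name labels, GCC toolchain assumed, illustrative only):

#include <stdio.h>

#define SNIPPET_LABEL(prefix, name) \
        "\n\t.globl " prefix #name "\n" prefix #name ":\n\t"

#define DEF_SNIPPET(name, code)                                  \
        extern const char start_##name[], end_##name[];          \
        asm(".pushsection .text\n\t"                             \
            SNIPPET_LABEL("start_", name) code                   \
            SNIPPET_LABEL("end_", name) ".popsection")

DEF_SNIPPET(nop3, "nop; nop; nop");

int main(void)
{
        printf("snippet length: %ld byte(s)\n", (long)(end_nop3 - start_nop3));
        return 0;
}

Within a single object file the labels would resolve even without .globl; making them global mirrors what the hunk above does for the kernel's start_/end_ symbols.
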
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 947b5c417e83..1ac6114c9ea5 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -104,7 +104,7 @@ extern void pci_iommu_alloc(void);
104struct msi_desc; 104struct msi_desc;
105int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); 105int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
106void native_teardown_msi_irq(unsigned int irq); 106void native_teardown_msi_irq(unsigned int irq);
107void native_restore_msi_irqs(struct pci_dev *dev, int irq); 107void native_restore_msi_irqs(struct pci_dev *dev);
108int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, 108int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
109 unsigned int irq_base, unsigned int irq_offset); 109 unsigned int irq_base, unsigned int irq_offset);
110#else 110#else
@@ -125,7 +125,6 @@ int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
125 125
126/* generic pci stuff */ 126/* generic pci stuff */
127#include <asm-generic/pci.h> 127#include <asm-generic/pci.h>
128#define PCIBIOS_MAX_MEM_32 0xffffffff
129 128
130#ifdef CONFIG_NUMA 129#ifdef CONFIG_NUMA
131/* Returns the node based on pci bus */ 130/* Returns the node based on pci bus */
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index a83aa44bb1fb..1aa9ccd43223 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -121,7 +121,8 @@
121 121
122/* Set of bits not changed in pte_modify */ 122/* Set of bits not changed in pte_modify */
123#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ 123#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \
124 _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY) 124 _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \
125 _PAGE_SOFT_DIRTY)
125#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) 126#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)
126 127
127#define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT) 128#define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT)
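Folding _PAGE_SOFT_DIRTY into _PAGE_CHG_MASK means a protection change along the pte_modify() path keeps the soft-dirty tracking bit instead of silently clearing it. A toy model of that masking, with made-up bit positions rather than the real x86 PTE layout:

#include <stdint.h>
#include <stdio.h>

/* Toy bit assignments for the sketch only -- not the real x86 PTE bits. */
#define TOY_PRESENT     (1u << 0)
#define TOY_RW          (1u << 1)
#define TOY_ACCESSED    (1u << 5)
#define TOY_DIRTY       (1u << 6)
#define TOY_SOFT_DIRTY  (1u << 11)
#define TOY_PFN_MASK    0xfffff000u

/* Bits that must survive a protection change, mirroring _PAGE_CHG_MASK. */
#define TOY_CHG_MASK  (TOY_PFN_MASK | TOY_ACCESSED | TOY_DIRTY | TOY_SOFT_DIRTY)

/* Rough analogue of pte_modify(): keep CHG_MASK bits, take the rest from newprot. */
static uint32_t toy_pte_modify(uint32_t pte, uint32_t newprot)
{
        return (pte & TOY_CHG_MASK) | (newprot & ~TOY_CHG_MASK);
}

int main(void)
{
        uint32_t pte = 0x12345000u | TOY_PRESENT | TOY_RW | TOY_DIRTY | TOY_SOFT_DIRTY;
        uint32_t ro  = TOY_PRESENT;     /* say, an mprotect() to read-only */

        pte = toy_pte_modify(pte, ro);
        printf("soft-dirty survived: %s\n", (pte & TOY_SOFT_DIRTY) ? "yes" : "no");
        return 0;
}

Drop TOY_SOFT_DIRTY from TOY_CHG_MASK and the same update reports "no", which is essentially the loss of tracking state the hunk is preventing.
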
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 3ba3de457d05..e1940c06ed02 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -163,9 +163,11 @@ struct thread_info {
163 */ 163 */
164#ifndef __ASSEMBLY__ 164#ifndef __ASSEMBLY__
165 165
166 166#define current_stack_pointer ({ \
167/* how to get the current stack pointer from C */ 167 unsigned long sp; \
168register unsigned long current_stack_pointer asm("esp") __used; 168 asm("mov %%esp,%0" : "=g" (sp)); \
169 sp; \
170})
169 171
170/* how to get the thread information struct from C */ 172/* how to get the thread information struct from C */
171static inline struct thread_info *current_thread_info(void) 173static inline struct thread_info *current_thread_info(void)
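The hunk replaces a global register variable with a statement-expression macro that asks for %esp through inline asm. A user-space sketch of the same construct (the 64-bit branch exists only so the example builds outside a 32-bit kernel tree):

#include <stdio.h>

#ifdef __x86_64__
#define current_stack_pointer ({                 \
        unsigned long sp;                        \
        asm("mov %%rsp,%0" : "=g" (sp));         \
        sp;                                      \
})
#else
#define current_stack_pointer ({                 \
        unsigned long sp;                        \
        asm("mov %%esp,%0" : "=g" (sp));         \
        sp;                                      \
})
#endif

int main(void)
{
        printf("approximate stack pointer: %#lx\n", current_stack_pointer);
        return 0;
}
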
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
index 6b964a0b86d1..062921ef34e9 100644
--- a/arch/x86/include/asm/uv/uv.h
+++ b/arch/x86/include/asm/uv/uv.h
@@ -12,7 +12,6 @@ extern enum uv_system_type get_uv_system_type(void);
12extern int is_uv_system(void); 12extern int is_uv_system(void);
13extern void uv_cpu_init(void); 13extern void uv_cpu_init(void);
14extern void uv_nmi_init(void); 14extern void uv_nmi_init(void);
15extern void uv_register_nmi_notifier(void);
16extern void uv_system_init(void); 15extern void uv_system_init(void);
17extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, 16extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
18 struct mm_struct *mm, 17 struct mm_struct *mm,
@@ -26,7 +25,6 @@ static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; }
26static inline int is_uv_system(void) { return 0; } 25static inline int is_uv_system(void) { return 0; }
27static inline void uv_cpu_init(void) { } 26static inline void uv_cpu_init(void) { }
28static inline void uv_system_init(void) { } 27static inline void uv_system_init(void) { }
29static inline void uv_register_nmi_notifier(void) { }
30static inline const struct cpumask * 28static inline const struct cpumask *
31uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, 29uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm,
32 unsigned long start, unsigned long end, unsigned int cpu) 30 unsigned long start, unsigned long end, unsigned int cpu)
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 966502d4682e..2067264fb7f5 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -100,6 +100,7 @@
100 100
101#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f 101#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f
102#define VMX_MISC_SAVE_EFER_LMA 0x00000020 102#define VMX_MISC_SAVE_EFER_LMA 0x00000020
103#define VMX_MISC_ACTIVITY_HLT 0x00000040
103 104
104/* VMCS Encodings */ 105/* VMCS Encodings */
105enum vmcs_field { 106enum vmcs_field {
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 0f1be11e43d2..e45e4da96bf1 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -181,7 +181,7 @@ struct x86_msi_ops {
181 u8 hpet_id); 181 u8 hpet_id);
182 void (*teardown_msi_irq)(unsigned int irq); 182 void (*teardown_msi_irq)(unsigned int irq);
183 void (*teardown_msi_irqs)(struct pci_dev *dev); 183 void (*teardown_msi_irqs)(struct pci_dev *dev);
184 void (*restore_msi_irqs)(struct pci_dev *dev, int irq); 184 void (*restore_msi_irqs)(struct pci_dev *dev);
185 int (*setup_hpet_msi)(unsigned int irq, unsigned int id); 185 int (*setup_hpet_msi)(unsigned int irq, unsigned int id);
186 u32 (*msi_mask_irq)(struct msi_desc *desc, u32 mask, u32 flag); 186 u32 (*msi_mask_irq)(struct msi_desc *desc, u32 mask, u32 flag);
187 u32 (*msix_mask_irq)(struct msi_desc *desc, u32 flag); 187 u32 (*msix_mask_irq)(struct msi_desc *desc, u32 flag);
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index b913915e8e63..787e1bb5aafc 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -52,7 +52,8 @@ extern unsigned long set_phys_range_identity(unsigned long pfn_s,
52extern int m2p_add_override(unsigned long mfn, struct page *page, 52extern int m2p_add_override(unsigned long mfn, struct page *page,
53 struct gnttab_map_grant_ref *kmap_op); 53 struct gnttab_map_grant_ref *kmap_op);
54extern int m2p_remove_override(struct page *page, 54extern int m2p_remove_override(struct page *page,
55 struct gnttab_map_grant_ref *kmap_op); 55 struct gnttab_map_grant_ref *kmap_op,
56 unsigned long mfn);
56extern struct page *m2p_find_override(unsigned long mfn); 57extern struct page *m2p_find_override(unsigned long mfn);
57extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); 58extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
58 59
@@ -121,7 +122,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
121 pfn = m2p_find_override_pfn(mfn, ~0); 122 pfn = m2p_find_override_pfn(mfn, ~0);
122 } 123 }
123 124
124 /* 125 /*
125 * pfn is ~0 if there are no entries in the m2p for mfn or if the 126 * pfn is ~0 if there are no entries in the m2p for mfn or if the
126 * entry doesn't map back to the mfn and m2p_override doesn't have a 127 * entry doesn't map back to the mfn and m2p_override doesn't have a
127 * valid entry for it. 128 * valid entry for it.
@@ -167,7 +168,12 @@ static inline xpaddr_t machine_to_phys(xmaddr_t machine)
167 */ 168 */
168static inline unsigned long mfn_to_local_pfn(unsigned long mfn) 169static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
169{ 170{
170 unsigned long pfn = mfn_to_pfn(mfn); 171 unsigned long pfn;
172
173 if (xen_feature(XENFEAT_auto_translated_physmap))
174 return mfn;
175
176 pfn = mfn_to_pfn(mfn);
171 if (get_phys_to_machine(pfn) != mfn) 177 if (get_phys_to_machine(pfn) != mfn)
172 return -1; /* force !pfn_valid() */ 178 return -1; /* force !pfn_valid() */
173 return pfn; 179 return pfn;
@@ -222,5 +228,6 @@ void make_lowmem_page_readonly(void *vaddr);
222void make_lowmem_page_readwrite(void *vaddr); 228void make_lowmem_page_readwrite(void *vaddr);
223 229
224#define xen_remap(cookie, size) ioremap((cookie), (size)); 230#define xen_remap(cookie, size) ioremap((cookie), (size));
231#define xen_unmap(cookie) iounmap((cookie))
225 232
226#endif /* _ASM_X86_XEN_PAGE_H */ 233#endif /* _ASM_X86_XEN_PAGE_H */
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index b8f1c0176cbc..462efe746d77 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -28,6 +28,9 @@
28/* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ 28/* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
29#define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) 29#define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1)
30 30
31/* A partition's reference time stamp counter (TSC) page */
32#define HV_X64_MSR_REFERENCE_TSC 0x40000021
33
31/* 34/*
32 * There is a single feature flag that signifies the presence of the MSR 35 * There is a single feature flag that signifies the presence of the MSR
33 * that can be used to retrieve both the local APIC Timer frequency as 36 * that can be used to retrieve both the local APIC Timer frequency as
@@ -198,6 +201,9 @@
198#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ 201#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \
199 (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) 202 (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
200 203
204#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001
205#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12
206
201#define HV_PROCESSOR_POWER_STATE_C0 0 207#define HV_PROCESSOR_POWER_STATE_C0 0
202#define HV_PROCESSOR_POWER_STATE_C1 1 208#define HV_PROCESSOR_POWER_STATE_C1 1
203#define HV_PROCESSOR_POWER_STATE_C2 2 209#define HV_PROCESSOR_POWER_STATE_C2 2
@@ -210,4 +216,11 @@
210#define HV_STATUS_INVALID_ALIGNMENT 4 216#define HV_STATUS_INVALID_ALIGNMENT 4
211#define HV_STATUS_INSUFFICIENT_BUFFERS 19 217#define HV_STATUS_INSUFFICIENT_BUFFERS 19
212 218
219typedef struct _HV_REFERENCE_TSC_PAGE {
220 __u32 tsc_sequence;
221 __u32 res1;
222 __u64 tsc_scale;
223 __s64 tsc_offset;
224} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
225
213#endif 226#endif
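The MSR and structure above describe the Hyper-V reference TSC page. A sketch of how a guest would typically consume it: sample under the sequence counter and compute reference_time = ((tsc * tsc_scale) >> 64) + tsc_offset in 100ns units. Treating sequence 0 as "fall back to the reference-count MSR", the plain compiler barriers, and the GCC __int128 multiply are assumptions of this sketch, not details taken from the patch.

#include <stdint.h>
#include <stdio.h>

/* Mirrors the struct added in the hunk above. */
struct hv_reference_tsc_page {
        uint32_t tsc_sequence;
        uint32_t res1;
        uint64_t tsc_scale;
        int64_t  tsc_offset;
};

static inline uint64_t read_tsc(void)
{
        uint32_t lo, hi;

        asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
        return ((uint64_t)hi << 32) | lo;
}

/* Returns the partition reference time, or 0 when the page is marked invalid. */
static uint64_t hv_read_reference_time(const volatile struct hv_reference_tsc_page *pg)
{
        uint32_t seq;
        uint64_t scale, tsc;
        int64_t offset;

        do {
                seq = pg->tsc_sequence;
                if (seq == 0)
                        return 0;
                asm volatile("" ::: "memory");  /* keep the loads inside the retry loop */
                scale  = pg->tsc_scale;
                offset = pg->tsc_offset;
                tsc    = read_tsc();
                asm volatile("" ::: "memory");
        } while (pg->tsc_sequence != seq);

        return (uint64_t)(((unsigned __int128)tsc * scale) >> 64) + offset;
}

int main(void)
{
        /* Made-up page: scale 0.5 in 64.64 fixed point, so the result is tsc/2. */
        struct hv_reference_tsc_page pg = {
                .tsc_sequence = 1,
                .tsc_scale    = 1ULL << 63,
                .tsc_offset   = 0,
        };

        printf("reference time: %llu\n",
               (unsigned long long)hv_read_reference_time(&pg));
        return 0;
}
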
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 59cea185ad1d..c19fc60ff062 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -528,6 +528,7 @@
528#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e 528#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e
529#define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f 529#define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f
530#define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490 530#define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490
531#define MSR_IA32_VMX_VMFUNC 0x00000491
531 532
532/* VMX_BASIC bits and bitmasks */ 533/* VMX_BASIC bits and bitmasks */
533#define VMX_BASIC_VMCS_SIZE_SHIFT 32 534#define VMX_BASIC_VMCS_SIZE_SHIFT 32
diff --git a/arch/x86/include/uapi/asm/sembuf.h b/arch/x86/include/uapi/asm/sembuf.h
index ee50c801f7b7..cc2d6a3aeae7 100644
--- a/arch/x86/include/uapi/asm/sembuf.h
+++ b/arch/x86/include/uapi/asm/sembuf.h
@@ -13,12 +13,12 @@
13struct semid64_ds { 13struct semid64_ds {
14 struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ 14 struct ipc64_perm sem_perm; /* permissions .. see ipc.h */
15 __kernel_time_t sem_otime; /* last semop time */ 15 __kernel_time_t sem_otime; /* last semop time */
16 unsigned long __unused1; 16 __kernel_ulong_t __unused1;
17 __kernel_time_t sem_ctime; /* last change time */ 17 __kernel_time_t sem_ctime; /* last change time */
18 unsigned long __unused2; 18 __kernel_ulong_t __unused2;
19 unsigned long sem_nsems; /* no. of semaphores in array */ 19 __kernel_ulong_t sem_nsems; /* no. of semaphores in array */
20 unsigned long __unused3; 20 __kernel_ulong_t __unused3;
21 unsigned long __unused4; 21 __kernel_ulong_t __unused4;
22}; 22};
23 23
24#endif /* _ASM_X86_SEMBUF_H */ 24#endif /* _ASM_X86_SEMBUF_H */
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 6c0b43bd024b..1dac94265b59 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -46,7 +46,6 @@
46 46
47#include "sleep.h" /* To include x86_acpi_suspend_lowlevel */ 47#include "sleep.h" /* To include x86_acpi_suspend_lowlevel */
48static int __initdata acpi_force = 0; 48static int __initdata acpi_force = 0;
49u32 acpi_rsdt_forced;
50int acpi_disabled; 49int acpi_disabled;
51EXPORT_SYMBOL(acpi_disabled); 50EXPORT_SYMBOL(acpi_disabled);
52 51
@@ -1034,9 +1033,7 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
1034 1033
1035 if (!acpi_ioapic) 1034 if (!acpi_ioapic)
1036 return 0; 1035 return 0;
1037 if (!dev) 1036 if (!dev || !dev_is_pci(dev))
1038 return 0;
1039 if (dev->bus != &pci_bus_type)
1040 return 0; 1037 return 0;
1041 1038
1042 pdev = to_pci_dev(dev); 1039 pdev = to_pci_dev(dev);
@@ -1564,7 +1561,7 @@ static int __init parse_acpi(char *arg)
1564 } 1561 }
1565 /* acpi=rsdt use RSDT instead of XSDT */ 1562 /* acpi=rsdt use RSDT instead of XSDT */
1566 else if (strcmp(arg, "rsdt") == 0) { 1563 else if (strcmp(arg, "rsdt") == 0) {
1567 acpi_rsdt_forced = 1; 1564 acpi_gbl_do_not_use_xsdt = TRUE;
1568 } 1565 }
1569 /* "acpi=noirq" disables ACPI interrupt routing */ 1566 /* "acpi=noirq" disables ACPI interrupt routing */
1570 else if (strcmp(arg, "noirq") == 0) { 1567 else if (strcmp(arg, "noirq") == 0) {
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 5d5b9eb2b7a4..2c621a6b901a 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -20,9 +20,7 @@
20#include <asm/apic.h> 20#include <asm/apic.h>
21#include <asm/ipi.h> 21#include <asm/ipi.h>
22 22
23#ifdef CONFIG_ACPI 23#include <linux/acpi.h>
24#include <acpi/acpi_bus.h>
25#endif
26 24
27static struct apic apic_physflat; 25static struct apic apic_physflat;
28static struct apic apic_flat; 26static struct apic apic_flat;
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index a43f068ebec1..6ad4658de705 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -37,9 +37,6 @@
37#include <linux/kthread.h> 37#include <linux/kthread.h>
38#include <linux/jiffies.h> /* time_after() */ 38#include <linux/jiffies.h> /* time_after() */
39#include <linux/slab.h> 39#include <linux/slab.h>
40#ifdef CONFIG_ACPI
41#include <acpi/acpi_bus.h>
42#endif
43#include <linux/bootmem.h> 40#include <linux/bootmem.h>
44#include <linux/dmar.h> 41#include <linux/dmar.h>
45#include <linux/hpet.h> 42#include <linux/hpet.h>
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index ad0dc0428baf..d263b1307de1 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -980,7 +980,6 @@ void __init uv_system_init(void)
980 uv_nmi_setup(); 980 uv_nmi_setup();
981 uv_cpu_init(); 981 uv_cpu_init();
982 uv_scir_register_cpu_notifier(); 982 uv_scir_register_cpu_notifier();
983 uv_register_nmi_notifier();
984 proc_mkdir("sgi_uv", NULL); 983 proc_mkdir("sgi_uv", NULL);
985 984
986 /* register Legacy VGA I/O redirection handler */ 985 /* register Legacy VGA I/O redirection handler */
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c
index e2dbcb7dabdd..83a7995625a6 100644
--- a/arch/x86/kernel/check.c
+++ b/arch/x86/kernel/check.c
@@ -91,7 +91,7 @@ void __init setup_bios_corruption_check(void)
91 91
92 corruption_check_size = round_up(corruption_check_size, PAGE_SIZE); 92 corruption_check_size = round_up(corruption_check_size, PAGE_SIZE);
93 93
94 for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL) { 94 for_each_free_mem_range(i, NUMA_NO_NODE, &start, &end, NULL) {
95 start = clamp_t(phys_addr_t, round_up(start, PAGE_SIZE), 95 start = clamp_t(phys_addr_t, round_up(start, PAGE_SIZE),
96 PAGE_SIZE, corruption_check_size); 96 PAGE_SIZE, corruption_check_size);
97 end = clamp_t(phys_addr_t, round_down(end, PAGE_SIZE), 97 end = clamp_t(phys_addr_t, round_down(end, PAGE_SIZE),
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 4a6ff747aaad..8fffd845e22b 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -433,7 +433,7 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device,
433 if (c->x86 >= 0x15) 433 if (c->x86 >= 0x15)
434 snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86); 434 snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86);
435 435
436 if (request_firmware(&fw, (const char *)fw_name, device)) { 436 if (request_firmware_direct(&fw, (const char *)fw_name, device)) {
437 pr_debug("failed to load file %s\n", fw_name); 437 pr_debug("failed to load file %s\n", fw_name);
438 goto out; 438 goto out;
439 } 439 }
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 5fb2cebf556b..a276fa75d9b5 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -278,7 +278,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device,
278 sprintf(name, "intel-ucode/%02x-%02x-%02x", 278 sprintf(name, "intel-ucode/%02x-%02x-%02x",
279 c->x86, c->x86_model, c->x86_mask); 279 c->x86, c->x86_model, c->x86_mask);
280 280
281 if (request_firmware(&firmware, name, device)) { 281 if (request_firmware_direct(&firmware, name, device)) {
282 pr_debug("data file %s load failed\n", name); 282 pr_debug("data file %s load failed\n", name);
283 return UCODE_NFOUND; 283 return UCODE_NFOUND;
284 } 284 }
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 174da5fc5a7b..988c00a1f60d 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1120,7 +1120,7 @@ void __init memblock_find_dma_reserve(void)
1120 nr_pages += end_pfn - start_pfn; 1120 nr_pages += end_pfn - start_pfn;
1121 } 1121 }
1122 1122
1123 for_each_free_mem_range(u, MAX_NUMNODES, &start, &end, NULL) { 1123 for_each_free_mem_range(u, NUMA_NO_NODE, &start, &end, NULL) {
1124 start_pfn = min_t(unsigned long, PFN_UP(start), MAX_DMA_PFN); 1124 start_pfn = min_t(unsigned long, PFN_UP(start), MAX_DMA_PFN);
1125 end_pfn = min_t(unsigned long, PFN_DOWN(end), MAX_DMA_PFN); 1125 end_pfn = min_t(unsigned long, PFN_DOWN(end), MAX_DMA_PFN);
1126 if (start_pfn < end_pfn) 1126 if (start_pfn < end_pfn)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 6dd802c6d780..713f1b3bad52 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -500,6 +500,38 @@ void __init kvm_guest_init(void)
500#endif 500#endif
501} 501}
502 502
503static noinline uint32_t __kvm_cpuid_base(void)
504{
505 if (boot_cpu_data.cpuid_level < 0)
506 return 0; /* So we don't blow up on old processors */
507
508 if (cpu_has_hypervisor)
509 return hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0);
510
511 return 0;
512}
513
514static inline uint32_t kvm_cpuid_base(void)
515{
516 static int kvm_cpuid_base = -1;
517
518 if (kvm_cpuid_base == -1)
519 kvm_cpuid_base = __kvm_cpuid_base();
520
521 return kvm_cpuid_base;
522}
523
524bool kvm_para_available(void)
525{
526 return kvm_cpuid_base() != 0;
527}
528EXPORT_SYMBOL_GPL(kvm_para_available);
529
530unsigned int kvm_arch_para_features(void)
531{
532 return cpuid_eax(kvm_cpuid_base() | KVM_CPUID_FEATURES);
533}
534
503static uint32_t __init kvm_detect(void) 535static uint32_t __init kvm_detect(void)
504{ 536{
505 return kvm_cpuid_base(); 537 return kvm_cpuid_base();
@@ -673,7 +705,7 @@ static cpumask_t waiting_cpus;
673/* Track spinlock on which a cpu is waiting */ 705/* Track spinlock on which a cpu is waiting */
674static DEFINE_PER_CPU(struct kvm_lock_waiting, klock_waiting); 706static DEFINE_PER_CPU(struct kvm_lock_waiting, klock_waiting);
675 707
676static void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want) 708__visible void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
677{ 709{
678 struct kvm_lock_waiting *w; 710 struct kvm_lock_waiting *w;
679 int cpu; 711 int cpu;
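The block moved into kvm.c caches the CPUID base so the KVMKVMKVM signature scan runs only once. A user-space analogue of both the detection and the caching, probing just the default 0x40000000 base rather than the full range hypervisor_cpuid_base() walks:

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <cpuid.h>

static uint32_t probe_kvm_base(void)
{
        unsigned int eax, ebx, ecx, edx;
        char sig[13];

        /* CPUID.1:ECX bit 31 is the "hypervisor present" bit. */
        if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx) || !(ecx & (1u << 31)))
                return 0;

        __cpuid(0x40000000, eax, ebx, ecx, edx);
        memcpy(sig + 0, &ebx, 4);
        memcpy(sig + 4, &ecx, 4);
        memcpy(sig + 8, &edx, 4);
        sig[12] = '\0';

        return strcmp(sig, "KVMKVMKVM") == 0 ? 0x40000000 : 0;
}

/* Same caching idea as the patched kvm_cpuid_base(): probe once, remember it. */
static uint32_t kvm_base(void)
{
        static uint32_t cached = UINT32_MAX;

        if (cached == UINT32_MAX)
                cached = probe_kvm_base();
        return cached;
}

int main(void)
{
        uint32_t base = kvm_base();

        if (base)
                printf("running on KVM, cpuid base %#x\n", base);
        else
                printf("KVM signature not found\n");
        return 0;
}

Run inside a KVM guest this prints the base; on bare metal or another hypervisor it reports that the signature was not found.
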
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index a3acbac2ee72..19e5adb49a27 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -180,7 +180,7 @@ static void cyc2ns_write_end(int cpu, struct cyc2ns_data *data)
180 180
181static void cyc2ns_data_init(struct cyc2ns_data *data) 181static void cyc2ns_data_init(struct cyc2ns_data *data)
182{ 182{
183 data->cyc2ns_mul = 1U << CYC2NS_SCALE_FACTOR; 183 data->cyc2ns_mul = 0;
184 data->cyc2ns_shift = CYC2NS_SCALE_FACTOR; 184 data->cyc2ns_shift = CYC2NS_SCALE_FACTOR;
185 data->cyc2ns_offset = 0; 185 data->cyc2ns_offset = 0;
186 data->__count = 0; 186 data->__count = 0;
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index 992f890283e9..f6584a90aba3 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -33,7 +33,7 @@
33 * and vice versa. 33 * and vice versa.
34 */ 34 */
35 35
36static unsigned long vsmp_save_fl(void) 36asmlinkage unsigned long vsmp_save_fl(void)
37{ 37{
38 unsigned long flags = native_save_fl(); 38 unsigned long flags = native_save_fl();
39 39
@@ -43,7 +43,7 @@ static unsigned long vsmp_save_fl(void)
43} 43}
44PV_CALLEE_SAVE_REGS_THUNK(vsmp_save_fl); 44PV_CALLEE_SAVE_REGS_THUNK(vsmp_save_fl);
45 45
46static void vsmp_restore_fl(unsigned long flags) 46__visible void vsmp_restore_fl(unsigned long flags)
47{ 47{
48 if (flags & X86_EFLAGS_IF) 48 if (flags & X86_EFLAGS_IF)
49 flags &= ~X86_EFLAGS_AC; 49 flags &= ~X86_EFLAGS_AC;
@@ -53,7 +53,7 @@ static void vsmp_restore_fl(unsigned long flags)
53} 53}
54PV_CALLEE_SAVE_REGS_THUNK(vsmp_restore_fl); 54PV_CALLEE_SAVE_REGS_THUNK(vsmp_restore_fl);
55 55
56static void vsmp_irq_disable(void) 56asmlinkage void vsmp_irq_disable(void)
57{ 57{
58 unsigned long flags = native_save_fl(); 58 unsigned long flags = native_save_fl();
59 59
@@ -61,7 +61,7 @@ static void vsmp_irq_disable(void)
61} 61}
62PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_disable); 62PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_disable);
63 63
64static void vsmp_irq_enable(void) 64asmlinkage void vsmp_irq_enable(void)
65{ 65{
66 unsigned long flags = native_save_fl(); 66 unsigned long flags = native_save_fl();
67 67
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 021783b1f46a..e48b674639cc 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -136,9 +136,9 @@ void arch_teardown_msi_irq(unsigned int irq)
136 x86_msi.teardown_msi_irq(irq); 136 x86_msi.teardown_msi_irq(irq);
137} 137}
138 138
139void arch_restore_msi_irqs(struct pci_dev *dev, int irq) 139void arch_restore_msi_irqs(struct pci_dev *dev)
140{ 140{
141 x86_msi.restore_msi_irqs(dev, irq); 141 x86_msi.restore_msi_irqs(dev);
142} 142}
143u32 arch_msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag) 143u32 arch_msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
144{ 144{
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index b89c5db2b832..287e4c85fff9 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -80,7 +80,7 @@ config KVM_MMU_AUDIT
80 depends on KVM && TRACEPOINTS 80 depends on KVM && TRACEPOINTS
81 ---help--- 81 ---help---
82 This option adds a R/W kVM module parameter 'mmu_audit', which allows 82 This option adds a R/W kVM module parameter 'mmu_audit', which allows
83 audit KVM MMU at runtime. 83 auditing of KVM MMU events at runtime.
84 84
85config KVM_DEVICE_ASSIGNMENT 85config KVM_DEVICE_ASSIGNMENT
86 bool "KVM legacy PCI device assignment support" 86 bool "KVM legacy PCI device assignment support"
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index f1e4895174b2..a2a1bb7ed8c1 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -72,4 +72,12 @@ static inline bool guest_cpuid_has_pcid(struct kvm_vcpu *vcpu)
72 return best && (best->ecx & bit(X86_FEATURE_PCID)); 72 return best && (best->ecx & bit(X86_FEATURE_PCID));
73} 73}
74 74
75static inline bool guest_cpuid_has_x2apic(struct kvm_vcpu *vcpu)
76{
77 struct kvm_cpuid_entry2 *best;
78
79 best = kvm_find_cpuid_entry(vcpu, 1, 0);
80 return best && (best->ecx & bit(X86_FEATURE_X2APIC));
81}
82
75#endif 83#endif
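guest_cpuid_has_x2apic() inspects the guest's CPUID leaf 1 for the x2APIC feature bit (ECX bit 21). The same bit can be read directly on the host, for example:

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
                return 1;
        printf("x2APIC advertised: %s\n", (ecx & (1u << 21)) ? "yes" : "no");
        return 0;
}
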
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 412a5aa0ef94..518d86471b76 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -37,6 +37,7 @@
37 37
38#include "irq.h" 38#include "irq.h"
39#include "i8254.h" 39#include "i8254.h"
40#include "x86.h"
40 41
41#ifndef CONFIG_X86_64 42#ifndef CONFIG_X86_64
42#define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) 43#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
@@ -349,6 +350,23 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
349 atomic_set(&ps->pending, 0); 350 atomic_set(&ps->pending, 0);
350 ps->irq_ack = 1; 351 ps->irq_ack = 1;
351 352
353 /*
354 * Do not allow the guest to program periodic timers with small
355 * interval, since the hrtimers are not throttled by the host
356 * scheduler.
357 */
358 if (ps->is_periodic) {
359 s64 min_period = min_timer_period_us * 1000LL;
360
361 if (ps->period < min_period) {
362 pr_info_ratelimited(
363 "kvm: requested %lld ns "
364 "i8254 timer period limited to %lld ns\n",
365 ps->period, min_period);
366 ps->period = min_period;
367 }
368 }
369
352 hrtimer_start(&ps->timer, ktime_add_ns(ktime_get(), interval), 370 hrtimer_start(&ps->timer, ktime_add_ns(ktime_get(), interval),
353 HRTIMER_MODE_ABS); 371 HRTIMER_MODE_ABS);
354} 372}
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 775702f649ca..9736529ade08 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -71,9 +71,6 @@
71#define VEC_POS(v) ((v) & (32 - 1)) 71#define VEC_POS(v) ((v) & (32 - 1))
72#define REG_POS(v) (((v) >> 5) << 4) 72#define REG_POS(v) (((v) >> 5) << 4)
73 73
74static unsigned int min_timer_period_us = 500;
75module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
76
77static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val) 74static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
78{ 75{
79 *((u32 *) (apic->regs + reg_off)) = val; 76 *((u32 *) (apic->regs + reg_off)) = val;
@@ -435,7 +432,7 @@ static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
435 u8 val; 432 u8 val;
436 if (pv_eoi_get_user(vcpu, &val) < 0) 433 if (pv_eoi_get_user(vcpu, &val) < 0)
437 apic_debug("Can't read EOI MSR value: 0x%llx\n", 434 apic_debug("Can't read EOI MSR value: 0x%llx\n",
438 (unsigned long long)vcpi->arch.pv_eoi.msr_val); 435 (unsigned long long)vcpu->arch.pv_eoi.msr_val);
439 return val & 0x1; 436 return val & 0x1;
440} 437}
441 438
@@ -443,7 +440,7 @@ static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
443{ 440{
444 if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) { 441 if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
445 apic_debug("Can't set EOI MSR value: 0x%llx\n", 442 apic_debug("Can't set EOI MSR value: 0x%llx\n",
446 (unsigned long long)vcpi->arch.pv_eoi.msr_val); 443 (unsigned long long)vcpu->arch.pv_eoi.msr_val);
447 return; 444 return;
448 } 445 }
449 __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); 446 __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
@@ -453,7 +450,7 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
453{ 450{
454 if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) { 451 if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
455 apic_debug("Can't clear EOI MSR value: 0x%llx\n", 452 apic_debug("Can't clear EOI MSR value: 0x%llx\n",
456 (unsigned long long)vcpi->arch.pv_eoi.msr_val); 453 (unsigned long long)vcpu->arch.pv_eoi.msr_val);
457 return; 454 return;
458 } 455 }
459 __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); 456 __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index c8b0d0d2da5c..6a11845fd8b9 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -65,7 +65,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
65 struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map); 65 struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map);
66 66
67u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); 67u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
68void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); 68int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
69void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, 69void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
70 struct kvm_lapic_state *s); 70 struct kvm_lapic_state *s);
71int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); 71int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 40772ef0f2b1..e50425d0f5f7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2659,6 +2659,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
2659 int emulate = 0; 2659 int emulate = 0;
2660 gfn_t pseudo_gfn; 2660 gfn_t pseudo_gfn;
2661 2661
2662 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
2663 return 0;
2664
2662 for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { 2665 for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
2663 if (iterator.level == level) { 2666 if (iterator.level == level) {
2664 mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, 2667 mmu_set_spte(vcpu, iterator.sptep, ACC_ALL,
@@ -2829,6 +2832,9 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
2829 bool ret = false; 2832 bool ret = false;
2830 u64 spte = 0ull; 2833 u64 spte = 0ull;
2831 2834
2835 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
2836 return false;
2837
2832 if (!page_fault_can_be_fast(error_code)) 2838 if (!page_fault_can_be_fast(error_code))
2833 return false; 2839 return false;
2834 2840
@@ -3224,6 +3230,9 @@ static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr)
3224 struct kvm_shadow_walk_iterator iterator; 3230 struct kvm_shadow_walk_iterator iterator;
3225 u64 spte = 0ull; 3231 u64 spte = 0ull;
3226 3232
3233 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
3234 return spte;
3235
3227 walk_shadow_page_lockless_begin(vcpu); 3236 walk_shadow_page_lockless_begin(vcpu);
3228 for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) 3237 for_each_shadow_entry_lockless(vcpu, addr, iterator, spte)
3229 if (!is_shadow_present_pte(spte)) 3238 if (!is_shadow_present_pte(spte))
@@ -4510,6 +4519,9 @@ int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4])
4510 u64 spte; 4519 u64 spte;
4511 int nr_sptes = 0; 4520 int nr_sptes = 0;
4512 4521
4522 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
4523 return nr_sptes;
4524
4513 walk_shadow_page_lockless_begin(vcpu); 4525 walk_shadow_page_lockless_begin(vcpu);
4514 for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) { 4526 for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) {
4515 sptes[iterator.level-1] = spte; 4527 sptes[iterator.level-1] = spte;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index ad75d77999d0..cba218a2f08d 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -569,6 +569,9 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
569 if (FNAME(gpte_changed)(vcpu, gw, top_level)) 569 if (FNAME(gpte_changed)(vcpu, gw, top_level))
570 goto out_gpte_changed; 570 goto out_gpte_changed;
571 571
572 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
573 goto out_gpte_changed;
574
572 for (shadow_walk_init(&it, vcpu, addr); 575 for (shadow_walk_init(&it, vcpu, addr);
573 shadow_walk_okay(&it) && it.level > gw->level; 576 shadow_walk_okay(&it) && it.level > gw->level;
574 shadow_walk_next(&it)) { 577 shadow_walk_next(&it)) {
@@ -820,6 +823,11 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
820 */ 823 */
821 mmu_topup_memory_caches(vcpu); 824 mmu_topup_memory_caches(vcpu);
822 825
826 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) {
827 WARN_ON(1);
828 return;
829 }
830
823 spin_lock(&vcpu->kvm->mmu_lock); 831 spin_lock(&vcpu->kvm->mmu_lock);
824 for_each_shadow_entry(vcpu, gva, iterator) { 832 for_each_shadow_entry(vcpu, gva, iterator) {
825 level = iterator.level; 833 level = iterator.level;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c7168a5cff1b..e81df8fce027 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1671,6 +1671,19 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
1671 mark_dirty(svm->vmcb, VMCB_ASID); 1671 mark_dirty(svm->vmcb, VMCB_ASID);
1672} 1672}
1673 1673
1674static u64 svm_get_dr6(struct kvm_vcpu *vcpu)
1675{
1676 return to_svm(vcpu)->vmcb->save.dr6;
1677}
1678
1679static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value)
1680{
1681 struct vcpu_svm *svm = to_svm(vcpu);
1682
1683 svm->vmcb->save.dr6 = value;
1684 mark_dirty(svm->vmcb, VMCB_DR);
1685}
1686
1674static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) 1687static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
1675{ 1688{
1676 struct vcpu_svm *svm = to_svm(vcpu); 1689 struct vcpu_svm *svm = to_svm(vcpu);
@@ -4286,6 +4299,8 @@ static struct kvm_x86_ops svm_x86_ops = {
4286 .set_idt = svm_set_idt, 4299 .set_idt = svm_set_idt,
4287 .get_gdt = svm_get_gdt, 4300 .get_gdt = svm_get_gdt,
4288 .set_gdt = svm_set_gdt, 4301 .set_gdt = svm_set_gdt,
4302 .get_dr6 = svm_get_dr6,
4303 .set_dr6 = svm_set_dr6,
4289 .set_dr7 = svm_set_dr7, 4304 .set_dr7 = svm_set_dr7,
4290 .cache_reg = svm_cache_reg, 4305 .cache_reg = svm_cache_reg,
4291 .get_rflags = svm_get_rflags, 4306 .get_rflags = svm_get_rflags,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index da7837e1349d..a06f101ef64b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -418,6 +418,8 @@ struct vcpu_vmx {
418 u64 msr_host_kernel_gs_base; 418 u64 msr_host_kernel_gs_base;
419 u64 msr_guest_kernel_gs_base; 419 u64 msr_guest_kernel_gs_base;
420#endif 420#endif
421 u32 vm_entry_controls_shadow;
422 u32 vm_exit_controls_shadow;
421 /* 423 /*
422 * loaded_vmcs points to the VMCS currently used in this vcpu. For a 424 * loaded_vmcs points to the VMCS currently used in this vcpu. For a
423 * non-nested (L1) guest, it always points to vmcs01. For a nested 425 * non-nested (L1) guest, it always points to vmcs01. For a nested
@@ -1056,7 +1058,9 @@ static inline bool is_exception(u32 intr_info)
1056 == (INTR_TYPE_HARD_EXCEPTION | INTR_INFO_VALID_MASK); 1058 == (INTR_TYPE_HARD_EXCEPTION | INTR_INFO_VALID_MASK);
1057} 1059}
1058 1060
1059static void nested_vmx_vmexit(struct kvm_vcpu *vcpu); 1061static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
1062 u32 exit_intr_info,
1063 unsigned long exit_qualification);
1060static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu, 1064static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu,
1061 struct vmcs12 *vmcs12, 1065 struct vmcs12 *vmcs12,
1062 u32 reason, unsigned long qualification); 1066 u32 reason, unsigned long qualification);
@@ -1326,6 +1330,62 @@ static void vmcs_set_bits(unsigned long field, u32 mask)
1326 vmcs_writel(field, vmcs_readl(field) | mask); 1330 vmcs_writel(field, vmcs_readl(field) | mask);
1327} 1331}
1328 1332
1333static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val)
1334{
1335 vmcs_write32(VM_ENTRY_CONTROLS, val);
1336 vmx->vm_entry_controls_shadow = val;
1337}
1338
1339static inline void vm_entry_controls_set(struct vcpu_vmx *vmx, u32 val)
1340{
1341 if (vmx->vm_entry_controls_shadow != val)
1342 vm_entry_controls_init(vmx, val);
1343}
1344
1345static inline u32 vm_entry_controls_get(struct vcpu_vmx *vmx)
1346{
1347 return vmx->vm_entry_controls_shadow;
1348}
1349
1350
1351static inline void vm_entry_controls_setbit(struct vcpu_vmx *vmx, u32 val)
1352{
1353 vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) | val);
1354}
1355
1356static inline void vm_entry_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
1357{
1358 vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) & ~val);
1359}
1360
1361static inline void vm_exit_controls_init(struct vcpu_vmx *vmx, u32 val)
1362{
1363 vmcs_write32(VM_EXIT_CONTROLS, val);
1364 vmx->vm_exit_controls_shadow = val;
1365}
1366
1367static inline void vm_exit_controls_set(struct vcpu_vmx *vmx, u32 val)
1368{
1369 if (vmx->vm_exit_controls_shadow != val)
1370 vm_exit_controls_init(vmx, val);
1371}
1372
1373static inline u32 vm_exit_controls_get(struct vcpu_vmx *vmx)
1374{
1375 return vmx->vm_exit_controls_shadow;
1376}
1377
1378
1379static inline void vm_exit_controls_setbit(struct vcpu_vmx *vmx, u32 val)
1380{
1381 vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) | val);
1382}
1383
1384static inline void vm_exit_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
1385{
1386 vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) & ~val);
1387}
1388
1329static void vmx_segment_cache_clear(struct vcpu_vmx *vmx) 1389static void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
1330{ 1390{
1331 vmx->segment_cache.bitmask = 0; 1391 vmx->segment_cache.bitmask = 0;
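The vm_entry_controls_*/vm_exit_controls_* helpers keep a write-through shadow of two VMCS fields so later read-modify-write updates avoid VMREADs and skip redundant VMWRITEs. A stand-alone sketch of that caching pattern (hw_write() stands in for vmcs_write32(); nothing here is VMX-specific):

#include <stdint.h>
#include <stdio.h>

struct ctrls_cache {
        uint32_t shadow;                /* last value written to "hardware" */
};

static void hw_write(uint32_t val)      /* placeholder for the expensive access */
{
        printf("hw write: %#x\n", (unsigned)val);
}

static void ctrls_init(struct ctrls_cache *c, uint32_t val)
{
        hw_write(val);
        c->shadow = val;
}

static void ctrls_set(struct ctrls_cache *c, uint32_t val)
{
        if (c->shadow != val)           /* skip the write when nothing changes */
                ctrls_init(c, val);
}

static void ctrls_setbit(struct ctrls_cache *c, uint32_t bit)
{
        ctrls_set(c, c->shadow | bit);
}

static void ctrls_clearbit(struct ctrls_cache *c, uint32_t bit)
{
        ctrls_set(c, c->shadow & ~bit);
}

int main(void)
{
        struct ctrls_cache c;

        ctrls_init(&c, 0x10);
        ctrls_setbit(&c, 0x4);          /* one hardware write */
        ctrls_setbit(&c, 0x4);          /* absorbed by the shadow: no write */
        ctrls_clearbit(&c, 0x10);
        return 0;
}

The second setbit call is absorbed by the shadow comparison; that is the saving the later hunks (vmx_set_efer(), exit_lmode(), the atomic-switch helpers) build on.
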
@@ -1410,11 +1470,11 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
1410 vmcs_write32(EXCEPTION_BITMAP, eb); 1470 vmcs_write32(EXCEPTION_BITMAP, eb);
1411} 1471}
1412 1472
1413static void clear_atomic_switch_msr_special(unsigned long entry, 1473static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
1414 unsigned long exit) 1474 unsigned long entry, unsigned long exit)
1415{ 1475{
1416 vmcs_clear_bits(VM_ENTRY_CONTROLS, entry); 1476 vm_entry_controls_clearbit(vmx, entry);
1417 vmcs_clear_bits(VM_EXIT_CONTROLS, exit); 1477 vm_exit_controls_clearbit(vmx, exit);
1418} 1478}
1419 1479
1420static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) 1480static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
@@ -1425,14 +1485,15 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
1425 switch (msr) { 1485 switch (msr) {
1426 case MSR_EFER: 1486 case MSR_EFER:
1427 if (cpu_has_load_ia32_efer) { 1487 if (cpu_has_load_ia32_efer) {
1428 clear_atomic_switch_msr_special(VM_ENTRY_LOAD_IA32_EFER, 1488 clear_atomic_switch_msr_special(vmx,
1489 VM_ENTRY_LOAD_IA32_EFER,
1429 VM_EXIT_LOAD_IA32_EFER); 1490 VM_EXIT_LOAD_IA32_EFER);
1430 return; 1491 return;
1431 } 1492 }
1432 break; 1493 break;
1433 case MSR_CORE_PERF_GLOBAL_CTRL: 1494 case MSR_CORE_PERF_GLOBAL_CTRL:
1434 if (cpu_has_load_perf_global_ctrl) { 1495 if (cpu_has_load_perf_global_ctrl) {
1435 clear_atomic_switch_msr_special( 1496 clear_atomic_switch_msr_special(vmx,
1436 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, 1497 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
1437 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL); 1498 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
1438 return; 1499 return;
@@ -1453,14 +1514,15 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
1453 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr); 1514 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
1454} 1515}
1455 1516
1456static void add_atomic_switch_msr_special(unsigned long entry, 1517static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
1457 unsigned long exit, unsigned long guest_val_vmcs, 1518 unsigned long entry, unsigned long exit,
1458 unsigned long host_val_vmcs, u64 guest_val, u64 host_val) 1519 unsigned long guest_val_vmcs, unsigned long host_val_vmcs,
1520 u64 guest_val, u64 host_val)
1459{ 1521{
1460 vmcs_write64(guest_val_vmcs, guest_val); 1522 vmcs_write64(guest_val_vmcs, guest_val);
1461 vmcs_write64(host_val_vmcs, host_val); 1523 vmcs_write64(host_val_vmcs, host_val);
1462 vmcs_set_bits(VM_ENTRY_CONTROLS, entry); 1524 vm_entry_controls_setbit(vmx, entry);
1463 vmcs_set_bits(VM_EXIT_CONTROLS, exit); 1525 vm_exit_controls_setbit(vmx, exit);
1464} 1526}
1465 1527
1466static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, 1528static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
@@ -1472,7 +1534,8 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
1472 switch (msr) { 1534 switch (msr) {
1473 case MSR_EFER: 1535 case MSR_EFER:
1474 if (cpu_has_load_ia32_efer) { 1536 if (cpu_has_load_ia32_efer) {
1475 add_atomic_switch_msr_special(VM_ENTRY_LOAD_IA32_EFER, 1537 add_atomic_switch_msr_special(vmx,
1538 VM_ENTRY_LOAD_IA32_EFER,
1476 VM_EXIT_LOAD_IA32_EFER, 1539 VM_EXIT_LOAD_IA32_EFER,
1477 GUEST_IA32_EFER, 1540 GUEST_IA32_EFER,
1478 HOST_IA32_EFER, 1541 HOST_IA32_EFER,
@@ -1482,7 +1545,7 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
1482 break; 1545 break;
1483 case MSR_CORE_PERF_GLOBAL_CTRL: 1546 case MSR_CORE_PERF_GLOBAL_CTRL:
1484 if (cpu_has_load_perf_global_ctrl) { 1547 if (cpu_has_load_perf_global_ctrl) {
1485 add_atomic_switch_msr_special( 1548 add_atomic_switch_msr_special(vmx,
1486 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, 1549 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
1487 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL, 1550 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL,
1488 GUEST_IA32_PERF_GLOBAL_CTRL, 1551 GUEST_IA32_PERF_GLOBAL_CTRL,
@@ -1906,7 +1969,9 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr)
1906 if (!(vmcs12->exception_bitmap & (1u << nr))) 1969 if (!(vmcs12->exception_bitmap & (1u << nr)))
1907 return 0; 1970 return 0;
1908 1971
1909 nested_vmx_vmexit(vcpu); 1972 nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason,
1973 vmcs_read32(VM_EXIT_INTR_INFO),
1974 vmcs_readl(EXIT_QUALIFICATION));
1910 return 1; 1975 return 1;
1911} 1976}
1912 1977
@@ -2279,6 +2344,7 @@ static __init void nested_vmx_setup_ctls_msrs(void)
2279 rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high); 2344 rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high);
2280 nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK | 2345 nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK |
2281 VMX_MISC_SAVE_EFER_LMA; 2346 VMX_MISC_SAVE_EFER_LMA;
2347 nested_vmx_misc_low |= VMX_MISC_ACTIVITY_HLT;
2282 nested_vmx_misc_high = 0; 2348 nested_vmx_misc_high = 0;
2283} 2349}
2284 2350
@@ -2295,32 +2361,10 @@ static inline u64 vmx_control_msr(u32 low, u32 high)
2295 return low | ((u64)high << 32); 2361 return low | ((u64)high << 32);
2296} 2362}
2297 2363
2298/* 2364/* Returns 0 on success, non-0 otherwise. */
2299 * If we allow our guest to use VMX instructions (i.e., nested VMX), we should
2300 * also let it use VMX-specific MSRs.
2301 * vmx_get_vmx_msr() and vmx_set_vmx_msr() return 1 when we handled a
2302 * VMX-specific MSR, or 0 when we haven't (and the caller should handle it
2303 * like all other MSRs).
2304 */
2305static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) 2365static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
2306{ 2366{
2307 if (!nested_vmx_allowed(vcpu) && msr_index >= MSR_IA32_VMX_BASIC &&
2308 msr_index <= MSR_IA32_VMX_TRUE_ENTRY_CTLS) {
2309 /*
2310 * According to the spec, processors which do not support VMX
2311 * should throw a #GP(0) when VMX capability MSRs are read.
2312 */
2313 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
2314 return 1;
2315 }
2316
2317 switch (msr_index) { 2367 switch (msr_index) {
2318 case MSR_IA32_FEATURE_CONTROL:
2319 if (nested_vmx_allowed(vcpu)) {
2320 *pdata = to_vmx(vcpu)->nested.msr_ia32_feature_control;
2321 break;
2322 }
2323 return 0;
2324 case MSR_IA32_VMX_BASIC: 2368 case MSR_IA32_VMX_BASIC:
2325 /* 2369 /*
2326 * This MSR reports some information about VMX support. We 2370 * This MSR reports some information about VMX support. We
@@ -2387,34 +2431,9 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
2387 *pdata = nested_vmx_ept_caps; 2431 *pdata = nested_vmx_ept_caps;
2388 break; 2432 break;
2389 default: 2433 default:
2390 return 0;
2391 }
2392
2393 return 1;
2394}
2395
2396static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2397{
2398 u32 msr_index = msr_info->index;
2399 u64 data = msr_info->data;
2400 bool host_initialized = msr_info->host_initiated;
2401
2402 if (!nested_vmx_allowed(vcpu))
2403 return 0;
2404
2405 if (msr_index == MSR_IA32_FEATURE_CONTROL) {
2406 if (!host_initialized &&
2407 to_vmx(vcpu)->nested.msr_ia32_feature_control
2408 & FEATURE_CONTROL_LOCKED)
2409 return 0;
2410 to_vmx(vcpu)->nested.msr_ia32_feature_control = data;
2411 return 1; 2434 return 1;
2412 } 2435 }
2413 2436
2414 /*
2415 * No need to treat VMX capability MSRs specially: If we don't handle
2416 * them, handle_wrmsr will #GP(0), which is correct (they are readonly)
2417 */
2418 return 0; 2437 return 0;
2419} 2438}
2420 2439
@@ -2460,13 +2479,20 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
2460 case MSR_IA32_SYSENTER_ESP: 2479 case MSR_IA32_SYSENTER_ESP:
2461 data = vmcs_readl(GUEST_SYSENTER_ESP); 2480 data = vmcs_readl(GUEST_SYSENTER_ESP);
2462 break; 2481 break;
2482 case MSR_IA32_FEATURE_CONTROL:
2483 if (!nested_vmx_allowed(vcpu))
2484 return 1;
2485 data = to_vmx(vcpu)->nested.msr_ia32_feature_control;
2486 break;
2487 case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
2488 if (!nested_vmx_allowed(vcpu))
2489 return 1;
2490 return vmx_get_vmx_msr(vcpu, msr_index, pdata);
2463 case MSR_TSC_AUX: 2491 case MSR_TSC_AUX:
2464 if (!to_vmx(vcpu)->rdtscp_enabled) 2492 if (!to_vmx(vcpu)->rdtscp_enabled)
2465 return 1; 2493 return 1;
2466 /* Otherwise falls through */ 2494 /* Otherwise falls through */
2467 default: 2495 default:
2468 if (vmx_get_vmx_msr(vcpu, msr_index, pdata))
2469 return 0;
2470 msr = find_msr_entry(to_vmx(vcpu), msr_index); 2496 msr = find_msr_entry(to_vmx(vcpu), msr_index);
2471 if (msr) { 2497 if (msr) {
2472 data = msr->data; 2498 data = msr->data;
@@ -2479,6 +2505,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
2479 return 0; 2505 return 0;
2480} 2506}
2481 2507
2508static void vmx_leave_nested(struct kvm_vcpu *vcpu);
2509
2482/* 2510/*
2483 * Writes msr value into into the appropriate "register". 2511 * Writes msr value into into the appropriate "register".
2484 * Returns 0 on success, non-0 otherwise. 2512 * Returns 0 on success, non-0 otherwise.
@@ -2533,6 +2561,17 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2533 case MSR_IA32_TSC_ADJUST: 2561 case MSR_IA32_TSC_ADJUST:
2534 ret = kvm_set_msr_common(vcpu, msr_info); 2562 ret = kvm_set_msr_common(vcpu, msr_info);
2535 break; 2563 break;
2564 case MSR_IA32_FEATURE_CONTROL:
2565 if (!nested_vmx_allowed(vcpu) ||
2566 (to_vmx(vcpu)->nested.msr_ia32_feature_control &
2567 FEATURE_CONTROL_LOCKED && !msr_info->host_initiated))
2568 return 1;
2569 vmx->nested.msr_ia32_feature_control = data;
2570 if (msr_info->host_initiated && data == 0)
2571 vmx_leave_nested(vcpu);
2572 break;
2573 case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
2574 return 1; /* they are read-only */
2536 case MSR_TSC_AUX: 2575 case MSR_TSC_AUX:
2537 if (!vmx->rdtscp_enabled) 2576 if (!vmx->rdtscp_enabled)
2538 return 1; 2577 return 1;
@@ -2541,8 +2580,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2541 return 1; 2580 return 1;
2542 /* Otherwise falls through */ 2581 /* Otherwise falls through */
2543 default: 2582 default:
2544 if (vmx_set_vmx_msr(vcpu, msr_info))
2545 break;
2546 msr = find_msr_entry(vmx, msr_index); 2583 msr = find_msr_entry(vmx, msr_index);
2547 if (msr) { 2584 if (msr) {
2548 msr->data = data; 2585 msr->data = data;
@@ -3182,14 +3219,10 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
3182 vmx_load_host_state(to_vmx(vcpu)); 3219 vmx_load_host_state(to_vmx(vcpu));
3183 vcpu->arch.efer = efer; 3220 vcpu->arch.efer = efer;
3184 if (efer & EFER_LMA) { 3221 if (efer & EFER_LMA) {
3185 vmcs_write32(VM_ENTRY_CONTROLS, 3222 vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
3186 vmcs_read32(VM_ENTRY_CONTROLS) |
3187 VM_ENTRY_IA32E_MODE);
3188 msr->data = efer; 3223 msr->data = efer;
3189 } else { 3224 } else {
3190 vmcs_write32(VM_ENTRY_CONTROLS, 3225 vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
3191 vmcs_read32(VM_ENTRY_CONTROLS) &
3192 ~VM_ENTRY_IA32E_MODE);
3193 3226
3194 msr->data = efer & ~EFER_LME; 3227 msr->data = efer & ~EFER_LME;
3195 } 3228 }
@@ -3217,9 +3250,7 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
3217 3250
3218static void exit_lmode(struct kvm_vcpu *vcpu) 3251static void exit_lmode(struct kvm_vcpu *vcpu)
3219{ 3252{
3220 vmcs_write32(VM_ENTRY_CONTROLS, 3253 vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
3221 vmcs_read32(VM_ENTRY_CONTROLS)
3222 & ~VM_ENTRY_IA32E_MODE);
3223 vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA); 3254 vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA);
3224} 3255}
3225 3256
@@ -4346,10 +4377,11 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
4346 ++vmx->nmsrs; 4377 ++vmx->nmsrs;
4347 } 4378 }
4348 4379
4349 vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); 4380
4381 vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl);
4350 4382
4351 /* 22.2.1, 20.8.1 */ 4383 /* 22.2.1, 20.8.1 */
4352 vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl); 4384 vm_entry_controls_init(vmx, vmcs_config.vmentry_ctrl);
4353 4385
4354 vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); 4386 vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
4355 set_cr4_guest_host_mask(vmx); 4387 set_cr4_guest_host_mask(vmx);
@@ -4360,7 +4392,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
4360static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) 4392static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
4361{ 4393{
4362 struct vcpu_vmx *vmx = to_vmx(vcpu); 4394 struct vcpu_vmx *vmx = to_vmx(vcpu);
4363 u64 msr; 4395 struct msr_data apic_base_msr;
4364 4396
4365 vmx->rmode.vm86_active = 0; 4397 vmx->rmode.vm86_active = 0;
4366 4398
@@ -4368,10 +4400,11 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
4368 4400
4369 vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); 4401 vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
4370 kvm_set_cr8(&vmx->vcpu, 0); 4402 kvm_set_cr8(&vmx->vcpu, 0);
4371 msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; 4403 apic_base_msr.data = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
4372 if (kvm_vcpu_is_bsp(&vmx->vcpu)) 4404 if (kvm_vcpu_is_bsp(&vmx->vcpu))
4373 msr |= MSR_IA32_APICBASE_BSP; 4405 apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
4374 kvm_set_apic_base(&vmx->vcpu, msr); 4406 apic_base_msr.host_initiated = true;
4407 kvm_set_apic_base(&vmx->vcpu, &apic_base_msr);
4375 4408
4376 vmx_segment_cache_clear(vmx); 4409 vmx_segment_cache_clear(vmx);
4377 4410
@@ -4588,15 +4621,12 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
4588static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) 4621static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
4589{ 4622{
4590 if (is_guest_mode(vcpu)) { 4623 if (is_guest_mode(vcpu)) {
4591 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
4592
4593 if (to_vmx(vcpu)->nested.nested_run_pending) 4624 if (to_vmx(vcpu)->nested.nested_run_pending)
4594 return 0; 4625 return 0;
4595 if (nested_exit_on_nmi(vcpu)) { 4626 if (nested_exit_on_nmi(vcpu)) {
4596 nested_vmx_vmexit(vcpu); 4627 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
4597 vmcs12->vm_exit_reason = EXIT_REASON_EXCEPTION_NMI; 4628 NMI_VECTOR | INTR_TYPE_NMI_INTR |
4598 vmcs12->vm_exit_intr_info = NMI_VECTOR | 4629 INTR_INFO_VALID_MASK, 0);
4599 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK;
4600 /* 4630 /*
4601 * The NMI-triggered VM exit counts as injection: 4631 * The NMI-triggered VM exit counts as injection:
4602 * clear this one and block further NMIs. 4632 * clear this one and block further NMIs.
@@ -4618,15 +4648,11 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
4618static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) 4648static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
4619{ 4649{
4620 if (is_guest_mode(vcpu)) { 4650 if (is_guest_mode(vcpu)) {
4621 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
4622
4623 if (to_vmx(vcpu)->nested.nested_run_pending) 4651 if (to_vmx(vcpu)->nested.nested_run_pending)
4624 return 0; 4652 return 0;
4625 if (nested_exit_on_intr(vcpu)) { 4653 if (nested_exit_on_intr(vcpu)) {
4626 nested_vmx_vmexit(vcpu); 4654 nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT,
4627 vmcs12->vm_exit_reason = 4655 0, 0);
4628 EXIT_REASON_EXTERNAL_INTERRUPT;
4629 vmcs12->vm_exit_intr_info = 0;
4630 /* 4656 /*
4631 * fall through to normal code, but now in L1, not L2 4657 * fall through to normal code, but now in L1, not L2
4632 */ 4658 */
@@ -4812,7 +4838,8 @@ static int handle_exception(struct kvm_vcpu *vcpu)
4812 dr6 = vmcs_readl(EXIT_QUALIFICATION); 4838 dr6 = vmcs_readl(EXIT_QUALIFICATION);
4813 if (!(vcpu->guest_debug & 4839 if (!(vcpu->guest_debug &
4814 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { 4840 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
4815 vcpu->arch.dr6 = dr6 | DR6_FIXED_1; 4841 vcpu->arch.dr6 &= ~15;
4842 vcpu->arch.dr6 |= dr6;
4816 kvm_queue_exception(vcpu, DB_VECTOR); 4843 kvm_queue_exception(vcpu, DB_VECTOR);
4817 return 1; 4844 return 1;
4818 } 4845 }
@@ -5080,14 +5107,27 @@ static int handle_dr(struct kvm_vcpu *vcpu)
5080 reg = DEBUG_REG_ACCESS_REG(exit_qualification); 5107 reg = DEBUG_REG_ACCESS_REG(exit_qualification);
5081 if (exit_qualification & TYPE_MOV_FROM_DR) { 5108 if (exit_qualification & TYPE_MOV_FROM_DR) {
5082 unsigned long val; 5109 unsigned long val;
5083 if (!kvm_get_dr(vcpu, dr, &val)) 5110
5084 kvm_register_write(vcpu, reg, val); 5111 if (kvm_get_dr(vcpu, dr, &val))
5112 return 1;
5113 kvm_register_write(vcpu, reg, val);
5085 } else 5114 } else
5086 kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg]); 5115 if (kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg]))
5116 return 1;
5117
5087 skip_emulated_instruction(vcpu); 5118 skip_emulated_instruction(vcpu);
5088 return 1; 5119 return 1;
5089} 5120}
5090 5121
5122static u64 vmx_get_dr6(struct kvm_vcpu *vcpu)
5123{
5124 return vcpu->arch.dr6;
5125}
5126
5127static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val)
5128{
5129}
5130
5091static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) 5131static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
5092{ 5132{
5093 vmcs_writel(GUEST_DR7, val); 5133 vmcs_writel(GUEST_DR7, val);
@@ -6460,11 +6500,8 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
6460 int size; 6500 int size;
6461 u8 b; 6501 u8 b;
6462 6502
6463 if (nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING))
6464 return 1;
6465
6466 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) 6503 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
6467 return 0; 6504 return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
6468 6505
6469 exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 6506 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
6470 6507
@@ -6628,6 +6665,13 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
6628 struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 6665 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
6629 u32 exit_reason = vmx->exit_reason; 6666 u32 exit_reason = vmx->exit_reason;
6630 6667
6668 trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason,
6669 vmcs_readl(EXIT_QUALIFICATION),
6670 vmx->idt_vectoring_info,
6671 intr_info,
6672 vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
6673 KVM_ISA_VMX);
6674
6631 if (vmx->nested.nested_run_pending) 6675 if (vmx->nested.nested_run_pending)
6632 return 0; 6676 return 0;
6633 6677
@@ -6777,7 +6821,9 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
6777 return handle_invalid_guest_state(vcpu); 6821 return handle_invalid_guest_state(vcpu);
6778 6822
6779 if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) { 6823 if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) {
6780 nested_vmx_vmexit(vcpu); 6824 nested_vmx_vmexit(vcpu, exit_reason,
6825 vmcs_read32(VM_EXIT_INTR_INFO),
6826 vmcs_readl(EXIT_QUALIFICATION));
6781 return 1; 6827 return 1;
6782 } 6828 }
6783 6829
@@ -7332,8 +7378,8 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
7332 struct vcpu_vmx *vmx = to_vmx(vcpu); 7378 struct vcpu_vmx *vmx = to_vmx(vcpu);
7333 7379
7334 free_vpid(vmx); 7380 free_vpid(vmx);
7335 free_nested(vmx);
7336 free_loaded_vmcs(vmx->loaded_vmcs); 7381 free_loaded_vmcs(vmx->loaded_vmcs);
7382 free_nested(vmx);
7337 kfree(vmx->guest_msrs); 7383 kfree(vmx->guest_msrs);
7338 kvm_vcpu_uninit(vcpu); 7384 kvm_vcpu_uninit(vcpu);
7339 kmem_cache_free(kvm_vcpu_cache, vmx); 7385 kmem_cache_free(kvm_vcpu_cache, vmx);
@@ -7518,15 +7564,14 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
7518static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, 7564static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
7519 struct x86_exception *fault) 7565 struct x86_exception *fault)
7520{ 7566{
7521 struct vmcs12 *vmcs12; 7567 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
7522 nested_vmx_vmexit(vcpu); 7568 u32 exit_reason;
7523 vmcs12 = get_vmcs12(vcpu);
7524 7569
7525 if (fault->error_code & PFERR_RSVD_MASK) 7570 if (fault->error_code & PFERR_RSVD_MASK)
7526 vmcs12->vm_exit_reason = EXIT_REASON_EPT_MISCONFIG; 7571 exit_reason = EXIT_REASON_EPT_MISCONFIG;
7527 else 7572 else
7528 vmcs12->vm_exit_reason = EXIT_REASON_EPT_VIOLATION; 7573 exit_reason = EXIT_REASON_EPT_VIOLATION;
7529 vmcs12->exit_qualification = vcpu->arch.exit_qualification; 7574 nested_vmx_vmexit(vcpu, exit_reason, 0, vcpu->arch.exit_qualification);
7530 vmcs12->guest_physical_address = fault->address; 7575 vmcs12->guest_physical_address = fault->address;
7531} 7576}
7532 7577
@@ -7564,7 +7609,9 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
7564 7609
7565 /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */ 7610 /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */
7566 if (vmcs12->exception_bitmap & (1u << PF_VECTOR)) 7611 if (vmcs12->exception_bitmap & (1u << PF_VECTOR))
7567 nested_vmx_vmexit(vcpu); 7612 nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason,
7613 vmcs_read32(VM_EXIT_INTR_INFO),
7614 vmcs_readl(EXIT_QUALIFICATION));
7568 else 7615 else
7569 kvm_inject_page_fault(vcpu, fault); 7616 kvm_inject_page_fault(vcpu, fault);
7570} 7617}
@@ -7706,6 +7753,11 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7706 else 7753 else
7707 vmcs_write64(APIC_ACCESS_ADDR, 7754 vmcs_write64(APIC_ACCESS_ADDR,
7708 page_to_phys(vmx->nested.apic_access_page)); 7755 page_to_phys(vmx->nested.apic_access_page));
7756 } else if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) {
7757 exec_control |=
7758 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
7759 vmcs_write64(APIC_ACCESS_ADDR,
7760 page_to_phys(vcpu->kvm->arch.apic_access_page));
7709 } 7761 }
7710 7762
7711 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); 7763 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
@@ -7759,12 +7811,12 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7759 exit_control = vmcs_config.vmexit_ctrl; 7811 exit_control = vmcs_config.vmexit_ctrl;
7760 if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) 7812 if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
7761 exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; 7813 exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
7762 vmcs_write32(VM_EXIT_CONTROLS, exit_control); 7814 vm_exit_controls_init(vmx, exit_control);
7763 7815
7764 /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are 7816 /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
7765 * emulated by vmx_set_efer(), below. 7817 * emulated by vmx_set_efer(), below.
7766 */ 7818 */
7767 vmcs_write32(VM_ENTRY_CONTROLS, 7819 vm_entry_controls_init(vmx,
7768 (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER & 7820 (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER &
7769 ~VM_ENTRY_IA32E_MODE) | 7821 ~VM_ENTRY_IA32E_MODE) |
7770 (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); 7822 (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
@@ -7882,7 +7934,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
7882 return 1; 7934 return 1;
7883 } 7935 }
7884 7936
7885 if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE) { 7937 if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
7938 vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) {
7886 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); 7939 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
7887 return 1; 7940 return 1;
7888 } 7941 }
@@ -7994,8 +8047,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
7994 8047
7995 enter_guest_mode(vcpu); 8048 enter_guest_mode(vcpu);
7996 8049
7997 vmx->nested.nested_run_pending = 1;
7998
7999 vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); 8050 vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET);
8000 8051
8001 cpu = get_cpu(); 8052 cpu = get_cpu();
@@ -8011,6 +8062,11 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
8011 8062
8012 prepare_vmcs02(vcpu, vmcs12); 8063 prepare_vmcs02(vcpu, vmcs12);
8013 8064
8065 if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
8066 return kvm_emulate_halt(vcpu);
8067
8068 vmx->nested.nested_run_pending = 1;
8069
8014 /* 8070 /*
8015 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point 8071 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
8016 * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet 8072 * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
@@ -8110,7 +8166,9 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
8110 * exit-information fields only. Other fields are modified by L1 with VMWRITE, 8166 * exit-information fields only. Other fields are modified by L1 with VMWRITE,
8111 * which already writes to vmcs12 directly. 8167 * which already writes to vmcs12 directly.
8112 */ 8168 */
8113static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) 8169static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
8170 u32 exit_reason, u32 exit_intr_info,
8171 unsigned long exit_qualification)
8114{ 8172{
8115 /* update guest state fields: */ 8173 /* update guest state fields: */
8116 vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); 8174 vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
@@ -8162,6 +8220,10 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
8162 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); 8220 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
8163 vmcs12->guest_pending_dbg_exceptions = 8221 vmcs12->guest_pending_dbg_exceptions =
8164 vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); 8222 vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
8223 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
8224 vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT;
8225 else
8226 vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
8165 8227
8166 if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) && 8228 if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) &&
8167 (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) 8229 (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
@@ -8186,7 +8248,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
8186 8248
8187 vmcs12->vm_entry_controls = 8249 vmcs12->vm_entry_controls =
8188 (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | 8250 (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
8189 (vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE); 8251 (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE);
8190 8252
8191 /* TODO: These cannot have changed unless we have MSR bitmaps and 8253 /* TODO: These cannot have changed unless we have MSR bitmaps and
8192 * the relevant bit asks not to trap the change */ 8254 * the relevant bit asks not to trap the change */
@@ -8201,10 +8263,10 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
8201 8263
8202 /* update exit information fields: */ 8264 /* update exit information fields: */
8203 8265
8204 vmcs12->vm_exit_reason = to_vmx(vcpu)->exit_reason; 8266 vmcs12->vm_exit_reason = exit_reason;
8205 vmcs12->exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 8267 vmcs12->exit_qualification = exit_qualification;
8206 8268
8207 vmcs12->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); 8269 vmcs12->vm_exit_intr_info = exit_intr_info;
8208 if ((vmcs12->vm_exit_intr_info & 8270 if ((vmcs12->vm_exit_intr_info &
8209 (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) == 8271 (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) ==
8210 (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) 8272 (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK))
@@ -8370,7 +8432,9 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
8370 * and modify vmcs12 to make it see what it would expect to see there if 8432 * and modify vmcs12 to make it see what it would expect to see there if
8371 * L2 was its real guest. Must only be called when in L2 (is_guest_mode()) 8433 * L2 was its real guest. Must only be called when in L2 (is_guest_mode())
8372 */ 8434 */
8373static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) 8435static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
8436 u32 exit_intr_info,
8437 unsigned long exit_qualification)
8374{ 8438{
8375 struct vcpu_vmx *vmx = to_vmx(vcpu); 8439 struct vcpu_vmx *vmx = to_vmx(vcpu);
8376 int cpu; 8440 int cpu;
@@ -8380,7 +8444,15 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
8380 WARN_ON_ONCE(vmx->nested.nested_run_pending); 8444 WARN_ON_ONCE(vmx->nested.nested_run_pending);
8381 8445
8382 leave_guest_mode(vcpu); 8446 leave_guest_mode(vcpu);
8383 prepare_vmcs12(vcpu, vmcs12); 8447 prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
8448 exit_qualification);
8449
8450 trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
8451 vmcs12->exit_qualification,
8452 vmcs12->idt_vectoring_info_field,
8453 vmcs12->vm_exit_intr_info,
8454 vmcs12->vm_exit_intr_error_code,
8455 KVM_ISA_VMX);
8384 8456
8385 cpu = get_cpu(); 8457 cpu = get_cpu();
8386 vmx->loaded_vmcs = &vmx->vmcs01; 8458 vmx->loaded_vmcs = &vmx->vmcs01;
@@ -8389,6 +8461,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
8389 vcpu->cpu = cpu; 8461 vcpu->cpu = cpu;
8390 put_cpu(); 8462 put_cpu();
8391 8463
8464 vm_entry_controls_init(vmx, vmcs_read32(VM_ENTRY_CONTROLS));
8465 vm_exit_controls_init(vmx, vmcs_read32(VM_EXIT_CONTROLS));
8392 vmx_segment_cache_clear(vmx); 8466 vmx_segment_cache_clear(vmx);
8393 8467
8394 /* if no vmcs02 cache requested, remove the one we used */ 8468 /* if no vmcs02 cache requested, remove the one we used */
@@ -8424,6 +8498,16 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
8424} 8498}
8425 8499
8426/* 8500/*
8501 * Forcibly leave nested mode in order to be able to reset the VCPU later on.
8502 */
8503static void vmx_leave_nested(struct kvm_vcpu *vcpu)
8504{
8505 if (is_guest_mode(vcpu))
8506 nested_vmx_vmexit(vcpu, -1, 0, 0);
8507 free_nested(to_vmx(vcpu));
8508}
8509
8510/*
8427 * L1's failure to enter L2 is a subset of a normal exit, as explained in 8511 * L1's failure to enter L2 is a subset of a normal exit, as explained in
8428 * 23.7 "VM-entry failures during or after loading guest state" (this also 8512 * 23.7 "VM-entry failures during or after loading guest state" (this also
8429 * lists the acceptable exit-reason and exit-qualification parameters). 8513 * lists the acceptable exit-reason and exit-qualification parameters).
@@ -8486,6 +8570,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
8486 .set_idt = vmx_set_idt, 8570 .set_idt = vmx_set_idt,
8487 .get_gdt = vmx_get_gdt, 8571 .get_gdt = vmx_get_gdt,
8488 .set_gdt = vmx_set_gdt, 8572 .set_gdt = vmx_set_gdt,
8573 .get_dr6 = vmx_get_dr6,
8574 .set_dr6 = vmx_set_dr6,
8489 .set_dr7 = vmx_set_dr7, 8575 .set_dr7 = vmx_set_dr7,
8490 .cache_reg = vmx_cache_reg, 8576 .cache_reg = vmx_cache_reg,
8491 .get_rflags = vmx_get_rflags, 8577 .get_rflags = vmx_get_rflags,
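
The change threaded through most of the vmx.c hunks above is that nested_vmx_vmexit() now receives the exit reason, interrupt information and exit qualification from its caller, and prepare_vmcs12() stores them, instead of each call site patching vmcs12 after the fact. A reduced, standalone sketch of that calling convention; only the field names follow the diff, while the struct and the numeric VMX encodings in the comments are illustrative assumptions:

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the three exit-information fields of vmcs12. */
struct vmcs12_exit_info {
	uint32_t vm_exit_reason;
	uint32_t vm_exit_intr_info;
	uint64_t exit_qualification;
};

/*
 * The caller decides what L1 should observe and passes it in; the exit
 * path records it in vmcs12 in one place, as prepare_vmcs12() does above.
 */
static void nested_vmexit(struct vmcs12_exit_info *vmcs12, uint32_t reason,
			  uint32_t intr_info, uint64_t qual)
{
	vmcs12->vm_exit_reason = reason;
	vmcs12->vm_exit_intr_info = intr_info;
	vmcs12->exit_qualification = qual;
}

int main(void)
{
	struct vmcs12_exit_info info;

	/* Reflect an NMI to L1, as the first hunk of this diff does. */
	nested_vmexit(&info, 0 /* EXIT_REASON_EXCEPTION_NMI */,
		      2 /* NMI_VECTOR */ | (2 << 8) /* INTR_TYPE_NMI_INTR */ |
		      (1u << 31) /* INTR_INFO_VALID_MASK */, 0);

	printf("reason=%u intr_info=%#x qual=%llu\n",
	       (unsigned)info.vm_exit_reason,
	       (unsigned)info.vm_exit_intr_info,
	       (unsigned long long)info.exit_qualification);
	return 0;
}
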
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5d004da1e35d..39c28f09dfd5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -94,6 +94,9 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops);
94static bool ignore_msrs = 0; 94static bool ignore_msrs = 0;
95module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); 95module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
96 96
97unsigned int min_timer_period_us = 500;
98module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
99
97bool kvm_has_tsc_control; 100bool kvm_has_tsc_control;
98EXPORT_SYMBOL_GPL(kvm_has_tsc_control); 101EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
99u32 kvm_max_guest_tsc_khz; 102u32 kvm_max_guest_tsc_khz;
@@ -254,10 +257,26 @@ u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
254} 257}
255EXPORT_SYMBOL_GPL(kvm_get_apic_base); 258EXPORT_SYMBOL_GPL(kvm_get_apic_base);
256 259
257void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data) 260int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
258{ 261{
259 /* TODO: reserve bits check */ 262 u64 old_state = vcpu->arch.apic_base &
260 kvm_lapic_set_base(vcpu, data); 263 (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
264 u64 new_state = msr_info->data &
265 (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
266 u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) |
267 0x2ff | (guest_cpuid_has_x2apic(vcpu) ? 0 : X2APIC_ENABLE);
268
269 if (!msr_info->host_initiated &&
270 ((msr_info->data & reserved_bits) != 0 ||
271 new_state == X2APIC_ENABLE ||
272 (new_state == MSR_IA32_APICBASE_ENABLE &&
273 old_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) ||
274 (new_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE) &&
275 old_state == 0)))
276 return 1;
277
278 kvm_lapic_set_base(vcpu, msr_info->data);
279 return 0;
261} 280}
262EXPORT_SYMBOL_GPL(kvm_set_apic_base); 281EXPORT_SYMBOL_GPL(kvm_set_apic_base);
263 282
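
The reworked kvm_set_apic_base() above rejects guest writes that touch reserved bits or request an invalid APIC mode transition: x2APIC may only be entered from xAPIC mode and only left by disabling the APIC entirely, and the x2APIC bit without the global enable is never a valid state. A standalone sketch of just the transition check; the bit positions (11 for the global enable, 10 for x2APIC) are the usual IA32_APIC_BASE layout and are assumptions here, not taken from the diff:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define APICBASE_ENABLE	(1ULL << 11)	/* xAPIC global enable (assumed) */
#define X2APIC_ENABLE	(1ULL << 10)	/* x2APIC mode enable (assumed)  */

/* Mirrors the old_state/new_state comparison in kvm_set_apic_base(). */
static bool apic_base_transition_ok(uint64_t old_base, uint64_t new_base)
{
	uint64_t old_state = old_base & (APICBASE_ENABLE | X2APIC_ENABLE);
	uint64_t new_state = new_base & (APICBASE_ENABLE | X2APIC_ENABLE);

	if (new_state == X2APIC_ENABLE)
		return false;		/* x2APIC without the global enable */
	if (new_state == APICBASE_ENABLE &&
	    old_state == (APICBASE_ENABLE | X2APIC_ENABLE))
		return false;		/* x2APIC -> xAPIC directly         */
	if (new_state == (APICBASE_ENABLE | X2APIC_ENABLE) && old_state == 0)
		return false;		/* disabled -> x2APIC directly      */
	return true;
}

int main(void)
{
	printf("xAPIC    -> x2APIC: %d\n",
	       apic_base_transition_ok(APICBASE_ENABLE,
				       APICBASE_ENABLE | X2APIC_ENABLE));
	printf("disabled -> x2APIC: %d\n",
	       apic_base_transition_ok(0, APICBASE_ENABLE | X2APIC_ENABLE));
	return 0;
}
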
@@ -719,6 +738,12 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
719} 738}
720EXPORT_SYMBOL_GPL(kvm_get_cr8); 739EXPORT_SYMBOL_GPL(kvm_get_cr8);
721 740
741static void kvm_update_dr6(struct kvm_vcpu *vcpu)
742{
743 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
744 kvm_x86_ops->set_dr6(vcpu, vcpu->arch.dr6);
745}
746
722static void kvm_update_dr7(struct kvm_vcpu *vcpu) 747static void kvm_update_dr7(struct kvm_vcpu *vcpu)
723{ 748{
724 unsigned long dr7; 749 unsigned long dr7;
@@ -747,6 +772,7 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
747 if (val & 0xffffffff00000000ULL) 772 if (val & 0xffffffff00000000ULL)
748 return -1; /* #GP */ 773 return -1; /* #GP */
749 vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; 774 vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
775 kvm_update_dr6(vcpu);
750 break; 776 break;
751 case 5: 777 case 5:
752 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) 778 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
@@ -788,7 +814,10 @@ static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
788 return 1; 814 return 1;
789 /* fall through */ 815 /* fall through */
790 case 6: 816 case 6:
791 *val = vcpu->arch.dr6; 817 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
818 *val = vcpu->arch.dr6;
819 else
820 *val = kvm_x86_ops->get_dr6(vcpu);
792 break; 821 break;
793 case 5: 822 case 5:
794 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) 823 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
@@ -836,11 +865,12 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
836 * kvm-specific. Those are put in the beginning of the list. 865 * kvm-specific. Those are put in the beginning of the list.
837 */ 866 */
838 867
839#define KVM_SAVE_MSRS_BEGIN 10 868#define KVM_SAVE_MSRS_BEGIN 12
840static u32 msrs_to_save[] = { 869static u32 msrs_to_save[] = {
841 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, 870 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
842 MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, 871 MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
843 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, 872 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
873 HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
844 HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, 874 HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
845 MSR_KVM_PV_EOI_EN, 875 MSR_KVM_PV_EOI_EN,
846 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, 876 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
@@ -1275,8 +1305,6 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
1275 kvm->arch.last_tsc_write = data; 1305 kvm->arch.last_tsc_write = data;
1276 kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz; 1306 kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;
1277 1307
1278 /* Reset of TSC must disable overshoot protection below */
1279 vcpu->arch.hv_clock.tsc_timestamp = 0;
1280 vcpu->arch.last_guest_tsc = data; 1308 vcpu->arch.last_guest_tsc = data;
1281 1309
1282 /* Keep track of which generation this VCPU has synchronized to */ 1310 /* Keep track of which generation this VCPU has synchronized to */
@@ -1484,7 +1512,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1484 unsigned long flags, this_tsc_khz; 1512 unsigned long flags, this_tsc_khz;
1485 struct kvm_vcpu_arch *vcpu = &v->arch; 1513 struct kvm_vcpu_arch *vcpu = &v->arch;
1486 struct kvm_arch *ka = &v->kvm->arch; 1514 struct kvm_arch *ka = &v->kvm->arch;
1487 s64 kernel_ns, max_kernel_ns; 1515 s64 kernel_ns;
1488 u64 tsc_timestamp, host_tsc; 1516 u64 tsc_timestamp, host_tsc;
1489 struct pvclock_vcpu_time_info guest_hv_clock; 1517 struct pvclock_vcpu_time_info guest_hv_clock;
1490 u8 pvclock_flags; 1518 u8 pvclock_flags;
@@ -1543,37 +1571,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1543 if (!vcpu->pv_time_enabled) 1571 if (!vcpu->pv_time_enabled)
1544 return 0; 1572 return 0;
1545 1573
1546 /*
1547 * Time as measured by the TSC may go backwards when resetting the base
1548 * tsc_timestamp. The reason for this is that the TSC resolution is
1549 * higher than the resolution of the other clock scales. Thus, many
1550 * possible measurments of the TSC correspond to one measurement of any
1551 * other clock, and so a spread of values is possible. This is not a
1552 * problem for the computation of the nanosecond clock; with TSC rates
1553 * around 1GHZ, there can only be a few cycles which correspond to one
1554 * nanosecond value, and any path through this code will inevitably
1555 * take longer than that. However, with the kernel_ns value itself,
1556 * the precision may be much lower, down to HZ granularity. If the
1557 * first sampling of TSC against kernel_ns ends in the low part of the
1558 * range, and the second in the high end of the range, we can get:
1559 *
1560 * (TSC - offset_low) * S + kns_old > (TSC - offset_high) * S + kns_new
1561 *
1562 * As the sampling errors potentially range in the thousands of cycles,
1563 * it is possible such a time value has already been observed by the
1564 * guest. To protect against this, we must compute the system time as
1565 * observed by the guest and ensure the new system time is greater.
1566 */
1567 max_kernel_ns = 0;
1568 if (vcpu->hv_clock.tsc_timestamp) {
1569 max_kernel_ns = vcpu->last_guest_tsc -
1570 vcpu->hv_clock.tsc_timestamp;
1571 max_kernel_ns = pvclock_scale_delta(max_kernel_ns,
1572 vcpu->hv_clock.tsc_to_system_mul,
1573 vcpu->hv_clock.tsc_shift);
1574 max_kernel_ns += vcpu->last_kernel_ns;
1575 }
1576
1577 if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) { 1574 if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) {
1578 kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz, 1575 kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz,
1579 &vcpu->hv_clock.tsc_shift, 1576 &vcpu->hv_clock.tsc_shift,
@@ -1581,14 +1578,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1581 vcpu->hw_tsc_khz = this_tsc_khz; 1578 vcpu->hw_tsc_khz = this_tsc_khz;
1582 } 1579 }
1583 1580
1584 /* with a master <monotonic time, tsc value> tuple,
1585 * pvclock clock reads always increase at the (scaled) rate
1586 * of guest TSC - no need to deal with sampling errors.
1587 */
1588 if (!use_master_clock) {
1589 if (max_kernel_ns > kernel_ns)
1590 kernel_ns = max_kernel_ns;
1591 }
1592 /* With all the info we got, fill in the values */ 1581 /* With all the info we got, fill in the values */
1593 vcpu->hv_clock.tsc_timestamp = tsc_timestamp; 1582 vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
1594 vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; 1583 vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
@@ -1826,6 +1815,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
1826 switch (msr) { 1815 switch (msr) {
1827 case HV_X64_MSR_GUEST_OS_ID: 1816 case HV_X64_MSR_GUEST_OS_ID:
1828 case HV_X64_MSR_HYPERCALL: 1817 case HV_X64_MSR_HYPERCALL:
1818 case HV_X64_MSR_REFERENCE_TSC:
1819 case HV_X64_MSR_TIME_REF_COUNT:
1829 r = true; 1820 r = true;
1830 break; 1821 break;
1831 } 1822 }
@@ -1865,6 +1856,21 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1865 if (__copy_to_user((void __user *)addr, instructions, 4)) 1856 if (__copy_to_user((void __user *)addr, instructions, 4))
1866 return 1; 1857 return 1;
1867 kvm->arch.hv_hypercall = data; 1858 kvm->arch.hv_hypercall = data;
1859 mark_page_dirty(kvm, gfn);
1860 break;
1861 }
1862 case HV_X64_MSR_REFERENCE_TSC: {
1863 u64 gfn;
1864 HV_REFERENCE_TSC_PAGE tsc_ref;
1865 memset(&tsc_ref, 0, sizeof(tsc_ref));
1866 kvm->arch.hv_tsc_page = data;
1867 if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
1868 break;
1869 gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
1870 if (kvm_write_guest(kvm, data,
1871 &tsc_ref, sizeof(tsc_ref)))
1872 return 1;
1873 mark_page_dirty(kvm, gfn);
1868 break; 1874 break;
1869 } 1875 }
1870 default: 1876 default:
@@ -1879,19 +1885,21 @@ static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1879{ 1885{
1880 switch (msr) { 1886 switch (msr) {
1881 case HV_X64_MSR_APIC_ASSIST_PAGE: { 1887 case HV_X64_MSR_APIC_ASSIST_PAGE: {
1888 u64 gfn;
1882 unsigned long addr; 1889 unsigned long addr;
1883 1890
1884 if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) { 1891 if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
1885 vcpu->arch.hv_vapic = data; 1892 vcpu->arch.hv_vapic = data;
1886 break; 1893 break;
1887 } 1894 }
1888 addr = gfn_to_hva(vcpu->kvm, data >> 1895 gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT;
1889 HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT); 1896 addr = gfn_to_hva(vcpu->kvm, gfn);
1890 if (kvm_is_error_hva(addr)) 1897 if (kvm_is_error_hva(addr))
1891 return 1; 1898 return 1;
1892 if (__clear_user((void __user *)addr, PAGE_SIZE)) 1899 if (__clear_user((void __user *)addr, PAGE_SIZE))
1893 return 1; 1900 return 1;
1894 vcpu->arch.hv_vapic = data; 1901 vcpu->arch.hv_vapic = data;
1902 mark_page_dirty(vcpu->kvm, gfn);
1895 break; 1903 break;
1896 } 1904 }
1897 case HV_X64_MSR_EOI: 1905 case HV_X64_MSR_EOI:
@@ -2017,8 +2025,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2017 case 0x200 ... 0x2ff: 2025 case 0x200 ... 0x2ff:
2018 return set_msr_mtrr(vcpu, msr, data); 2026 return set_msr_mtrr(vcpu, msr, data);
2019 case MSR_IA32_APICBASE: 2027 case MSR_IA32_APICBASE:
2020 kvm_set_apic_base(vcpu, data); 2028 return kvm_set_apic_base(vcpu, msr_info);
2021 break;
2022 case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: 2029 case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
2023 return kvm_x2apic_msr_write(vcpu, msr, data); 2030 return kvm_x2apic_msr_write(vcpu, msr, data);
2024 case MSR_IA32_TSCDEADLINE: 2031 case MSR_IA32_TSCDEADLINE:
@@ -2291,6 +2298,14 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
2291 case HV_X64_MSR_HYPERCALL: 2298 case HV_X64_MSR_HYPERCALL:
2292 data = kvm->arch.hv_hypercall; 2299 data = kvm->arch.hv_hypercall;
2293 break; 2300 break;
2301 case HV_X64_MSR_TIME_REF_COUNT: {
2302 data =
2303 div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
2304 break;
2305 }
2306 case HV_X64_MSR_REFERENCE_TSC:
2307 data = kvm->arch.hv_tsc_page;
2308 break;
2294 default: 2309 default:
2295 vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); 2310 vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
2296 return 1; 2311 return 1;
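
The HV_X64_MSR_TIME_REF_COUNT read above divides guest time in nanoseconds by 100 because the Hyper-V partition reference counter ticks in 100 ns units. A trivial standalone check of the arithmetic (the one-second figure is illustrative, not from the diff):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t guest_ns = 1000000000ULL;	/* 1 s since VM creation     */
	uint64_t ref_ticks = guest_ns / 100;	/* mirrors div_u64(..., 100) */

	/* One second should read as 10,000,000 reference ticks. */
	printf("%llu ticks\n", (unsigned long long)ref_ticks);
	return 0;
}
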
@@ -2601,6 +2616,7 @@ int kvm_dev_ioctl_check_extension(long ext)
2601 case KVM_CAP_GET_TSC_KHZ: 2616 case KVM_CAP_GET_TSC_KHZ:
2602 case KVM_CAP_KVMCLOCK_CTRL: 2617 case KVM_CAP_KVMCLOCK_CTRL:
2603 case KVM_CAP_READONLY_MEM: 2618 case KVM_CAP_READONLY_MEM:
2619 case KVM_CAP_HYPERV_TIME:
2604#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT 2620#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
2605 case KVM_CAP_ASSIGN_DEV_IRQ: 2621 case KVM_CAP_ASSIGN_DEV_IRQ:
2606 case KVM_CAP_PCI_2_3: 2622 case KVM_CAP_PCI_2_3:
@@ -2972,8 +2988,11 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
2972static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, 2988static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
2973 struct kvm_debugregs *dbgregs) 2989 struct kvm_debugregs *dbgregs)
2974{ 2990{
2991 unsigned long val;
2992
2975 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); 2993 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
2976 dbgregs->dr6 = vcpu->arch.dr6; 2994 _kvm_get_dr(vcpu, 6, &val);
2995 dbgregs->dr6 = val;
2977 dbgregs->dr7 = vcpu->arch.dr7; 2996 dbgregs->dr7 = vcpu->arch.dr7;
2978 dbgregs->flags = 0; 2997 dbgregs->flags = 0;
2979 memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved)); 2998 memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
@@ -2987,7 +3006,9 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
2987 3006
2988 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); 3007 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
2989 vcpu->arch.dr6 = dbgregs->dr6; 3008 vcpu->arch.dr6 = dbgregs->dr6;
3009 kvm_update_dr6(vcpu);
2990 vcpu->arch.dr7 = dbgregs->dr7; 3010 vcpu->arch.dr7 = dbgregs->dr7;
3011 kvm_update_dr7(vcpu);
2991 3012
2992 return 0; 3013 return 0;
2993} 3014}
@@ -5834,6 +5855,11 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
5834 kvm_apic_update_tmr(vcpu, tmr); 5855 kvm_apic_update_tmr(vcpu, tmr);
5835} 5856}
5836 5857
5858/*
5859 * Returns 1 to let __vcpu_run() continue the guest execution loop without
5860 * exiting to userspace. Otherwise, the value will be returned to
5861 * userspace.
5862 */
5837static int vcpu_enter_guest(struct kvm_vcpu *vcpu) 5863static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5838{ 5864{
5839 int r; 5865 int r;
@@ -6089,7 +6115,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
6089 } 6115 }
6090 if (need_resched()) { 6116 if (need_resched()) {
6091 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); 6117 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
6092 kvm_resched(vcpu); 6118 cond_resched();
6093 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); 6119 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
6094 } 6120 }
6095 } 6121 }
@@ -6401,6 +6427,7 @@ EXPORT_SYMBOL_GPL(kvm_task_switch);
6401int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 6427int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
6402 struct kvm_sregs *sregs) 6428 struct kvm_sregs *sregs)
6403{ 6429{
6430 struct msr_data apic_base_msr;
6404 int mmu_reset_needed = 0; 6431 int mmu_reset_needed = 0;
6405 int pending_vec, max_bits, idx; 6432 int pending_vec, max_bits, idx;
6406 struct desc_ptr dt; 6433 struct desc_ptr dt;
@@ -6424,7 +6451,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
6424 6451
6425 mmu_reset_needed |= vcpu->arch.efer != sregs->efer; 6452 mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
6426 kvm_x86_ops->set_efer(vcpu, sregs->efer); 6453 kvm_x86_ops->set_efer(vcpu, sregs->efer);
6427 kvm_set_apic_base(vcpu, sregs->apic_base); 6454 apic_base_msr.data = sregs->apic_base;
6455 apic_base_msr.host_initiated = true;
6456 kvm_set_apic_base(vcpu, &apic_base_msr);
6428 6457
6429 mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0; 6458 mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
6430 kvm_x86_ops->set_cr0(vcpu, sregs->cr0); 6459 kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
@@ -6717,6 +6746,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
6717 6746
6718 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); 6747 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
6719 vcpu->arch.dr6 = DR6_FIXED_1; 6748 vcpu->arch.dr6 = DR6_FIXED_1;
6749 kvm_update_dr6(vcpu);
6720 vcpu->arch.dr7 = DR7_FIXED_1; 6750 vcpu->arch.dr7 = DR7_FIXED_1;
6721 kvm_update_dr7(vcpu); 6751 kvm_update_dr7(vcpu);
6722 6752
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 587fb9ede436..8da5823bcde6 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -125,5 +125,7 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
125#define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) 125#define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM)
126extern u64 host_xcr0; 126extern u64 host_xcr0;
127 127
128extern unsigned int min_timer_period_us;
129
128extern struct static_key kvm_no_apic_vcpu; 130extern struct static_key kvm_no_apic_vcpu;
129#endif 131#endif
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index bdf8532494fe..ad1fb5f53925 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -233,13 +233,13 @@ static void lguest_end_context_switch(struct task_struct *next)
233 * flags word contains all kind of stuff, but in practice Linux only cares 233 * flags word contains all kind of stuff, but in practice Linux only cares
234 * about the interrupt flag. Our "save_flags()" just returns that. 234 * about the interrupt flag. Our "save_flags()" just returns that.
235 */ 235 */
236static unsigned long save_fl(void) 236asmlinkage unsigned long lguest_save_fl(void)
237{ 237{
238 return lguest_data.irq_enabled; 238 return lguest_data.irq_enabled;
239} 239}
240 240
241/* Interrupts go off... */ 241/* Interrupts go off... */
242static void irq_disable(void) 242asmlinkage void lguest_irq_disable(void)
243{ 243{
244 lguest_data.irq_enabled = 0; 244 lguest_data.irq_enabled = 0;
245} 245}
@@ -253,8 +253,8 @@ static void irq_disable(void)
253 * PV_CALLEE_SAVE_REGS_THUNK(), which pushes %eax onto the stack, calls the 253 * PV_CALLEE_SAVE_REGS_THUNK(), which pushes %eax onto the stack, calls the
254 * C function, then restores it. 254 * C function, then restores it.
255 */ 255 */
256PV_CALLEE_SAVE_REGS_THUNK(save_fl); 256PV_CALLEE_SAVE_REGS_THUNK(lguest_save_fl);
257PV_CALLEE_SAVE_REGS_THUNK(irq_disable); 257PV_CALLEE_SAVE_REGS_THUNK(lguest_irq_disable);
258/*:*/ 258/*:*/
259 259
260/* These are in i386_head.S */ 260/* These are in i386_head.S */
@@ -1291,9 +1291,9 @@ __init void lguest_init(void)
1291 */ 1291 */
1292 1292
1293 /* Interrupt-related operations */ 1293 /* Interrupt-related operations */
1294 pv_irq_ops.save_fl = PV_CALLEE_SAVE(save_fl); 1294 pv_irq_ops.save_fl = PV_CALLEE_SAVE(lguest_save_fl);
1295 pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(lg_restore_fl); 1295 pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(lg_restore_fl);
1296 pv_irq_ops.irq_disable = PV_CALLEE_SAVE(irq_disable); 1296 pv_irq_ops.irq_disable = PV_CALLEE_SAVE(lguest_irq_disable);
1297 pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(lg_irq_enable); 1297 pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(lg_irq_enable);
1298 pv_irq_ops.safe_halt = lguest_safe_halt; 1298 pv_irq_ops.safe_halt = lguest_safe_halt;
1299 1299
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 992d63bb154f..eabcb6e6a900 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -24,7 +24,7 @@ lib-$(CONFIG_SMP) += rwlock.o
24lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o 24lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
25lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o 25lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
26 26
27obj-y += msr.o msr-reg.o msr-reg-export.o 27obj-y += msr.o msr-reg.o msr-reg-export.o hash.o
28 28
29ifeq ($(CONFIG_X86_32),y) 29ifeq ($(CONFIG_X86_32),y)
30 obj-y += atomic64_32.o 30 obj-y += atomic64_32.o
diff --git a/arch/x86/lib/hash.c b/arch/x86/lib/hash.c
new file mode 100644
index 000000000000..3056702e81fb
--- /dev/null
+++ b/arch/x86/lib/hash.c
@@ -0,0 +1,88 @@
1/*
2 * Some portions derived from code covered by the following notice:
3 *
4 * Copyright (c) 2010-2013 Intel Corporation. All rights reserved.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34#include <linux/hash.h>
35
36#include <asm/processor.h>
37#include <asm/cpufeature.h>
38#include <asm/hash.h>
39
40static inline u32 crc32_u32(u32 crc, u32 val)
41{
42 asm ("crc32l %1,%0\n" : "+r" (crc) : "rm" (val));
43 return crc;
44}
45
46static u32 intel_crc4_2_hash(const void *data, u32 len, u32 seed)
47{
48 const u32 *p32 = (const u32 *) data;
49 u32 i, tmp = 0;
50
51 for (i = 0; i < len / 4; i++)
52 seed = crc32_u32(*p32++, seed);
53
54 switch (3 - (len & 0x03)) {
55 case 0:
56 tmp |= *((const u8 *) p32 + 2) << 16;
57 /* fallthrough */
58 case 1:
59 tmp |= *((const u8 *) p32 + 1) << 8;
60 /* fallthrough */
61 case 2:
62 tmp |= *((const u8 *) p32);
63 seed = crc32_u32(tmp, seed);
64 default:
65 break;
66 }
67
68 return seed;
69}
70
71static u32 intel_crc4_2_hash2(const u32 *data, u32 len, u32 seed)
72{
73 const u32 *p32 = (const u32 *) data;
74 u32 i;
75
76 for (i = 0; i < len; i++)
77 seed = crc32_u32(*p32++, seed);
78
79 return seed;
80}
81
82void setup_arch_fast_hash(struct fast_hash_ops *ops)
83{
84 if (cpu_has_xmm4_2) {
85 ops->hash = intel_crc4_2_hash;
86 ops->hash2 = intel_crc4_2_hash2;
87 }
88}
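
The setup_arch_fast_hash() backend above is a straight CRC32C fold: whole 32-bit words first, then up to three tail bytes packed into a final crc32l. A standalone userspace approximation that uses the SSE4.2 intrinsic instead of the inline-asm wrapper (build with -msse4.2); the intrinsic takes the accumulator first, and the test string is arbitrary, so neither detail comes from the file above:

/* gcc -O2 -msse4.2 crc_hash.c */
#include <nmmintrin.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t crc4_2_hash(const void *data, uint32_t len, uint32_t seed)
{
	const uint32_t *p32 = data;
	uint32_t i, tmp = 0;

	/* Fold whole 32-bit words into the running CRC. */
	for (i = 0; i < len / 4; i++)
		seed = _mm_crc32_u32(seed, *p32++);

	/* Pack the remaining 1-3 bytes and fold them in one go. */
	switch (3 - (len & 0x03)) {
	case 0:
		tmp |= *((const uint8_t *)p32 + 2) << 16;
		/* fallthrough */
	case 1:
		tmp |= *((const uint8_t *)p32 + 1) << 8;
		/* fallthrough */
	case 2:
		tmp |= *((const uint8_t *)p32);
		seed = _mm_crc32_u32(seed, tmp);
	default:
		break;
	}

	return seed;
}

int main(void)
{
	const char buf[] = "network flow key";	/* arbitrary test input */

	printf("hash = %#x\n", crc4_2_hash(buf, sizeof(buf) - 1, 0));
	return 0;
}
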
diff --git a/arch/x86/math-emu/errors.c b/arch/x86/math-emu/errors.c
index 59d353d2c599..a5449089cd9f 100644
--- a/arch/x86/math-emu/errors.c
+++ b/arch/x86/math-emu/errors.c
@@ -330,11 +330,6 @@ asmlinkage void FPU_exception(int n)
330 330
331 RE_ENTRANT_CHECK_OFF; 331 RE_ENTRANT_CHECK_OFF;
332 if ((~control_word & n & CW_Exceptions) || (n == EX_INTERNAL)) { 332 if ((~control_word & n & CW_Exceptions) || (n == EX_INTERNAL)) {
333#ifdef PRINT_MESSAGES
334 /* My message from the sponsor */
335 printk(FPU_VERSION " " __DATE__ " (C) W. Metzenthen.\n");
336#endif /* PRINT_MESSAGES */
337
338 /* Get a name string for error reporting */ 333 /* Get a name string for error reporting */
339 for (i = 0; exception_names[i].type; i++) 334 for (i = 0; exception_names[i].type; i++)
340 if ((exception_names[i].type & n) == 335 if ((exception_names[i].type & n) ==
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 0596e8e0cc19..207d9aef662d 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -108,8 +108,8 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
108 108
109static inline void get_head_page_multiple(struct page *page, int nr) 109static inline void get_head_page_multiple(struct page *page, int nr)
110{ 110{
111 VM_BUG_ON(page != compound_head(page)); 111 VM_BUG_ON_PAGE(page != compound_head(page), page);
112 VM_BUG_ON(page_count(page) == 0); 112 VM_BUG_ON_PAGE(page_count(page) == 0, page);
113 atomic_add(nr, &page->_count); 113 atomic_add(nr, &page->_count);
114 SetPageReferenced(page); 114 SetPageReferenced(page);
115} 115}
@@ -135,7 +135,7 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
135 head = pte_page(pte); 135 head = pte_page(pte);
136 page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); 136 page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
137 do { 137 do {
138 VM_BUG_ON(compound_head(page) != head); 138 VM_BUG_ON_PAGE(compound_head(page) != head, page);
139 pages[*nr] = page; 139 pages[*nr] = page;
140 if (PageTail(page)) 140 if (PageTail(page))
141 get_huge_page_tail(page); 141 get_huge_page_tail(page);
@@ -212,7 +212,7 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
212 head = pte_page(pte); 212 head = pte_page(pte);
213 page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); 213 page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
214 do { 214 do {
215 VM_BUG_ON(compound_head(page) != head); 215 VM_BUG_ON_PAGE(compound_head(page) != head, page);
216 pages[*nr] = page; 216 pages[*nr] = page;
217 if (PageTail(page)) 217 if (PageTail(page))
218 get_huge_page_tail(page); 218 get_huge_page_tail(page);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 5bdc5430597c..e39504878aec 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -665,7 +665,7 @@ void __init initmem_init(void)
665 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; 665 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
666#endif 666#endif
667 667
668 memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0); 668 memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0);
669 sparse_memory_present_with_active_regions(0); 669 sparse_memory_present_with_active_regions(0);
670 670
671#ifdef CONFIG_FLATMEM 671#ifdef CONFIG_FLATMEM
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 104d56a9245f..f35c66c5959a 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -643,7 +643,7 @@ kernel_physical_mapping_init(unsigned long start,
643#ifndef CONFIG_NUMA 643#ifndef CONFIG_NUMA
644void __init initmem_init(void) 644void __init initmem_init(void)
645{ 645{
646 memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0); 646 memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0);
647} 647}
648#endif 648#endif
649 649
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c
index 8dabbed409ee..1e9da795767a 100644
--- a/arch/x86/mm/memtest.c
+++ b/arch/x86/mm/memtest.c
@@ -74,7 +74,7 @@ static void __init do_one_pass(u64 pattern, u64 start, u64 end)
74 u64 i; 74 u64 i;
75 phys_addr_t this_start, this_end; 75 phys_addr_t this_start, this_end;
76 76
77 for_each_free_mem_range(i, MAX_NUMNODES, &this_start, &this_end, NULL) { 77 for_each_free_mem_range(i, NUMA_NO_NODE, &this_start, &this_end, NULL) {
78 this_start = clamp_t(phys_addr_t, this_start, start, end); 78 this_start = clamp_t(phys_addr_t, this_start, start, end);
79 this_end = clamp_t(phys_addr_t, this_end, start, end); 79 this_end = clamp_t(phys_addr_t, this_end, start, end);
80 if (this_start < this_end) { 80 if (this_start < this_end) {
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index c85da7bb6b60..81b2750f3666 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -491,7 +491,16 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
491 491
492 for (i = 0; i < mi->nr_blks; i++) { 492 for (i = 0; i < mi->nr_blks; i++) {
493 struct numa_memblk *mb = &mi->blk[i]; 493 struct numa_memblk *mb = &mi->blk[i];
494 memblock_set_node(mb->start, mb->end - mb->start, mb->nid); 494 memblock_set_node(mb->start, mb->end - mb->start,
495 &memblock.memory, mb->nid);
496
497 /*
498 * At this time, all memory regions reserved by memblock are
 499 * used by the kernel. Setting the nid in memblock.reserved marks
 500 * all the nodes the kernel resides in.
501 */
502 memblock_set_node(mb->start, mb->end - mb->start,
503 &memblock.reserved, mb->nid);
495 } 504 }
496 505
497 /* 506 /*
@@ -553,6 +562,30 @@ static void __init numa_init_array(void)
553 } 562 }
554} 563}
555 564
565static void __init numa_clear_kernel_node_hotplug(void)
566{
567 int i, nid;
568 nodemask_t numa_kernel_nodes;
569 unsigned long start, end;
570 struct memblock_type *type = &memblock.reserved;
571
572 /* Mark all kernel nodes. */
573 for (i = 0; i < type->cnt; i++)
574 node_set(type->regions[i].nid, numa_kernel_nodes);
575
576 /* Clear MEMBLOCK_HOTPLUG flag for memory in kernel nodes. */
577 for (i = 0; i < numa_meminfo.nr_blks; i++) {
578 nid = numa_meminfo.blk[i].nid;
579 if (!node_isset(nid, numa_kernel_nodes))
580 continue;
581
582 start = numa_meminfo.blk[i].start;
583 end = numa_meminfo.blk[i].end;
584
585 memblock_clear_hotplug(start, end - start);
586 }
587}
588
556static int __init numa_init(int (*init_func)(void)) 589static int __init numa_init(int (*init_func)(void))
557{ 590{
558 int i; 591 int i;
@@ -565,7 +598,12 @@ static int __init numa_init(int (*init_func)(void))
565 nodes_clear(node_possible_map); 598 nodes_clear(node_possible_map);
566 nodes_clear(node_online_map); 599 nodes_clear(node_online_map);
567 memset(&numa_meminfo, 0, sizeof(numa_meminfo)); 600 memset(&numa_meminfo, 0, sizeof(numa_meminfo));
568 WARN_ON(memblock_set_node(0, ULLONG_MAX, MAX_NUMNODES)); 601 WARN_ON(memblock_set_node(0, ULLONG_MAX, &memblock.memory,
602 MAX_NUMNODES));
603 WARN_ON(memblock_set_node(0, ULLONG_MAX, &memblock.reserved,
604 MAX_NUMNODES));
605 /* In case that parsing SRAT failed. */
606 WARN_ON(memblock_clear_hotplug(0, ULLONG_MAX));
569 numa_reset_distance(); 607 numa_reset_distance();
570 608
571 ret = init_func(); 609 ret = init_func();
@@ -601,6 +639,16 @@ static int __init numa_init(int (*init_func)(void))
601 numa_clear_node(i); 639 numa_clear_node(i);
602 } 640 }
603 numa_init_array(); 641 numa_init_array();
642
643 /*
 644 * Very early on, the kernel has to use some memory, for example to
 645 * load the kernel image. We cannot prevent this anyway. So any
646 * node the kernel resides in should be un-hotpluggable.
647 *
648 * And when we come here, numa_init() won't fail.
649 */
650 numa_clear_kernel_node_hotplug();
651
604 return 0; 652 return 0;
605} 653}
606 654
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index 5ecf65117e6f..1953e9c9391a 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -191,6 +191,11 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
191 (unsigned long long) start, (unsigned long long) end - 1, 191 (unsigned long long) start, (unsigned long long) end - 1,
192 hotpluggable ? " hotplug" : ""); 192 hotpluggable ? " hotplug" : "");
193 193
194 /* Mark hotplug range in memblock. */
195 if (hotpluggable && memblock_mark_hotplug(start, ma->length))
196 pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n",
197 (unsigned long long)start, (unsigned long long)end - 1);
198
194 return 0; 199 return 0;
195out_err_bad_srat: 200out_err_bad_srat:
196 bad_srat(); 201 bad_srat();
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 082e88129712..248642f4bab7 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -12,7 +12,6 @@
12 12
13#include <linux/pci.h> 13#include <linux/pci.h>
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/acpi.h>
16#include <linux/sfi_acpi.h> 15#include <linux/sfi_acpi.h>
17#include <linux/bitmap.h> 16#include <linux/bitmap.h>
18#include <linux/dmi.h> 17#include <linux/dmi.h>
diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c
index 5c90975cdf0f..43984bc1665a 100644
--- a/arch/x86/pci/mmconfig_32.c
+++ b/arch/x86/pci/mmconfig_32.c
@@ -14,7 +14,6 @@
14#include <linux/rcupdate.h> 14#include <linux/rcupdate.h>
15#include <asm/e820.h> 15#include <asm/e820.h>
16#include <asm/pci_x86.h> 16#include <asm/pci_x86.h>
17#include <acpi/acpi.h>
18 17
19/* Assume systems with more busses have correct MCFG */ 18/* Assume systems with more busses have correct MCFG */
20#define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG)) 19#define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG))
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 5eee4959785d..103e702ec5a7 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -337,7 +337,7 @@ out:
337 return ret; 337 return ret;
338} 338}
339 339
340static void xen_initdom_restore_msi_irqs(struct pci_dev *dev, int irq) 340static void xen_initdom_restore_msi_irqs(struct pci_dev *dev)
341{ 341{
342 int ret = 0; 342 int ret = 0;
343 343
diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c
index 7145ec63c520..4df9591eadad 100644
--- a/arch/x86/platform/efi/efi-bgrt.c
+++ b/arch/x86/platform/efi/efi-bgrt.c
@@ -49,7 +49,8 @@ void __init efi_bgrt_init(void)
49 49
50 image = efi_lookup_mapped_addr(bgrt_tab->image_address); 50 image = efi_lookup_mapped_addr(bgrt_tab->image_address);
51 if (!image) { 51 if (!image) {
52 image = ioremap(bgrt_tab->image_address, sizeof(bmp_header)); 52 image = early_memremap(bgrt_tab->image_address,
53 sizeof(bmp_header));
53 ioremapped = true; 54 ioremapped = true;
54 if (!image) 55 if (!image)
55 return; 56 return;
@@ -57,7 +58,7 @@ void __init efi_bgrt_init(void)
57 58
58 memcpy_fromio(&bmp_header, image, sizeof(bmp_header)); 59 memcpy_fromio(&bmp_header, image, sizeof(bmp_header));
59 if (ioremapped) 60 if (ioremapped)
60 iounmap(image); 61 early_iounmap(image, sizeof(bmp_header));
61 bgrt_image_size = bmp_header.size; 62 bgrt_image_size = bmp_header.size;
62 63
63 bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL); 64 bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL);
@@ -65,7 +66,8 @@ void __init efi_bgrt_init(void)
65 return; 66 return;
66 67
67 if (ioremapped) { 68 if (ioremapped) {
68 image = ioremap(bgrt_tab->image_address, bmp_header.size); 69 image = early_memremap(bgrt_tab->image_address,
70 bmp_header.size);
69 if (!image) { 71 if (!image) {
70 kfree(bgrt_image); 72 kfree(bgrt_image);
71 bgrt_image = NULL; 73 bgrt_image = NULL;
@@ -75,5 +77,5 @@ void __init efi_bgrt_init(void)
75 77
76 memcpy_fromio(bgrt_image, image, bgrt_image_size); 78 memcpy_fromio(bgrt_image, image, bgrt_image_size);
77 if (ioremapped) 79 if (ioremapped)
78 iounmap(image); 80 early_iounmap(image, bmp_header.size);
79} 81}
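
The hunks above replace the temporary ioremap()/iounmap() of the BGRT header and image with early_memremap()/early_iounmap(), tearing the mapping down with the same length that was mapped. A minimal kernel-style sketch of that map/copy/unmap pattern; 'phys' and 'len' stand in for the BGRT image address and size, and the header choices are assumptions rather than something taken from the file:

#include <linux/init.h>
#include <linux/io.h>
#include <linux/string.h>

/* Map early, copy the data out, then unmap with the mapped length. */
static void __init copy_from_early_mapping(void *dst, phys_addr_t phys,
					   size_t len)
{
	void *image = early_memremap(phys, len);

	if (!image)
		return;
	memcpy_fromio(dst, image, len);
	early_iounmap(image, len);
}
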
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_ipc.h b/arch/x86/platform/intel-mid/device_libs/platform_ipc.h
index 8f568dd79605..79bb09d4f718 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_ipc.h
+++ b/arch/x86/platform/intel-mid/device_libs/platform_ipc.h
@@ -12,6 +12,7 @@
12#ifndef _PLATFORM_IPC_H_ 12#ifndef _PLATFORM_IPC_H_
13#define _PLATFORM_IPC_H_ 13#define _PLATFORM_IPC_H_
14 14
15extern void __init ipc_device_handler(struct sfi_device_table_entry *pentry, 15void __init
16 struct devs_id *dev) __attribute__((weak)); 16ipc_device_handler(struct sfi_device_table_entry *pentry, struct devs_id *dev);
17
17#endif 18#endif
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic.h b/arch/x86/platform/intel-mid/device_libs/platform_msic.h
index 917eb56d77da..b7be1d041da2 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_msic.h
+++ b/arch/x86/platform/intel-mid/device_libs/platform_msic.h
@@ -14,6 +14,6 @@
14 14
15extern struct intel_msic_platform_data msic_pdata; 15extern struct intel_msic_platform_data msic_pdata;
16 16
17extern void *msic_generic_platform_data(void *info, 17void *msic_generic_platform_data(void *info, enum intel_msic_block block);
18 enum intel_msic_block block) __attribute__((weak)); 18
19#endif 19#endif
diff --git a/arch/x86/platform/intel-mid/intel_mid_weak_decls.h b/arch/x86/platform/intel-mid/intel_mid_weak_decls.h
index a537ffc16299..46aa25c8ce06 100644
--- a/arch/x86/platform/intel-mid/intel_mid_weak_decls.h
+++ b/arch/x86/platform/intel-mid/intel_mid_weak_decls.h
@@ -14,6 +14,6 @@
14/* For every CPU addition a new get_<cpuname>_ops interface needs 14/* For every CPU addition a new get_<cpuname>_ops interface needs
15 * to be added. 15 * to be added.
16 */ 16 */
17extern void * __cpuinit get_penwell_ops(void) __attribute__((weak)); 17extern void *get_penwell_ops(void) __attribute__((weak));
18extern void * __cpuinit get_cloverview_ops(void) __attribute__((weak)); 18extern void *get_cloverview_ops(void) __attribute__((weak));
19extern void * __init get_tangier_ops(void) __attribute__((weak)); 19extern void *get_tangier_ops(void) __attribute__((weak));
diff --git a/arch/x86/platform/intel-mid/mfld.c b/arch/x86/platform/intel-mid/mfld.c
index 4f7884eebc14..23381d2174ae 100644
--- a/arch/x86/platform/intel-mid/mfld.c
+++ b/arch/x86/platform/intel-mid/mfld.c
@@ -58,18 +58,18 @@ static unsigned long __init mfld_calibrate_tsc(void)
58 return 0; 58 return 0;
59} 59}
60 60
61static void __init penwell_arch_setup() 61static void __init penwell_arch_setup(void)
62{ 62{
63 x86_platform.calibrate_tsc = mfld_calibrate_tsc; 63 x86_platform.calibrate_tsc = mfld_calibrate_tsc;
64 pm_power_off = mfld_power_off; 64 pm_power_off = mfld_power_off;
65} 65}
66 66
67void * __cpuinit get_penwell_ops() 67void *get_penwell_ops(void)
68{ 68{
69 return &penwell_ops; 69 return &penwell_ops;
70} 70}
71 71
72void * __cpuinit get_cloverview_ops() 72void *get_cloverview_ops(void)
73{ 73{
74 return &penwell_ops; 74 return &penwell_ops;
75} 75}
diff --git a/arch/x86/platform/intel-mid/mrfl.c b/arch/x86/platform/intel-mid/mrfl.c
index 09d10159e7b7..aaca91753d32 100644
--- a/arch/x86/platform/intel-mid/mrfl.c
+++ b/arch/x86/platform/intel-mid/mrfl.c
@@ -97,7 +97,7 @@ static struct intel_mid_ops tangier_ops = {
97 .arch_setup = tangier_arch_setup, 97 .arch_setup = tangier_arch_setup,
98}; 98};
99 99
100void * __cpuinit get_tangier_ops() 100void *get_tangier_ops(void)
101{ 101{
102 return &tangier_ops; 102 return &tangier_ops;
103} 103}
diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c
index 649a12befba9..08e350e757dc 100644
--- a/arch/x86/platform/olpc/olpc-xo15-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo15-sci.c
@@ -15,8 +15,7 @@
15#include <linux/power_supply.h> 15#include <linux/power_supply.h>
16#include <linux/olpc-ec.h> 16#include <linux/olpc-ec.h>
17 17
18#include <acpi/acpi_bus.h> 18#include <linux/acpi.h>
19#include <acpi/acpi_drivers.h>
20#include <asm/olpc.h> 19#include <asm/olpc.h>
21 20
22#define DRV_NAME "olpc-xo15-sci" 21#define DRV_NAME "olpc-xo15-sci"
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index 8eeccba73130..be27da60dc8f 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -74,7 +74,6 @@ static atomic_t uv_in_nmi;
74static atomic_t uv_nmi_cpu = ATOMIC_INIT(-1); 74static atomic_t uv_nmi_cpu = ATOMIC_INIT(-1);
75static atomic_t uv_nmi_cpus_in_nmi = ATOMIC_INIT(-1); 75static atomic_t uv_nmi_cpus_in_nmi = ATOMIC_INIT(-1);
76static atomic_t uv_nmi_slave_continue; 76static atomic_t uv_nmi_slave_continue;
77static atomic_t uv_nmi_kexec_failed;
78static cpumask_var_t uv_nmi_cpu_mask; 77static cpumask_var_t uv_nmi_cpu_mask;
79 78
80/* Values for uv_nmi_slave_continue */ 79/* Values for uv_nmi_slave_continue */
@@ -149,7 +148,8 @@ module_param_named(retry_count, uv_nmi_retry_count, int, 0644);
149 * "dump" - dump process stack for each cpu 148 * "dump" - dump process stack for each cpu
150 * "ips" - dump IP info for each cpu 149 * "ips" - dump IP info for each cpu
151 * "kdump" - do crash dump 150 * "kdump" - do crash dump
152 * "kdb" - enter KDB/KGDB (default) 151 * "kdb" - enter KDB (default)
152 * "kgdb" - enter KGDB
153 */ 153 */
154static char uv_nmi_action[8] = "kdb"; 154static char uv_nmi_action[8] = "kdb";
155module_param_string(action, uv_nmi_action, sizeof(uv_nmi_action), 0644); 155module_param_string(action, uv_nmi_action, sizeof(uv_nmi_action), 0644);
@@ -504,6 +504,7 @@ static void uv_nmi_touch_watchdogs(void)
504} 504}
505 505
506#if defined(CONFIG_KEXEC) 506#if defined(CONFIG_KEXEC)
507static atomic_t uv_nmi_kexec_failed;
507static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs) 508static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
508{ 509{
509 /* Call crash to dump system state */ 510 /* Call crash to dump system state */
@@ -537,18 +538,45 @@ static inline void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
537} 538}
538#endif /* !CONFIG_KEXEC */ 539#endif /* !CONFIG_KEXEC */
539 540
541#ifdef CONFIG_KGDB
540#ifdef CONFIG_KGDB_KDB 542#ifdef CONFIG_KGDB_KDB
541/* Call KDB from NMI handler */ 543static inline int uv_nmi_kdb_reason(void)
542static void uv_call_kdb(int cpu, struct pt_regs *regs, int master)
543{ 544{
544 int ret; 545 return KDB_REASON_SYSTEM_NMI;
546}
547#else /* !CONFIG_KGDB_KDB */
548static inline int uv_nmi_kdb_reason(void)
549{
 550 /* Ensure the user is expecting to attach a gdb remote */
551 if (uv_nmi_action_is("kgdb"))
552 return 0;
553
554 pr_err("UV: NMI error: KDB is not enabled in this kernel\n");
555 return -1;
556}
557#endif /* CONFIG_KGDB_KDB */
545 558
559/*
560 * Call KGDB/KDB from NMI handler
561 *
562 * Note that if both KGDB and KDB are configured, then the action of 'kgdb' or
 563 * 'kdb' has no effect on which is used. See the KGDB documentation for further
564 * information.
565 */
566static void uv_call_kgdb_kdb(int cpu, struct pt_regs *regs, int master)
567{
546 if (master) { 568 if (master) {
569 int reason = uv_nmi_kdb_reason();
570 int ret;
571
572 if (reason < 0)
573 return;
574
547 /* call KGDB NMI handler as MASTER */ 575 /* call KGDB NMI handler as MASTER */
548 ret = kgdb_nmicallin(cpu, X86_TRAP_NMI, regs, 576 ret = kgdb_nmicallin(cpu, X86_TRAP_NMI, regs, reason,
549 &uv_nmi_slave_continue); 577 &uv_nmi_slave_continue);
550 if (ret) { 578 if (ret) {
551 pr_alert("KDB returned error, is kgdboc set?\n"); 579 pr_alert("KGDB returned error, is kgdboc set?\n");
552 atomic_set(&uv_nmi_slave_continue, SLAVE_EXIT); 580 atomic_set(&uv_nmi_slave_continue, SLAVE_EXIT);
553 } 581 }
554 } else { 582 } else {
@@ -567,12 +595,12 @@ static void uv_call_kdb(int cpu, struct pt_regs *regs, int master)
567 uv_nmi_sync_exit(master); 595 uv_nmi_sync_exit(master);
568} 596}
569 597
570#else /* !CONFIG_KGDB_KDB */ 598#else /* !CONFIG_KGDB */
571static inline void uv_call_kdb(int cpu, struct pt_regs *regs, int master) 599static inline void uv_call_kgdb_kdb(int cpu, struct pt_regs *regs, int master)
572{ 600{
573 pr_err("UV: NMI error: KGDB/KDB is not enabled in this kernel\n"); 601 pr_err("UV: NMI error: KGDB is not enabled in this kernel\n");
574} 602}
575#endif /* !CONFIG_KGDB_KDB */ 603#endif /* !CONFIG_KGDB */
576 604
577/* 605/*
578 * UV NMI handler 606 * UV NMI handler
@@ -606,9 +634,9 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
606 if (uv_nmi_action_is("ips") || uv_nmi_action_is("dump")) 634 if (uv_nmi_action_is("ips") || uv_nmi_action_is("dump"))
607 uv_nmi_dump_state(cpu, regs, master); 635 uv_nmi_dump_state(cpu, regs, master);
608 636
609 /* Call KDB if enabled */ 637 /* Call KGDB/KDB if enabled */
610 else if (uv_nmi_action_is("kdb")) 638 else if (uv_nmi_action_is("kdb") || uv_nmi_action_is("kgdb"))
611 uv_call_kdb(cpu, regs, master); 639 uv_call_kgdb_kdb(cpu, regs, master);
612 640
613 /* Clear per_cpu "in nmi" flag */ 641 /* Clear per_cpu "in nmi" flag */
614 atomic_set(&uv_cpu_nmi.state, UV_NMI_STATE_OUT); 642 atomic_set(&uv_cpu_nmi.state, UV_NMI_STATE_OUT);
@@ -634,7 +662,7 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
634/* 662/*
635 * NMI handler for pulling in CPUs when perf events are grabbing our NMI 663 * NMI handler for pulling in CPUs when perf events are grabbing our NMI
636 */ 664 */
637int uv_handle_nmi_ping(unsigned int reason, struct pt_regs *regs) 665static int uv_handle_nmi_ping(unsigned int reason, struct pt_regs *regs)
638{ 666{
639 int ret; 667 int ret;
640 668
@@ -651,7 +679,7 @@ int uv_handle_nmi_ping(unsigned int reason, struct pt_regs *regs)
651 return ret; 679 return ret;
652} 680}
653 681
654void uv_register_nmi_notifier(void) 682static void uv_register_nmi_notifier(void)
655{ 683{
656 if (register_nmi_handler(NMI_UNKNOWN, uv_handle_nmi, 0, "uv")) 684 if (register_nmi_handler(NMI_UNKNOWN, uv_handle_nmi, 0, "uv"))
657 pr_warn("UV: NMI handler failed to register\n"); 685 pr_warn("UV: NMI handler failed to register\n");
@@ -695,6 +723,5 @@ void uv_nmi_setup(void)
695 uv_hub_nmi_per(cpu) = uv_hub_nmi_list[nid]; 723 uv_hub_nmi_per(cpu) = uv_hub_nmi_list[nid];
696 } 724 }
697 BUG_ON(!alloc_cpumask_var(&uv_nmi_cpu_mask, GFP_KERNEL)); 725 BUG_ON(!alloc_cpumask_var(&uv_nmi_cpu_mask, GFP_KERNEL));
726 uv_register_nmi_notifier();
698} 727}
699
700
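The hunk above routes the NMI either to KDB or to a remote KGDB session depending on the "action" module parameter, which can be changed at runtime through /sys/module/uv_nmi/parameters/action. The standalone C sketch below imitates only that string-based dispatch; uv_nmi_action and action_is() mirror the kernel names, while the handlers are stubs standing in for uv_nmi_dump_state() and the kgdb_nmicallin() path.

#include <stdio.h>
#include <string.h>

static char uv_nmi_action[8] = "kdb";	/* module_param_string(action, ...) in the driver */

static int action_is(const char *s)
{
	return strncmp(uv_nmi_action, s, strlen(s)) == 0;
}

static void handle_nmi(void)
{
	if (action_is("ips") || action_is("dump"))
		puts("dump CPU state");		/* uv_nmi_dump_state() */
	else if (action_is("kdb") || action_is("kgdb"))
		puts("enter KGDB/KDB");		/* uv_call_kgdb_kdb() -> kgdb_nmicallin() */
	else
		puts("unknown action");
}

int main(void)
{
	handle_nmi();				/* default action: kdb */
	strcpy(uv_nmi_action, "kgdb");		/* what writing the sysfs parameter does */
	handle_nmi();
	return 0;
}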
diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile
index 9cac82588cbc..3497f14e4dea 100644
--- a/arch/x86/realmode/rm/Makefile
+++ b/arch/x86/realmode/rm/Makefile
@@ -64,20 +64,7 @@ $(obj)/realmode.relocs: $(obj)/realmode.elf FORCE
64 64
65# --------------------------------------------------------------------------- 65# ---------------------------------------------------------------------------
66 66
67# How to compile the 16-bit code. Note we always compile for -march=i386, 67KBUILD_CFLAGS := $(LINUXINCLUDE) $(REALMODE_CFLAGS) -D_SETUP -D_WAKEUP \
68# that way we can complain to the user if the CPU is insufficient. 68 -I$(srctree)/arch/x86/boot
69KBUILD_CFLAGS := $(LINUXINCLUDE) -m32 -g -Os -D_SETUP -D__KERNEL__ -D_WAKEUP \
70 -I$(srctree)/arch/x86/boot \
71 -DDISABLE_BRANCH_PROFILING \
72 -Wall -Wstrict-prototypes \
73 -march=i386 -mregparm=3 \
74 -include $(srctree)/$(src)/../../boot/code16gcc.h \
75 -fno-strict-aliasing -fomit-frame-pointer -fno-pic \
76 -mno-mmx -mno-sse \
77 $(call cc-option, -ffreestanding) \
78 $(call cc-option, -fno-toplevel-reorder,\
79 $(call cc-option, -fno-unit-at-a-time)) \
80 $(call cc-option, -fno-stack-protector) \
81 $(call cc-option, -mpreferred-stack-boundary=2)
82KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ 69KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
83GCOV_PROFILE := n 70GCOV_PROFILE := n
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 11f9285a2ff6..cfbdbdb4e173 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -1025,6 +1025,29 @@ static void emit_relocs(int as_text, int use_real_mode)
1025 } 1025 }
1026} 1026}
1027 1027
1028/*
1029 * As an aid to debugging problems with different linkers,
1030 * print summary information about the relocs.
1031 * Since different linkers tend to emit the sections in
1032 * different orders we use the section names in the output.
1033 */
1034static int do_reloc_info(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
1035 const char *symname)
1036{
1037 printf("%s\t%s\t%s\t%s\n",
1038 sec_name(sec->shdr.sh_info),
1039 rel_type(ELF_R_TYPE(rel->r_info)),
1040 symname,
1041 sec_name(sym->st_shndx));
1042 return 0;
1043}
1044
1045static void print_reloc_info(void)
1046{
1047 printf("reloc section\treloc type\tsymbol\tsymbol section\n");
1048 walk_relocs(do_reloc_info);
1049}
1050
1028#if ELF_BITS == 64 1051#if ELF_BITS == 64
1029# define process process_64 1052# define process process_64
1030#else 1053#else
@@ -1032,7 +1055,8 @@ static void emit_relocs(int as_text, int use_real_mode)
1032#endif 1055#endif
1033 1056
1034void process(FILE *fp, int use_real_mode, int as_text, 1057void process(FILE *fp, int use_real_mode, int as_text,
1035 int show_absolute_syms, int show_absolute_relocs) 1058 int show_absolute_syms, int show_absolute_relocs,
1059 int show_reloc_info)
1036{ 1060{
1037 regex_init(use_real_mode); 1061 regex_init(use_real_mode);
1038 read_ehdr(fp); 1062 read_ehdr(fp);
@@ -1050,5 +1074,9 @@ void process(FILE *fp, int use_real_mode, int as_text,
1050 print_absolute_relocs(); 1074 print_absolute_relocs();
1051 return; 1075 return;
1052 } 1076 }
1077 if (show_reloc_info) {
1078 print_reloc_info();
1079 return;
1080 }
1053 emit_relocs(as_text, use_real_mode); 1081 emit_relocs(as_text, use_real_mode);
1054} 1082}
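For reference, the new --reloc-info mode simply walks every relocation and prints one tab-separated row per entry, using section names so the output stays comparable across linkers. The hedged sketch below reproduces only that output format with invented sample rows; it is not the relocs tool itself.

#include <stdio.h>

struct reloc_row {
	const char *rsec;	/* section containing the relocation */
	const char *rtype;	/* relocation type name */
	const char *sym;	/* symbol name */
	const char *ssec;	/* section the symbol lives in */
};

static int print_row(const struct reloc_row *r)
{
	/* same printf format as do_reloc_info() above */
	printf("%s\t%s\t%s\t%s\n", r->rsec, r->rtype, r->sym, r->ssec);
	return 0;
}

int main(void)
{
	static const struct reloc_row rows[] = {
		{ ".rel.text", "R_386_32",       "startup_32", ".text" },
		{ ".rel.data", "R_386_RELATIVE", "*ABS*",      ".data" },
	};

	printf("reloc section\treloc type\tsymbol\tsymbol section\n");
	for (unsigned int i = 0; i < sizeof(rows) / sizeof(rows[0]); i++)
		print_row(&rows[i]);
	return 0;
}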
diff --git a/arch/x86/tools/relocs.h b/arch/x86/tools/relocs.h
index 07cdb1eca4fa..f59590645b68 100644
--- a/arch/x86/tools/relocs.h
+++ b/arch/x86/tools/relocs.h
@@ -29,8 +29,9 @@ enum symtype {
29}; 29};
30 30
31void process_32(FILE *fp, int use_real_mode, int as_text, 31void process_32(FILE *fp, int use_real_mode, int as_text,
32 int show_absolute_syms, int show_absolute_relocs); 32 int show_absolute_syms, int show_absolute_relocs,
33 int show_reloc_info);
33void process_64(FILE *fp, int use_real_mode, int as_text, 34void process_64(FILE *fp, int use_real_mode, int as_text,
34 int show_absolute_syms, int show_absolute_relocs); 35 int show_absolute_syms, int show_absolute_relocs,
35 36 int show_reloc_info);
36#endif /* RELOCS_H */ 37#endif /* RELOCS_H */
diff --git a/arch/x86/tools/relocs_common.c b/arch/x86/tools/relocs_common.c
index 44d396823a53..acab636bcb34 100644
--- a/arch/x86/tools/relocs_common.c
+++ b/arch/x86/tools/relocs_common.c
@@ -11,12 +11,13 @@ void die(char *fmt, ...)
11 11
12static void usage(void) 12static void usage(void)
13{ 13{
14 die("relocs [--abs-syms|--abs-relocs|--text|--realmode] vmlinux\n"); 14 die("relocs [--abs-syms|--abs-relocs|--reloc-info|--text|--realmode]" \
15 " vmlinux\n");
15} 16}
16 17
17int main(int argc, char **argv) 18int main(int argc, char **argv)
18{ 19{
19 int show_absolute_syms, show_absolute_relocs; 20 int show_absolute_syms, show_absolute_relocs, show_reloc_info;
20 int as_text, use_real_mode; 21 int as_text, use_real_mode;
21 const char *fname; 22 const char *fname;
22 FILE *fp; 23 FILE *fp;
@@ -25,6 +26,7 @@ int main(int argc, char **argv)
25 26
26 show_absolute_syms = 0; 27 show_absolute_syms = 0;
27 show_absolute_relocs = 0; 28 show_absolute_relocs = 0;
29 show_reloc_info = 0;
28 as_text = 0; 30 as_text = 0;
29 use_real_mode = 0; 31 use_real_mode = 0;
30 fname = NULL; 32 fname = NULL;
@@ -39,6 +41,10 @@ int main(int argc, char **argv)
39 show_absolute_relocs = 1; 41 show_absolute_relocs = 1;
40 continue; 42 continue;
41 } 43 }
44 if (strcmp(arg, "--reloc-info") == 0) {
45 show_reloc_info = 1;
46 continue;
47 }
42 if (strcmp(arg, "--text") == 0) { 48 if (strcmp(arg, "--text") == 0) {
43 as_text = 1; 49 as_text = 1;
44 continue; 50 continue;
@@ -67,10 +73,12 @@ int main(int argc, char **argv)
67 rewind(fp); 73 rewind(fp);
68 if (e_ident[EI_CLASS] == ELFCLASS64) 74 if (e_ident[EI_CLASS] == ELFCLASS64)
69 process_64(fp, use_real_mode, as_text, 75 process_64(fp, use_real_mode, as_text,
70 show_absolute_syms, show_absolute_relocs); 76 show_absolute_syms, show_absolute_relocs,
77 show_reloc_info);
71 else 78 else
72 process_32(fp, use_real_mode, as_text, 79 process_32(fp, use_real_mode, as_text,
73 show_absolute_syms, show_absolute_relocs); 80 show_absolute_syms, show_absolute_relocs,
81 show_reloc_info);
74 fclose(fp); 82 fclose(fp);
75 return 0; 83 return 0;
76} 84}
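The option handling itself is a plain strcmp() loop over argv, mirroring the existing --abs-syms/--abs-relocs flags. A minimal standalone sketch of that pattern (not the tool itself; the output line is purely for demonstration):

#include <stdio.h>
#include <string.h>

int main(int argc, char **argv)
{
	int show_reloc_info = 0;
	const char *fname = NULL;

	for (int i = 1; i < argc; i++) {
		if (strcmp(argv[i], "--reloc-info") == 0) {
			show_reloc_info = 1;
			continue;
		}
		fname = argv[i];	/* last non-flag argument names the ELF file */
	}
	printf("file=%s reloc-info=%d\n", fname ? fname : "(none)", show_reloc_info);
	return 0;
}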
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 1a3c76505649..01b90261fa38 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -51,3 +51,7 @@ config XEN_DEBUG_FS
51 Enable statistics output and various tuning options in debugfs. 51 Enable statistics output and various tuning options in debugfs.
52 Enabling this option may incur a significant performance overhead. 52 Enabling this option may incur a significant performance overhead.
53 53
54config XEN_PVH
55 bool "Support for running as a PVH guest"
56 depends on X86_64 && XEN && XEN_PVHVM
57 def_bool n
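The new CONFIG_XEN_PVH symbol is consumed at compile time elsewhere in this series (for example the PVH ELF notes and the grant-table setup). A trivial, hedged illustration of that gating follows; the macro is defined locally only so the example builds on its own, whereas in the kernel it comes from the generated autoconf header.

#include <stdio.h>

#define CONFIG_XEN_PVH 1	/* stand-in for the Kconfig-generated define */

int main(void)
{
#ifdef CONFIG_XEN_PVH
	puts("PVH guest support built in (X86_64 && XEN && XEN_PVHVM)");
#else
	puts("PVH guest support not built in");
#endif
	return 0;
}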
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index fa6ade76ef3f..a4d7b647867f 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -262,8 +262,9 @@ static void __init xen_banner(void)
262 struct xen_extraversion extra; 262 struct xen_extraversion extra;
263 HYPERVISOR_xen_version(XENVER_extraversion, &extra); 263 HYPERVISOR_xen_version(XENVER_extraversion, &extra);
264 264
265 printk(KERN_INFO "Booting paravirtualized kernel on %s\n", 265 pr_info("Booting paravirtualized kernel %son %s\n",
266 pv_info.name); 266 xen_feature(XENFEAT_auto_translated_physmap) ?
267 "with PVH extensions " : "", pv_info.name);
267 printk(KERN_INFO "Xen version: %d.%d%s%s\n", 268 printk(KERN_INFO "Xen version: %d.%d%s%s\n",
268 version >> 16, version & 0xffff, extra.extraversion, 269 version >> 16, version & 0xffff, extra.extraversion,
269 xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); 270 xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
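Throughout this series a PVH guest is recognized by the XENFEAT_auto_translated_physmap feature bit, queried via xen_feature(). The sketch below imitates that test in plain userspace C; the bit numbers follow the Xen features interface, but the feature word is filled in by hand here rather than by xen_setup_features(), so treat it as illustration only.

#include <stdio.h>

enum {
	XENFEAT_writable_page_tables	= 0,
	XENFEAT_auto_translated_physmap	= 2,
	XENFEAT_supervisor_mode_kernel	= 3,
	XENFEAT_hvm_callback_vector	= 8,
};

static unsigned long xen_features;	/* filled by xen_setup_features() in the kernel */

static int xen_feature(int flag)
{
	return (xen_features >> flag) & 1;
}

int main(void)
{
	/* Pretend the hypervisor reported a PVH-capable environment. */
	xen_features = (1UL << XENFEAT_auto_translated_physmap) |
		       (1UL << XENFEAT_supervisor_mode_kernel) |
		       (1UL << XENFEAT_hvm_callback_vector);

	printf("Booting paravirtualized kernel %son Xen\n",
	       xen_feature(XENFEAT_auto_translated_physmap) ?
	       "with PVH extensions " : "");
	return 0;
}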
@@ -433,7 +434,7 @@ static void __init xen_init_cpuid_mask(void)
433 434
434 ax = 1; 435 ax = 1;
435 cx = 0; 436 cx = 0;
436 xen_cpuid(&ax, &bx, &cx, &dx); 437 cpuid(1, &ax, &bx, &cx, &dx);
437 438
438 xsave_mask = 439 xsave_mask =
439 (1 << (X86_FEATURE_XSAVE % 32)) | 440 (1 << (X86_FEATURE_XSAVE % 32)) |
@@ -1142,8 +1143,9 @@ void xen_setup_vcpu_info_placement(void)
1142 xen_vcpu_setup(cpu); 1143 xen_vcpu_setup(cpu);
1143 1144
1144 /* xen_vcpu_setup managed to place the vcpu_info within the 1145 /* xen_vcpu_setup managed to place the vcpu_info within the
1145 percpu area for all cpus, so make use of it */ 1146 * percpu area for all cpus, so make use of it. Note that for
1146 if (have_vcpu_info_placement) { 1147 * PVH we want to use the native IRQ mechanism. */
1148 if (have_vcpu_info_placement && !xen_pvh_domain()) {
1147 pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct); 1149 pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
1148 pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct); 1150 pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct);
1149 pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct); 1151 pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
@@ -1407,9 +1409,49 @@ static void __init xen_boot_params_init_edd(void)
1407 * Set up the GDT and segment registers for -fstack-protector. Until 1409 * Set up the GDT and segment registers for -fstack-protector. Until
1408 * we do this, we have to be careful not to call any stack-protected 1410 * we do this, we have to be careful not to call any stack-protected
1409 * function, which is most of the kernel. 1411 * function, which is most of the kernel.
1412 *
1413 * Note that it is __ref because the only caller of this after init
1414 * is PVH which is not going to use xen_load_gdt_boot or other
1415 * __init functions.
1410 */ 1416 */
1411static void __init xen_setup_stackprotector(void) 1417static void __ref xen_setup_gdt(int cpu)
1412{ 1418{
1419 if (xen_feature(XENFEAT_auto_translated_physmap)) {
1420#ifdef CONFIG_X86_64
1421 unsigned long dummy;
1422
1423 load_percpu_segment(cpu); /* We need to access per-cpu area */
1424 switch_to_new_gdt(cpu); /* GDT and GS set */
1425
1426 /* We are switching from the Xen-provided GDT to our HVM mode
1427 * GDT. The new GDT has __KERNEL_CS with CS.L = 1
1428 * and we are jumping to reload it.
1429 */
1430 asm volatile ("pushq %0\n"
1431 "leaq 1f(%%rip),%0\n"
1432 "pushq %0\n"
1433 "lretq\n"
1434 "1:\n"
1435 : "=&r" (dummy) : "0" (__KERNEL_CS));
1436
1437 /*
1438 * While not needed, we also set the %es, %ds, and %fs
1439 * to zero. We don't care about %ss as it is NULL.
1440 * Strictly speaking this is not needed as Xen zeros those
1441 * out (and also MSR_FS_BASE, MSR_GS_BASE, MSR_KERNEL_GS_BASE)
1442 *
1443 * Linux zeros them in cpu_init() and in secondary_startup_64
1444 * (for BSP).
1445 */
1446 loadsegment(es, 0);
1447 loadsegment(ds, 0);
1448 loadsegment(fs, 0);
1449#else
1450 /* PVH: TODO Implement. */
1451 BUG();
1452#endif
1453 return; /* PVH does not need any PV GDT ops. */
1454 }
1413 pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot; 1455 pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot;
1414 pv_cpu_ops.load_gdt = xen_load_gdt_boot; 1456 pv_cpu_ops.load_gdt = xen_load_gdt_boot;
1415 1457
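The lretq sequence in xen_setup_gdt() above reloads %cs by far-returning to the next instruction once the new GDT is in place. The x86-64-only userspace program below performs the same reload-to-self trick with whatever code segment it is already running on, purely to demonstrate the instruction sequence; it has none of the GDT switching that surrounds it in the kernel.

#include <stdio.h>

int main(void)
{
	unsigned long cs, scratch;

	/* Read the current code segment selector. */
	asm volatile("mov %%cs, %0" : "=r" (cs));

	/* Push CS, push the target RIP, then far-return to reload %cs. */
	asm volatile("pushq %1\n\t"
		     "leaq 1f(%%rip), %0\n\t"
		     "pushq %0\n\t"
		     "lretq\n"
		     "1:\n\t"
		     : "=&r" (scratch)
		     : "r" (cs)
		     : "memory");

	printf("still running, cs=0x%lx\n", cs);
	return 0;
}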
@@ -1420,6 +1462,46 @@ static void __init xen_setup_stackprotector(void)
1420 pv_cpu_ops.load_gdt = xen_load_gdt; 1462 pv_cpu_ops.load_gdt = xen_load_gdt;
1421} 1463}
1422 1464
1465/*
1466 * A PV guest starts with default flags that are not set for PVH, set them
1467 * here asap.
1468 */
1469static void xen_pvh_set_cr_flags(int cpu)
1470{
1471
1472 /* Some of these are set up in 'secondary_startup_64'. The others:
1473 * X86_CR0_TS, X86_CR0_PE, X86_CR0_ET are set by Xen for HVM guests
1474 * (whose code paths PVH shares), while X86_CR0_PG is for PVH. */
1475 write_cr0(read_cr0() | X86_CR0_MP | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM);
1476}
1477
1478/*
1479 * Note that it is __ref because the only caller of this after init
1480 * is PVH which is not going to use xen_load_gdt_boot or other
1481 * __init functions.
1482 */
1483void __ref xen_pvh_secondary_vcpu_init(int cpu)
1484{
1485 xen_setup_gdt(cpu);
1486 xen_pvh_set_cr_flags(cpu);
1487}
1488
1489static void __init xen_pvh_early_guest_init(void)
1490{
1491 if (!xen_feature(XENFEAT_auto_translated_physmap))
1492 return;
1493
1494 if (!xen_feature(XENFEAT_hvm_callback_vector))
1495 return;
1496
1497 xen_have_vector_callback = 1;
1498 xen_pvh_set_cr_flags(0);
1499
1500#ifdef CONFIG_X86_32
1501 BUG(); /* PVH: Implement proper support. */
1502#endif
1503}
1504
1423/* First C function to be called on Xen boot */ 1505/* First C function to be called on Xen boot */
1424asmlinkage void __init xen_start_kernel(void) 1506asmlinkage void __init xen_start_kernel(void)
1425{ 1507{
@@ -1431,13 +1513,16 @@ asmlinkage void __init xen_start_kernel(void)
1431 1513
1432 xen_domain_type = XEN_PV_DOMAIN; 1514 xen_domain_type = XEN_PV_DOMAIN;
1433 1515
1516 xen_setup_features();
1517 xen_pvh_early_guest_init();
1434 xen_setup_machphys_mapping(); 1518 xen_setup_machphys_mapping();
1435 1519
1436 /* Install Xen paravirt ops */ 1520 /* Install Xen paravirt ops */
1437 pv_info = xen_info; 1521 pv_info = xen_info;
1438 pv_init_ops = xen_init_ops; 1522 pv_init_ops = xen_init_ops;
1439 pv_cpu_ops = xen_cpu_ops;
1440 pv_apic_ops = xen_apic_ops; 1523 pv_apic_ops = xen_apic_ops;
1524 if (!xen_pvh_domain())
1525 pv_cpu_ops = xen_cpu_ops;
1441 1526
1442 x86_init.resources.memory_setup = xen_memory_setup; 1527 x86_init.resources.memory_setup = xen_memory_setup;
1443 x86_init.oem.arch_setup = xen_arch_setup; 1528 x86_init.oem.arch_setup = xen_arch_setup;
@@ -1469,17 +1554,14 @@ asmlinkage void __init xen_start_kernel(void)
1469 /* Work out if we support NX */ 1554 /* Work out if we support NX */
1470 x86_configure_nx(); 1555 x86_configure_nx();
1471 1556
1472 xen_setup_features();
1473
1474 /* Get mfn list */ 1557 /* Get mfn list */
1475 if (!xen_feature(XENFEAT_auto_translated_physmap)) 1558 xen_build_dynamic_phys_to_machine();
1476 xen_build_dynamic_phys_to_machine();
1477 1559
1478 /* 1560 /*
1479 * Set up kernel GDT and segment registers, mainly so that 1561 * Set up kernel GDT and segment registers, mainly so that
1480 * -fstack-protector code can be executed. 1562 * -fstack-protector code can be executed.
1481 */ 1563 */
1482 xen_setup_stackprotector(); 1564 xen_setup_gdt(0);
1483 1565
1484 xen_init_irq_ops(); 1566 xen_init_irq_ops();
1485 xen_init_cpuid_mask(); 1567 xen_init_cpuid_mask();
@@ -1548,14 +1630,18 @@ asmlinkage void __init xen_start_kernel(void)
1548 /* set the limit of our address space */ 1630 /* set the limit of our address space */
1549 xen_reserve_top(); 1631 xen_reserve_top();
1550 1632
1551 /* We used to do this in xen_arch_setup, but that is too late on AMD 1633 /* PVH: runs at default kernel iopl of 0 */
1552 * were early_cpu_init (run before ->arch_setup()) calls early_amd_init 1634 if (!xen_pvh_domain()) {
1553 * which pokes 0xcf8 port. 1635 /*
1554 */ 1636 * We used to do this in xen_arch_setup, but that is too late
1555 set_iopl.iopl = 1; 1637 * on AMD where early_cpu_init (run before ->arch_setup()) calls
1556 rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); 1638 * early_amd_init which pokes 0xcf8 port.
1557 if (rc != 0) 1639 */
1558 xen_raw_printk("physdev_op failed %d\n", rc); 1640 set_iopl.iopl = 1;
1641 rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
1642 if (rc != 0)
1643 xen_raw_printk("physdev_op failed %d\n", rc);
1644 }
1559 1645
1560#ifdef CONFIG_X86_32 1646#ifdef CONFIG_X86_32
1561 /* set up basic CPUID stuff */ 1647 /* set up basic CPUID stuff */
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
index 3a5f55d51907..c98583588580 100644
--- a/arch/x86/xen/grant-table.c
+++ b/arch/x86/xen/grant-table.c
@@ -125,3 +125,67 @@ void arch_gnttab_unmap(void *shared, unsigned long nr_gframes)
125 apply_to_page_range(&init_mm, (unsigned long)shared, 125 apply_to_page_range(&init_mm, (unsigned long)shared,
126 PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL); 126 PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL);
127} 127}
128#ifdef CONFIG_XEN_PVH
129#include <xen/balloon.h>
130#include <xen/events.h>
131#include <xen/xen.h>
132#include <linux/slab.h>
133static int __init xlated_setup_gnttab_pages(void)
134{
135 struct page **pages;
136 xen_pfn_t *pfns;
137 int rc;
138 unsigned int i;
139 unsigned long nr_grant_frames = gnttab_max_grant_frames();
140
141 BUG_ON(nr_grant_frames == 0);
142 pages = kcalloc(nr_grant_frames, sizeof(pages[0]), GFP_KERNEL);
143 if (!pages)
144 return -ENOMEM;
145
146 pfns = kcalloc(nr_grant_frames, sizeof(pfns[0]), GFP_KERNEL);
147 if (!pfns) {
148 kfree(pages);
149 return -ENOMEM;
150 }
151 rc = alloc_xenballooned_pages(nr_grant_frames, pages, 0 /* lowmem */);
152 if (rc) {
153 pr_warn("%s Couldn't balloon alloc %ld pfns rc:%d\n", __func__,
154 nr_grant_frames, rc);
155 kfree(pages);
156 kfree(pfns);
157 return rc;
158 }
159 for (i = 0; i < nr_grant_frames; i++)
160 pfns[i] = page_to_pfn(pages[i]);
161
162 rc = arch_gnttab_map_shared(pfns, nr_grant_frames, nr_grant_frames,
163 &xen_auto_xlat_grant_frames.vaddr);
164
165 if (rc) {
166 pr_warn("%s Couldn't map %ld pfns rc:%d\n", __func__,
167 nr_grant_frames, rc);
168 free_xenballooned_pages(nr_grant_frames, pages);
169 kfree(pages);
170 kfree(pfns);
171 return rc;
172 }
173 kfree(pages);
174
175 xen_auto_xlat_grant_frames.pfn = pfns;
176 xen_auto_xlat_grant_frames.count = nr_grant_frames;
177
178 return 0;
179}
180
181static int __init xen_pvh_gnttab_setup(void)
182{
183 if (!xen_pvh_domain())
184 return -ENODEV;
185
186 return xlated_setup_gnttab_pages();
187}
188/* Call it _before_ __gnttab_init as we need to initialize the
189 * xen_auto_xlat_grant_frames first. */
190core_initcall(xen_pvh_gnttab_setup);
191#endif
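xlated_setup_gnttab_pages() above allocates two parallel arrays, balloons out pages, and has to unwind everything on each failure path while keeping the pfn array alive on success. The standalone sketch below shows just that allocate/populate/cleanup shape with the Xen calls stubbed out; names and values are illustrative, and in the real code the surviving pfns are stored in xen_auto_xlat_grant_frames.

#include <stdio.h>
#include <stdlib.h>

/* Stand-in for arch_gnttab_map_shared(); always succeeds here. */
static int map_frames(const unsigned long *pfns, unsigned long n)
{
	return (pfns && n) ? 0 : -1;
}

static int setup_gnttab_pages(unsigned long nr_frames)
{
	void **pages;
	unsigned long *pfns;
	int rc = -1;

	pages = calloc(nr_frames, sizeof(*pages));
	pfns = calloc(nr_frames, sizeof(*pfns));
	if (!pages || !pfns)
		goto err;

	for (unsigned long i = 0; i < nr_frames; i++)
		pfns[i] = 0x1000 + i;	/* page_to_pfn() of the ballooned page */

	rc = map_frames(pfns, nr_frames);
	if (rc)
		goto err;

	free(pages);	/* pages no longer needed; pfns would be kept by the caller
			 * (leaked here for brevity) */
	return 0;
err:
	free(pages);	/* free(NULL) is a no-op, so partial allocation is fine */
	free(pfns);
	return rc;
}

int main(void)
{
	printf("setup_gnttab_pages: %d\n", setup_gnttab_pages(4));
	return 0;
}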
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 0da7f863056f..08f763de26fe 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -5,6 +5,7 @@
5#include <xen/interface/xen.h> 5#include <xen/interface/xen.h>
6#include <xen/interface/sched.h> 6#include <xen/interface/sched.h>
7#include <xen/interface/vcpu.h> 7#include <xen/interface/vcpu.h>
8#include <xen/features.h>
8#include <xen/events.h> 9#include <xen/events.h>
9 10
10#include <asm/xen/hypercall.h> 11#include <asm/xen/hypercall.h>
@@ -22,7 +23,7 @@ void xen_force_evtchn_callback(void)
22 (void)HYPERVISOR_xen_version(0, NULL); 23 (void)HYPERVISOR_xen_version(0, NULL);
23} 24}
24 25
25static unsigned long xen_save_fl(void) 26asmlinkage unsigned long xen_save_fl(void)
26{ 27{
27 struct vcpu_info *vcpu; 28 struct vcpu_info *vcpu;
28 unsigned long flags; 29 unsigned long flags;
@@ -40,7 +41,7 @@ static unsigned long xen_save_fl(void)
40} 41}
41PV_CALLEE_SAVE_REGS_THUNK(xen_save_fl); 42PV_CALLEE_SAVE_REGS_THUNK(xen_save_fl);
42 43
43static void xen_restore_fl(unsigned long flags) 44__visible void xen_restore_fl(unsigned long flags)
44{ 45{
45 struct vcpu_info *vcpu; 46 struct vcpu_info *vcpu;
46 47
@@ -62,7 +63,7 @@ static void xen_restore_fl(unsigned long flags)
62} 63}
63PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl); 64PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl);
64 65
65static void xen_irq_disable(void) 66asmlinkage void xen_irq_disable(void)
66{ 67{
67 /* There's a one instruction preempt window here. We need to 68 /* There's a one instruction preempt window here. We need to
68 make sure we're don't switch CPUs between getting the vcpu 69 make sure we're don't switch CPUs between getting the vcpu
@@ -73,7 +74,7 @@ static void xen_irq_disable(void)
73} 74}
74PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable); 75PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable);
75 76
76static void xen_irq_enable(void) 77asmlinkage void xen_irq_enable(void)
77{ 78{
78 struct vcpu_info *vcpu; 79 struct vcpu_info *vcpu;
79 80
@@ -128,6 +129,8 @@ static const struct pv_irq_ops xen_irq_ops __initconst = {
128 129
129void __init xen_init_irq_ops(void) 130void __init xen_init_irq_ops(void)
130{ 131{
131 pv_irq_ops = xen_irq_ops; 132 /* For PVH we use default pv_irq_ops settings. */
133 if (!xen_feature(XENFEAT_hvm_callback_vector))
134 pv_irq_ops = xen_irq_ops;
132 x86_init.irqs.intr_init = xen_init_IRQ; 135 x86_init.irqs.intr_init = xen_init_IRQ;
133} 136}
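The change to xen_init_irq_ops() keeps the default (native) pv_irq_ops when the guest has an HVM callback vector, i.e. for PVH. A hedged sketch of that conditional-override pattern with a tiny ops table; every function here is a stub, not the real paravirt ops.

#include <stdio.h>

struct irq_ops {
	void (*irq_disable)(void);
	void (*irq_enable)(void);
};

static void native_disable(void) { puts("native irq_disable"); }
static void native_enable(void)  { puts("native irq_enable"); }
static void xen_disable(void)    { puts("xen irq_disable (shared-info mask)"); }
static void xen_enable(void)     { puts("xen irq_enable (shared-info unmask)"); }

static struct irq_ops pv_irq_ops = { native_disable, native_enable };
static const struct irq_ops xen_irq_ops = { xen_disable, xen_enable };

static void init_irq_ops(int has_hvm_callback_vector)
{
	/* PVH (callback vector present) keeps the default native ops. */
	if (!has_hvm_callback_vector)
		pv_irq_ops = xen_irq_ops;
}

int main(void)
{
	init_irq_ops(1);	/* PVH-like guest */
	pv_irq_ops.irq_disable();
	pv_irq_ops.irq_enable();
	return 0;
}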
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index ce563be09cc1..2423ef04ffea 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -431,7 +431,7 @@ static pteval_t iomap_pte(pteval_t val)
431 return val; 431 return val;
432} 432}
433 433
434static pteval_t xen_pte_val(pte_t pte) 434__visible pteval_t xen_pte_val(pte_t pte)
435{ 435{
436 pteval_t pteval = pte.pte; 436 pteval_t pteval = pte.pte;
437#if 0 437#if 0
@@ -448,7 +448,7 @@ static pteval_t xen_pte_val(pte_t pte)
448} 448}
449PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val); 449PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);
450 450
451static pgdval_t xen_pgd_val(pgd_t pgd) 451__visible pgdval_t xen_pgd_val(pgd_t pgd)
452{ 452{
453 return pte_mfn_to_pfn(pgd.pgd); 453 return pte_mfn_to_pfn(pgd.pgd);
454} 454}
@@ -479,7 +479,7 @@ void xen_set_pat(u64 pat)
479 WARN_ON(pat != 0x0007010600070106ull); 479 WARN_ON(pat != 0x0007010600070106ull);
480} 480}
481 481
482static pte_t xen_make_pte(pteval_t pte) 482__visible pte_t xen_make_pte(pteval_t pte)
483{ 483{
484 phys_addr_t addr = (pte & PTE_PFN_MASK); 484 phys_addr_t addr = (pte & PTE_PFN_MASK);
485#if 0 485#if 0
@@ -514,14 +514,14 @@ static pte_t xen_make_pte(pteval_t pte)
514} 514}
515PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte); 515PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
516 516
517static pgd_t xen_make_pgd(pgdval_t pgd) 517__visible pgd_t xen_make_pgd(pgdval_t pgd)
518{ 518{
519 pgd = pte_pfn_to_mfn(pgd); 519 pgd = pte_pfn_to_mfn(pgd);
520 return native_make_pgd(pgd); 520 return native_make_pgd(pgd);
521} 521}
522PV_CALLEE_SAVE_REGS_THUNK(xen_make_pgd); 522PV_CALLEE_SAVE_REGS_THUNK(xen_make_pgd);
523 523
524static pmdval_t xen_pmd_val(pmd_t pmd) 524__visible pmdval_t xen_pmd_val(pmd_t pmd)
525{ 525{
526 return pte_mfn_to_pfn(pmd.pmd); 526 return pte_mfn_to_pfn(pmd.pmd);
527} 527}
@@ -580,7 +580,7 @@ static void xen_pmd_clear(pmd_t *pmdp)
580} 580}
581#endif /* CONFIG_X86_PAE */ 581#endif /* CONFIG_X86_PAE */
582 582
583static pmd_t xen_make_pmd(pmdval_t pmd) 583__visible pmd_t xen_make_pmd(pmdval_t pmd)
584{ 584{
585 pmd = pte_pfn_to_mfn(pmd); 585 pmd = pte_pfn_to_mfn(pmd);
586 return native_make_pmd(pmd); 586 return native_make_pmd(pmd);
@@ -588,13 +588,13 @@ static pmd_t xen_make_pmd(pmdval_t pmd)
588PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd); 588PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd);
589 589
590#if PAGETABLE_LEVELS == 4 590#if PAGETABLE_LEVELS == 4
591static pudval_t xen_pud_val(pud_t pud) 591__visible pudval_t xen_pud_val(pud_t pud)
592{ 592{
593 return pte_mfn_to_pfn(pud.pud); 593 return pte_mfn_to_pfn(pud.pud);
594} 594}
595PV_CALLEE_SAVE_REGS_THUNK(xen_pud_val); 595PV_CALLEE_SAVE_REGS_THUNK(xen_pud_val);
596 596
597static pud_t xen_make_pud(pudval_t pud) 597__visible pud_t xen_make_pud(pudval_t pud)
598{ 598{
599 pud = pte_pfn_to_mfn(pud); 599 pud = pte_pfn_to_mfn(pud);
600 600
@@ -1198,44 +1198,40 @@ static void __init xen_cleanhighmap(unsigned long vaddr,
1198 * instead of somewhere later and be confusing. */ 1198 * instead of somewhere later and be confusing. */
1199 xen_mc_flush(); 1199 xen_mc_flush();
1200} 1200}
1201#endif 1201static void __init xen_pagetable_p2m_copy(void)
1202static void __init xen_pagetable_init(void)
1203{ 1202{
1204#ifdef CONFIG_X86_64
1205 unsigned long size; 1203 unsigned long size;
1206 unsigned long addr; 1204 unsigned long addr;
1207#endif 1205 unsigned long new_mfn_list;
1208 paging_init(); 1206
1209 xen_setup_shared_info(); 1207 if (xen_feature(XENFEAT_auto_translated_physmap))
1210#ifdef CONFIG_X86_64 1208 return;
1211 if (!xen_feature(XENFEAT_auto_translated_physmap)) { 1209
1212 unsigned long new_mfn_list; 1210 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
1213 1211
1214 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); 1212 new_mfn_list = xen_revector_p2m_tree();
1215 1213 /* No memory or already called. */
1216 /* On 32-bit, we get zero so this never gets executed. */ 1214 if (!new_mfn_list || new_mfn_list == xen_start_info->mfn_list)
1217 new_mfn_list = xen_revector_p2m_tree(); 1215 return;
1218 if (new_mfn_list && new_mfn_list != xen_start_info->mfn_list) { 1216
1219 /* using __ka address and sticking INVALID_P2M_ENTRY! */ 1217 /* using __ka address and sticking INVALID_P2M_ENTRY! */
1220 memset((void *)xen_start_info->mfn_list, 0xff, size); 1218 memset((void *)xen_start_info->mfn_list, 0xff, size);
1221 1219
1222 /* We should be in __ka space. */ 1220 /* We should be in __ka space. */
1223 BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map); 1221 BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map);
1224 addr = xen_start_info->mfn_list; 1222 addr = xen_start_info->mfn_list;
1225 /* We roundup to the PMD, which means that if anybody at this stage is 1223 /* We roundup to the PMD, which means that if anybody at this stage is
1226 * using the __ka address of xen_start_info or xen_start_info->shared_info 1224 * using the __ka address of xen_start_info or xen_start_info->shared_info
1227 * they are going to crash. Fortunately we have already revectored 1225 * they are going to crash. Fortunately we have already revectored
1228 * in xen_setup_kernel_pagetable and in xen_setup_shared_info. */ 1226 * in xen_setup_kernel_pagetable and in xen_setup_shared_info. */
1229 size = roundup(size, PMD_SIZE); 1227 size = roundup(size, PMD_SIZE);
1230 xen_cleanhighmap(addr, addr + size); 1228 xen_cleanhighmap(addr, addr + size);
1231 1229
1232 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); 1230 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
1233 memblock_free(__pa(xen_start_info->mfn_list), size); 1231 memblock_free(__pa(xen_start_info->mfn_list), size);
1234 /* And revector! Bye bye old array */ 1232 /* And revector! Bye bye old array */
1235 xen_start_info->mfn_list = new_mfn_list; 1233 xen_start_info->mfn_list = new_mfn_list;
1236 } else 1234
1237 goto skip;
1238 }
1239 /* At this stage, cleanup_highmap has already cleaned __ka space 1235 /* At this stage, cleanup_highmap has already cleaned __ka space
1240 * from _brk_limit way up to the max_pfn_mapped (which is the end of 1236 * from _brk_limit way up to the max_pfn_mapped (which is the end of
1241 * the ramdisk). We continue on, erasing PMD entries that point to page 1237 * the ramdisk). We continue on, erasing PMD entries that point to page
@@ -1255,7 +1251,15 @@ static void __init xen_pagetable_init(void)
1255 * anything at this stage. */ 1251 * anything at this stage. */
1256 xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1); 1252 xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1);
1257#endif 1253#endif
1258skip: 1254}
1255#endif
1256
1257static void __init xen_pagetable_init(void)
1258{
1259 paging_init();
1260 xen_setup_shared_info();
1261#ifdef CONFIG_X86_64
1262 xen_pagetable_p2m_copy();
1259#endif 1263#endif
1260 xen_post_allocator_init(); 1264 xen_post_allocator_init();
1261} 1265}
@@ -1753,6 +1757,10 @@ static void set_page_prot_flags(void *addr, pgprot_t prot, unsigned long flags)
1753 unsigned long pfn = __pa(addr) >> PAGE_SHIFT; 1757 unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
1754 pte_t pte = pfn_pte(pfn, prot); 1758 pte_t pte = pfn_pte(pfn, prot);
1755 1759
1760 /* For PVH no need to set R/O or R/W to pin them or unpin them. */
1761 if (xen_feature(XENFEAT_auto_translated_physmap))
1762 return;
1763
1756 if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags)) 1764 if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags))
1757 BUG(); 1765 BUG();
1758} 1766}
@@ -1863,6 +1871,7 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
1863 * but that's enough to get __va working. We need to fill in the rest 1871 * but that's enough to get __va working. We need to fill in the rest
1864 * of the physical mapping once some sort of allocator has been set 1872 * of the physical mapping once some sort of allocator has been set
1865 * up. 1873 * up.
1874 * NOTE: for PVH, the page tables are native.
1866 */ 1875 */
1867void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) 1876void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
1868{ 1877{
@@ -1884,17 +1893,18 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
1884 /* Zap identity mapping */ 1893 /* Zap identity mapping */
1885 init_level4_pgt[0] = __pgd(0); 1894 init_level4_pgt[0] = __pgd(0);
1886 1895
1887 /* Pre-constructed entries are in pfn, so convert to mfn */ 1896 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
1888 /* L4[272] -> level3_ident_pgt 1897 /* Pre-constructed entries are in pfn, so convert to mfn */
1889 * L4[511] -> level3_kernel_pgt */ 1898 /* L4[272] -> level3_ident_pgt
1890 convert_pfn_mfn(init_level4_pgt); 1899 * L4[511] -> level3_kernel_pgt */
1891 1900 convert_pfn_mfn(init_level4_pgt);
1892 /* L3_i[0] -> level2_ident_pgt */ 1901
1893 convert_pfn_mfn(level3_ident_pgt); 1902 /* L3_i[0] -> level2_ident_pgt */
1894 /* L3_k[510] -> level2_kernel_pgt 1903 convert_pfn_mfn(level3_ident_pgt);
1895 * L3_i[511] -> level2_fixmap_pgt */ 1904 /* L3_k[510] -> level2_kernel_pgt
1896 convert_pfn_mfn(level3_kernel_pgt); 1905 * L3_i[511] -> level2_fixmap_pgt */
1897 1906 convert_pfn_mfn(level3_kernel_pgt);
1907 }
1898 /* We get [511][511] and have Xen's version of level2_kernel_pgt */ 1908 /* We get [511][511] and have Xen's version of level2_kernel_pgt */
1899 l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); 1909 l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
1900 l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); 1910 l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
@@ -1918,31 +1928,33 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
1918 copy_page(level2_fixmap_pgt, l2); 1928 copy_page(level2_fixmap_pgt, l2);
1919 /* Note that we don't do anything with level1_fixmap_pgt which 1929 /* Note that we don't do anything with level1_fixmap_pgt which
1920 * we don't need. */ 1930 * we don't need. */
1931 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
1932 /* Make pagetable pieces RO */
1933 set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
1934 set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
1935 set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
1936 set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
1937 set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
1938 set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
1939 set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
1940
1941 /* Pin down new L4 */
1942 pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
1943 PFN_DOWN(__pa_symbol(init_level4_pgt)));
1944
1945 /* Unpin Xen-provided one */
1946 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
1921 1947
1922 /* Make pagetable pieces RO */ 1948 /*
1923 set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); 1949 * At this stage there can be no user pgd, and no page
1924 set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); 1950 * structure to attach it to, so make sure we just set kernel
1925 set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); 1951 * pgd.
1926 set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); 1952 */
1927 set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO); 1953 xen_mc_batch();
1928 set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); 1954 __xen_write_cr3(true, __pa(init_level4_pgt));
1929 set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); 1955 xen_mc_issue(PARAVIRT_LAZY_CPU);
1930 1956 } else
1931 /* Pin down new L4 */ 1957 native_write_cr3(__pa(init_level4_pgt));
1932 pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
1933 PFN_DOWN(__pa_symbol(init_level4_pgt)));
1934
1935 /* Unpin Xen-provided one */
1936 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
1937
1938 /*
1939 * At this stage there can be no user pgd, and no page
1940 * structure to attach it to, so make sure we just set kernel
1941 * pgd.
1942 */
1943 xen_mc_batch();
1944 __xen_write_cr3(true, __pa(init_level4_pgt));
1945 xen_mc_issue(PARAVIRT_LAZY_CPU);
1946 1958
1947 /* We can't that easily rip out L3 and L2, as the Xen pagetables are 1959 /* We can't that easily rip out L3 and L2, as the Xen pagetables are
1948 * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for 1960 * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for
@@ -2103,6 +2115,9 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
2103 2115
2104static void __init xen_post_allocator_init(void) 2116static void __init xen_post_allocator_init(void)
2105{ 2117{
2118 if (xen_feature(XENFEAT_auto_translated_physmap))
2119 return;
2120
2106 pv_mmu_ops.set_pte = xen_set_pte; 2121 pv_mmu_ops.set_pte = xen_set_pte;
2107 pv_mmu_ops.set_pmd = xen_set_pmd; 2122 pv_mmu_ops.set_pmd = xen_set_pmd;
2108 pv_mmu_ops.set_pud = xen_set_pud; 2123 pv_mmu_ops.set_pud = xen_set_pud;
@@ -2207,6 +2222,15 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
2207void __init xen_init_mmu_ops(void) 2222void __init xen_init_mmu_ops(void)
2208{ 2223{
2209 x86_init.paging.pagetable_init = xen_pagetable_init; 2224 x86_init.paging.pagetable_init = xen_pagetable_init;
2225
2226 /* Optimization - we can use the HVM one but it has no idea which
2227 * VCPUs are descheduled - which means that it will needlessly IPI
2228 * them. Xen knows so let it do the job.
2229 */
2230 if (xen_feature(XENFEAT_auto_translated_physmap)) {
2231 pv_mmu_ops.flush_tlb_others = xen_flush_tlb_others;
2232 return;
2233 }
2210 pv_mmu_ops = xen_mmu_ops; 2234 pv_mmu_ops = xen_mmu_ops;
2211 2235
2212 memset(dummy_mapping, 0xff, PAGE_SIZE); 2236 memset(dummy_mapping, 0xff, PAGE_SIZE);
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 2ae8699e8767..8009acbe41e4 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -280,6 +280,9 @@ void __ref xen_build_mfn_list_list(void)
280{ 280{
281 unsigned long pfn; 281 unsigned long pfn;
282 282
283 if (xen_feature(XENFEAT_auto_translated_physmap))
284 return;
285
283 /* Pre-initialize p2m_top_mfn to be completely missing */ 286 /* Pre-initialize p2m_top_mfn to be completely missing */
284 if (p2m_top_mfn == NULL) { 287 if (p2m_top_mfn == NULL) {
285 p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); 288 p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
@@ -336,6 +339,9 @@ void __ref xen_build_mfn_list_list(void)
336 339
337void xen_setup_mfn_list_list(void) 340void xen_setup_mfn_list_list(void)
338{ 341{
342 if (xen_feature(XENFEAT_auto_translated_physmap))
343 return;
344
339 BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); 345 BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
340 346
341 HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = 347 HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
@@ -346,10 +352,15 @@ void xen_setup_mfn_list_list(void)
346/* Set up p2m_top to point to the domain-builder provided p2m pages */ 352/* Set up p2m_top to point to the domain-builder provided p2m pages */
347void __init xen_build_dynamic_phys_to_machine(void) 353void __init xen_build_dynamic_phys_to_machine(void)
348{ 354{
349 unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list; 355 unsigned long *mfn_list;
350 unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); 356 unsigned long max_pfn;
351 unsigned long pfn; 357 unsigned long pfn;
352 358
359 if (xen_feature(XENFEAT_auto_translated_physmap))
360 return;
361
362 mfn_list = (unsigned long *)xen_start_info->mfn_list;
363 max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
353 xen_max_p2m_pfn = max_pfn; 364 xen_max_p2m_pfn = max_pfn;
354 365
355 p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); 366 p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
@@ -888,13 +899,6 @@ int m2p_add_override(unsigned long mfn, struct page *page,
888 "m2p_add_override: pfn %lx not mapped", pfn)) 899 "m2p_add_override: pfn %lx not mapped", pfn))
889 return -EINVAL; 900 return -EINVAL;
890 } 901 }
891 WARN_ON(PagePrivate(page));
892 SetPagePrivate(page);
893 set_page_private(page, mfn);
894 page->index = pfn_to_mfn(pfn);
895
896 if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))))
897 return -ENOMEM;
898 902
899 if (kmap_op != NULL) { 903 if (kmap_op != NULL) {
900 if (!PageHighMem(page)) { 904 if (!PageHighMem(page)) {
@@ -933,19 +937,16 @@ int m2p_add_override(unsigned long mfn, struct page *page,
933} 937}
934EXPORT_SYMBOL_GPL(m2p_add_override); 938EXPORT_SYMBOL_GPL(m2p_add_override);
935int m2p_remove_override(struct page *page, 939int m2p_remove_override(struct page *page,
936 struct gnttab_map_grant_ref *kmap_op) 940 struct gnttab_map_grant_ref *kmap_op,
941 unsigned long mfn)
937{ 942{
938 unsigned long flags; 943 unsigned long flags;
939 unsigned long mfn;
940 unsigned long pfn; 944 unsigned long pfn;
941 unsigned long uninitialized_var(address); 945 unsigned long uninitialized_var(address);
942 unsigned level; 946 unsigned level;
943 pte_t *ptep = NULL; 947 pte_t *ptep = NULL;
944 948
945 pfn = page_to_pfn(page); 949 pfn = page_to_pfn(page);
946 mfn = get_phys_to_machine(pfn);
947 if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT))
948 return -EINVAL;
949 950
950 if (!PageHighMem(page)) { 951 if (!PageHighMem(page)) {
951 address = (unsigned long)__va(pfn << PAGE_SHIFT); 952 address = (unsigned long)__va(pfn << PAGE_SHIFT);
@@ -959,10 +960,7 @@ int m2p_remove_override(struct page *page,
959 spin_lock_irqsave(&m2p_override_lock, flags); 960 spin_lock_irqsave(&m2p_override_lock, flags);
960 list_del(&page->lru); 961 list_del(&page->lru);
961 spin_unlock_irqrestore(&m2p_override_lock, flags); 962 spin_unlock_irqrestore(&m2p_override_lock, flags);
962 WARN_ON(!PagePrivate(page));
963 ClearPagePrivate(page);
964 963
965 set_phys_to_machine(pfn, page->index);
966 if (kmap_op != NULL) { 964 if (kmap_op != NULL) {
967 if (!PageHighMem(page)) { 965 if (!PageHighMem(page)) {
968 struct multicall_space mcs; 966 struct multicall_space mcs;
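m2p_remove_override() now takes the mfn from its caller instead of re-deriving it with get_phys_to_machine(); callers are expected to remember the mfn recorded when the grant was mapped. A minimal sketch of that interface shape, with the kmap_op argument omitted and all types and values invented for illustration:

#include <stdio.h>

struct page { unsigned long pfn; };

/* New-style signature: the caller supplies the mfn it saved at map time. */
static int m2p_remove_override(struct page *page, unsigned long mfn)
{
	printf("removing override for pfn %lu (mfn %lu)\n", page->pfn, mfn);
	return 0;
}

int main(void)
{
	struct page p = { .pfn = 42 };
	unsigned long saved_mfn = 1234;	/* remembered from the map step */

	return m2p_remove_override(&p, saved_mfn);
}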
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c
index 0a7852483ffe..a8261716d58d 100644
--- a/arch/x86/xen/platform-pci-unplug.c
+++ b/arch/x86/xen/platform-pci-unplug.c
@@ -30,10 +30,9 @@
30#define XEN_PLATFORM_ERR_PROTOCOL -2 30#define XEN_PLATFORM_ERR_PROTOCOL -2
31#define XEN_PLATFORM_ERR_BLACKLIST -3 31#define XEN_PLATFORM_ERR_BLACKLIST -3
32 32
33/* store the value of xen_emul_unplug after the unplug is done */
34int xen_platform_pci_unplug;
35EXPORT_SYMBOL_GPL(xen_platform_pci_unplug);
36#ifdef CONFIG_XEN_PVHVM 33#ifdef CONFIG_XEN_PVHVM
34/* store the value of xen_emul_unplug after the unplug is done */
35static int xen_platform_pci_unplug;
37static int xen_emul_unplug; 36static int xen_emul_unplug;
38 37
39static int check_platform_magic(void) 38static int check_platform_magic(void)
@@ -69,6 +68,80 @@ static int check_platform_magic(void)
69 return 0; 68 return 0;
70} 69}
71 70
71bool xen_has_pv_devices()
72{
73 if (!xen_domain())
74 return false;
75
76 /* PV domains always have them. */
77 if (xen_pv_domain())
78 return true;
79
80 /* And the user has xen_platform_pci=0 set in the guest config, as
81 * the driver did not modify the value. */
82 if (xen_platform_pci_unplug == 0)
83 return false;
84
85 if (xen_platform_pci_unplug & XEN_UNPLUG_NEVER)
86 return false;
87
88 if (xen_platform_pci_unplug & XEN_UNPLUG_ALL)
89 return true;
90
91 /* This is an odd one - we are going to run legacy
92 * and PV drivers at the same time. */
93 if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY)
94 return true;
95
96 /* And the caller has to follow with xen_pv_{disk,nic}_devices
97 * to be certain which driver can load. */
98 return false;
99}
100EXPORT_SYMBOL_GPL(xen_has_pv_devices);
101
102static bool __xen_has_pv_device(int state)
103{
104 /* HVM domains might or might not */
105 if (xen_hvm_domain() && (xen_platform_pci_unplug & state))
106 return true;
107
108 return xen_has_pv_devices();
109}
110
111bool xen_has_pv_nic_devices(void)
112{
113 return __xen_has_pv_device(XEN_UNPLUG_ALL_NICS | XEN_UNPLUG_ALL);
114}
115EXPORT_SYMBOL_GPL(xen_has_pv_nic_devices);
116
117bool xen_has_pv_disk_devices(void)
118{
119 return __xen_has_pv_device(XEN_UNPLUG_ALL_IDE_DISKS |
120 XEN_UNPLUG_AUX_IDE_DISKS | XEN_UNPLUG_ALL);
121}
122EXPORT_SYMBOL_GPL(xen_has_pv_disk_devices);
123
124/*
125 * This one is odd - it determines whether you want to run PV _and_
126 * legacy (IDE) drivers together. This combination is only possible
127 * under HVM.
128 */
129bool xen_has_pv_and_legacy_disk_devices(void)
130{
131 if (!xen_domain())
132 return false;
133
134 /* N.B. This is only ever used in HVM mode */
135 if (xen_pv_domain())
136 return false;
137
138 if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY)
139 return true;
140
141 return false;
142}
143EXPORT_SYMBOL_GPL(xen_has_pv_and_legacy_disk_devices);
144
72void xen_unplug_emulated_devices(void) 145void xen_unplug_emulated_devices(void)
73{ 146{
74 int r; 147 int r;
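The new xen_has_pv_disk_devices()/xen_has_pv_nic_devices() helpers let frontend drivers decide whether to bind at all, for example a block frontend bailing out so the emulated IDE driver can take over. The sketch below shows that intended usage pattern; the helper is stubbed and blkfront_init() is a stand-in, not the actual driver code.

#include <stdio.h>
#include <errno.h>
#include <stdbool.h>

static bool xen_has_pv_disk_devices(void)	/* stub for the helper added above */
{
	return true;
}

static int blkfront_init(void)
{
	if (!xen_has_pv_disk_devices())
		return -ENODEV;		/* leave the disks to the emulated driver */

	puts("registering PV block frontend");
	return 0;
}

int main(void)
{
	return blkfront_init() ? 1 : 0;
}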
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 68c054f59de6..0982233b9b84 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -27,6 +27,7 @@
27#include <xen/interface/memory.h> 27#include <xen/interface/memory.h>
28#include <xen/interface/physdev.h> 28#include <xen/interface/physdev.h>
29#include <xen/features.h> 29#include <xen/features.h>
30#include "mmu.h"
30#include "xen-ops.h" 31#include "xen-ops.h"
31#include "vdso.h" 32#include "vdso.h"
32 33
@@ -34,7 +35,7 @@
34extern const char xen_hypervisor_callback[]; 35extern const char xen_hypervisor_callback[];
35extern const char xen_failsafe_callback[]; 36extern const char xen_failsafe_callback[];
36#ifdef CONFIG_X86_64 37#ifdef CONFIG_X86_64
37extern const char nmi[]; 38extern asmlinkage void nmi(void);
38#endif 39#endif
39extern void xen_sysenter_target(void); 40extern void xen_sysenter_target(void);
40extern void xen_syscall_target(void); 41extern void xen_syscall_target(void);
@@ -81,6 +82,9 @@ static void __init xen_add_extra_mem(u64 start, u64 size)
81 82
82 memblock_reserve(start, size); 83 memblock_reserve(start, size);
83 84
85 if (xen_feature(XENFEAT_auto_translated_physmap))
86 return;
87
84 xen_max_p2m_pfn = PFN_DOWN(start + size); 88 xen_max_p2m_pfn = PFN_DOWN(start + size);
85 for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) { 89 for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) {
86 unsigned long mfn = pfn_to_mfn(pfn); 90 unsigned long mfn = pfn_to_mfn(pfn);
@@ -103,6 +107,7 @@ static unsigned long __init xen_do_chunk(unsigned long start,
103 .domid = DOMID_SELF 107 .domid = DOMID_SELF
104 }; 108 };
105 unsigned long len = 0; 109 unsigned long len = 0;
110 int xlated_phys = xen_feature(XENFEAT_auto_translated_physmap);
106 unsigned long pfn; 111 unsigned long pfn;
107 int ret; 112 int ret;
108 113
@@ -116,7 +121,7 @@ static unsigned long __init xen_do_chunk(unsigned long start,
116 continue; 121 continue;
117 frame = mfn; 122 frame = mfn;
118 } else { 123 } else {
119 if (mfn != INVALID_P2M_ENTRY) 124 if (!xlated_phys && mfn != INVALID_P2M_ENTRY)
120 continue; 125 continue;
121 frame = pfn; 126 frame = pfn;
122 } 127 }
@@ -154,6 +159,13 @@ static unsigned long __init xen_do_chunk(unsigned long start,
154static unsigned long __init xen_release_chunk(unsigned long start, 159static unsigned long __init xen_release_chunk(unsigned long start,
155 unsigned long end) 160 unsigned long end)
156{ 161{
162 /*
163 * Xen already ballooned out the E820 non RAM regions for us
164 * and set them up properly in EPT.
165 */
166 if (xen_feature(XENFEAT_auto_translated_physmap))
167 return end - start;
168
157 return xen_do_chunk(start, end, true); 169 return xen_do_chunk(start, end, true);
158} 170}
159 171
@@ -222,7 +234,13 @@ static void __init xen_set_identity_and_release_chunk(
222 * (except for the ISA region which must be 1:1 mapped) to 234 * (except for the ISA region which must be 1:1 mapped) to
223 * release the refcounts (in Xen) on the original frames. 235 * release the refcounts (in Xen) on the original frames.
224 */ 236 */
225 for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) { 237
238 /*
239 * PVH E820 matches the hypervisor's P2M which means we need to
240 * account for the proper values of *release and *identity.
241 */
242 for (pfn = start_pfn; !xen_feature(XENFEAT_auto_translated_physmap) &&
243 pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) {
226 pte_t pte = __pte_ma(0); 244 pte_t pte = __pte_ma(0);
227 245
228 if (pfn < PFN_UP(ISA_END_ADDRESS)) 246 if (pfn < PFN_UP(ISA_END_ADDRESS))
@@ -559,20 +577,17 @@ void xen_enable_syscall(void)
559void xen_enable_nmi(void) 577void xen_enable_nmi(void)
560{ 578{
561#ifdef CONFIG_X86_64 579#ifdef CONFIG_X86_64
562 if (register_callback(CALLBACKTYPE_nmi, nmi)) 580 if (register_callback(CALLBACKTYPE_nmi, (char *)nmi))
563 BUG(); 581 BUG();
564#endif 582#endif
565} 583}
566void __init xen_arch_setup(void) 584void __init xen_pvmmu_arch_setup(void)
567{ 585{
568 xen_panic_handler_init();
569
570 HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); 586 HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
571 HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); 587 HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
572 588
573 if (!xen_feature(XENFEAT_auto_translated_physmap)) 589 HYPERVISOR_vm_assist(VMASST_CMD_enable,
574 HYPERVISOR_vm_assist(VMASST_CMD_enable, 590 VMASST_TYPE_pae_extended_cr3);
575 VMASST_TYPE_pae_extended_cr3);
576 591
577 if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) || 592 if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) ||
578 register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback)) 593 register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
@@ -581,6 +596,15 @@ void __init xen_arch_setup(void)
581 xen_enable_sysenter(); 596 xen_enable_sysenter();
582 xen_enable_syscall(); 597 xen_enable_syscall();
583 xen_enable_nmi(); 598 xen_enable_nmi();
599}
600
601/* This function is not called for HVM domains */
602void __init xen_arch_setup(void)
603{
604 xen_panic_handler_init();
605 if (!xen_feature(XENFEAT_auto_translated_physmap))
606 xen_pvmmu_arch_setup();
607
584#ifdef CONFIG_ACPI 608#ifdef CONFIG_ACPI
585 if (!(xen_start_info->flags & SIF_INITDOMAIN)) { 609 if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
586 printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); 610 printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index c36b325abd83..a18eadd8bb40 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -73,9 +73,11 @@ static void cpu_bringup(void)
73 touch_softlockup_watchdog(); 73 touch_softlockup_watchdog();
74 preempt_disable(); 74 preempt_disable();
75 75
76 xen_enable_sysenter(); 76 /* PVH runs in ring 0 and allows us to do native syscalls. Yay! */
77 xen_enable_syscall(); 77 if (!xen_feature(XENFEAT_supervisor_mode_kernel)) {
78 78 xen_enable_sysenter();
79 xen_enable_syscall();
80 }
79 cpu = smp_processor_id(); 81 cpu = smp_processor_id();
80 smp_store_cpu_info(cpu); 82 smp_store_cpu_info(cpu);
81 cpu_data(cpu).x86_max_cores = 1; 83 cpu_data(cpu).x86_max_cores = 1;
@@ -97,8 +99,14 @@ static void cpu_bringup(void)
97 wmb(); /* make sure everything is out */ 99 wmb(); /* make sure everything is out */
98} 100}
99 101
100static void cpu_bringup_and_idle(void) 102/* Note: cpu parameter is only relevant for PVH */
103static void cpu_bringup_and_idle(int cpu)
101{ 104{
105#ifdef CONFIG_X86_64
106 if (xen_feature(XENFEAT_auto_translated_physmap) &&
107 xen_feature(XENFEAT_supervisor_mode_kernel))
108 xen_pvh_secondary_vcpu_init(cpu);
109#endif
102 cpu_bringup(); 110 cpu_bringup();
103 cpu_startup_entry(CPUHP_ONLINE); 111 cpu_startup_entry(CPUHP_ONLINE);
104} 112}
@@ -274,9 +282,10 @@ static void __init xen_smp_prepare_boot_cpu(void)
274 native_smp_prepare_boot_cpu(); 282 native_smp_prepare_boot_cpu();
275 283
276 if (xen_pv_domain()) { 284 if (xen_pv_domain()) {
277 /* We've switched to the "real" per-cpu gdt, so make sure the 285 if (!xen_feature(XENFEAT_writable_page_tables))
278 old memory can be recycled */ 286 /* We've switched to the "real" per-cpu gdt, so make
279 make_lowmem_page_readwrite(xen_initial_gdt); 287 * sure the old memory can be recycled. */
288 make_lowmem_page_readwrite(xen_initial_gdt);
280 289
281#ifdef CONFIG_X86_32 290#ifdef CONFIG_X86_32
282 /* 291 /*
@@ -360,22 +369,21 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
360 369
361 gdt = get_cpu_gdt_table(cpu); 370 gdt = get_cpu_gdt_table(cpu);
362 371
363 ctxt->flags = VGCF_IN_KERNEL;
364 ctxt->user_regs.ss = __KERNEL_DS;
365#ifdef CONFIG_X86_32 372#ifdef CONFIG_X86_32
373 /* Note: PVH is not yet supported on x86_32. */
366 ctxt->user_regs.fs = __KERNEL_PERCPU; 374 ctxt->user_regs.fs = __KERNEL_PERCPU;
367 ctxt->user_regs.gs = __KERNEL_STACK_CANARY; 375 ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
368#else
369 ctxt->gs_base_kernel = per_cpu_offset(cpu);
370#endif 376#endif
371 ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; 377 ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
372 378
373 memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); 379 memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
374 380
375 { 381 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
382 ctxt->flags = VGCF_IN_KERNEL;
376 ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ 383 ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
377 ctxt->user_regs.ds = __USER_DS; 384 ctxt->user_regs.ds = __USER_DS;
378 ctxt->user_regs.es = __USER_DS; 385 ctxt->user_regs.es = __USER_DS;
386 ctxt->user_regs.ss = __KERNEL_DS;
379 387
380 xen_copy_trap_info(ctxt->trap_ctxt); 388 xen_copy_trap_info(ctxt->trap_ctxt);
381 389
@@ -396,18 +404,27 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
396#ifdef CONFIG_X86_32 404#ifdef CONFIG_X86_32
397 ctxt->event_callback_cs = __KERNEL_CS; 405 ctxt->event_callback_cs = __KERNEL_CS;
398 ctxt->failsafe_callback_cs = __KERNEL_CS; 406 ctxt->failsafe_callback_cs = __KERNEL_CS;
407#else
408 ctxt->gs_base_kernel = per_cpu_offset(cpu);
399#endif 409#endif
400 ctxt->event_callback_eip = 410 ctxt->event_callback_eip =
401 (unsigned long)xen_hypervisor_callback; 411 (unsigned long)xen_hypervisor_callback;
402 ctxt->failsafe_callback_eip = 412 ctxt->failsafe_callback_eip =
403 (unsigned long)xen_failsafe_callback; 413 (unsigned long)xen_failsafe_callback;
414 ctxt->user_regs.cs = __KERNEL_CS;
415 per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
416#ifdef CONFIG_X86_32
404 } 417 }
405 ctxt->user_regs.cs = __KERNEL_CS; 418#else
419 } else
420 /* N.B. The user_regs.eip (cpu_bringup_and_idle) is called with
421 * %rdi holding the cpu number - which means we are passing in
422 * the cpu as the first parameter. Subtle!
423 */
424 ctxt->user_regs.rdi = cpu;
425#endif
406 ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); 426 ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
407
408 per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
409 ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); 427 ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
410
411 if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt)) 428 if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt))
412 BUG(); 429 BUG();
413 430
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 0e36cde12f7e..581521c843a5 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -106,7 +106,7 @@ static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting);
106static cpumask_t waiting_cpus; 106static cpumask_t waiting_cpus;
107 107
108static bool xen_pvspin = true; 108static bool xen_pvspin = true;
109static void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want) 109__visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
110{ 110{
111 int irq = __this_cpu_read(lock_kicker_irq); 111 int irq = __this_cpu_read(lock_kicker_irq);
112 struct xen_lock_waiting *w = &__get_cpu_var(lock_waiting); 112 struct xen_lock_waiting *w = &__get_cpu_var(lock_waiting);
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 12a1ca707b94..7b78f88c1707 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -446,6 +446,7 @@ void xen_setup_timer(int cpu)
446 IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER| 446 IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER|
447 IRQF_FORCE_RESUME, 447 IRQF_FORCE_RESUME,
448 name, NULL); 448 name, NULL);
449 (void)xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX);
449 450
450 memcpy(evt, xen_clockevent, sizeof(*evt)); 451 memcpy(evt, xen_clockevent, sizeof(*evt));
451 452
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 7faed5869e5b..485b69585540 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -11,8 +11,28 @@
11#include <asm/page_types.h> 11#include <asm/page_types.h>
12 12
13#include <xen/interface/elfnote.h> 13#include <xen/interface/elfnote.h>
14#include <xen/interface/features.h>
14#include <asm/xen/interface.h> 15#include <asm/xen/interface.h>
15 16
17#ifdef CONFIG_XEN_PVH
18#define PVH_FEATURES_STR "|writable_descriptor_tables|auto_translated_physmap|supervisor_mode_kernel"
19/* Note the lack of 'hvm_callback_vector'. Older hypervisors will
20 * balk at this being part of XEN_ELFNOTE_FEATURES, so we put it in
21 * XEN_ELFNOTE_SUPPORTED_FEATURES which older hypervisors will ignore.
22 */
23#define PVH_FEATURES ((1 << XENFEAT_writable_page_tables) | \
24 (1 << XENFEAT_auto_translated_physmap) | \
25 (1 << XENFEAT_supervisor_mode_kernel) | \
26 (1 << XENFEAT_hvm_callback_vector))
27/* The XENFEAT_writable_page_tables is not strictly necessary as we set that
28 * up regardless of whether this CONFIG option is enabled or not, but it
29 * clarifies what the right flags need to be.
30 */
31#else
32#define PVH_FEATURES_STR ""
33#define PVH_FEATURES (0)
34#endif
35
16 __INIT 36 __INIT
17ENTRY(startup_xen) 37ENTRY(startup_xen)
18 cld 38 cld
@@ -95,7 +115,10 @@ NEXT_HYPERCALL(arch_6)
95#endif 115#endif
96 ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen) 116 ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen)
97 ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page) 117 ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page)
98 ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") 118 ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .ascii "!writable_page_tables|pae_pgdir_above_4gb"; .asciz PVH_FEATURES_STR)
119 ELFNOTE(Xen, XEN_ELFNOTE_SUPPORTED_FEATURES, .long (PVH_FEATURES) |
120 (1 << XENFEAT_writable_page_tables) |
121 (1 << XENFEAT_dom0))
99 ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") 122 ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes")
100 ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") 123 ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic")
101 ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, 124 ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,
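XEN_ELFNOTE_SUPPORTED_FEATURES is just a 32-bit OR of XENFEAT_* bit positions, which older hypervisors ignore. The little program below computes the mask the note above encodes; the bit numbers follow the Xen features interface header, but double-check them against your own headers before relying on the exact value.

#include <stdio.h>

enum {
	XENFEAT_writable_page_tables	= 0,
	XENFEAT_auto_translated_physmap	= 2,
	XENFEAT_supervisor_mode_kernel	= 3,
	XENFEAT_hvm_callback_vector	= 8,
	XENFEAT_dom0			= 11,
};

int main(void)
{
	unsigned long pvh_features =
		(1UL << XENFEAT_writable_page_tables) |
		(1UL << XENFEAT_auto_translated_physmap) |
		(1UL << XENFEAT_supervisor_mode_kernel) |
		(1UL << XENFEAT_hvm_callback_vector);

	unsigned long supported_note = pvh_features |
		(1UL << XENFEAT_writable_page_tables) |
		(1UL << XENFEAT_dom0);

	printf("PVH_FEATURES            = 0x%lx\n", pvh_features);
	printf("SUPPORTED_FEATURES note = 0x%lx\n", supported_note);
	return 0;
}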
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 95f8c6142328..1cb6f4c37300 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -123,4 +123,5 @@ __visible void xen_adjust_exception_frame(void);
123 123
124extern int xen_panic_handler_init(void); 124extern int xen_panic_handler_init(void);
125 125
126void xen_pvh_secondary_vcpu_init(int cpu);
126#endif /* XEN_OPS_H */ 127#endif /* XEN_OPS_H */