author     H. Peter Anvin <hpa@linux.intel.com>    2014-02-07 14:27:30 -0500
committer  H. Peter Anvin <hpa@linux.intel.com>    2014-02-07 14:27:30 -0500
commit     a3b072cd180c12e8fe0ece9487b9065808327640 (patch)
tree       62b982041be84748852d77cdf6ca5639ef40858f /arch/x86
parent     75a1ba5b2c529db60ca49626bcaf0bddf4548438 (diff)
parent     081cd62a010f97b5bc1d2b0cd123c5abc692b68a (diff)
Merge tag 'efi-urgent' into x86/urgent
* Avoid WARN_ON() when mapping BGRT on Baytrail (EFI 32-bit).
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'arch/x86')
86 files changed, 4228 insertions, 576 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 3b6922ebf170..0af5250d914f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -23,6 +23,7 @@ config X86
23 | def_bool y | 23 | def_bool y |
24 | select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS | 24 | select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS |
25 | select ARCH_MIGHT_HAVE_PC_PARPORT | 25 | select ARCH_MIGHT_HAVE_PC_PARPORT |
26 | select ARCH_MIGHT_HAVE_PC_SERIO | ||
26 | select HAVE_AOUT if X86_32 | 27 | select HAVE_AOUT if X86_32 |
27 | select HAVE_UNSTABLE_SCHED_CLOCK | 28 | select HAVE_UNSTABLE_SCHED_CLOCK |
28 | select ARCH_SUPPORTS_NUMA_BALANCING | 29 | select ARCH_SUPPORTS_NUMA_BALANCING |
@@ -279,13 +280,13 @@ config SMP
279 | bool "Symmetric multi-processing support" | 280 | bool "Symmetric multi-processing support" |
280 | ---help--- | 281 | ---help--- |
281 | This enables support for systems with more than one CPU. If you have | 282 | This enables support for systems with more than one CPU. If you have |
282 | a system with only one CPU, like most personal computers, say N. If | 283 | a system with only one CPU, say N. If you have a system with more |
283 | you have a system with more than one CPU, say Y. | 284 | than one CPU, say Y. |
284 | 285 | ||
285 | If you say N here, the kernel will run on single and multiprocessor | 286 | If you say N here, the kernel will run on uni- and multiprocessor |
286 | machines, but will use only one CPU of a multiprocessor machine. If | 287 | machines, but will use only one CPU of a multiprocessor machine. If |
287 | you say Y here, the kernel will run on many, but not all, | 288 | you say Y here, the kernel will run on many, but not all, |
288 | singleprocessor machines. On a singleprocessor machine, the kernel | 289 | uniprocessor machines. On a uniprocessor machine, the kernel |
289 | will run faster if you say N here. | 290 | will run faster if you say N here. |
290 | 291 | ||
291 | Note that if you say Y here and choose architecture "586" or | 292 | Note that if you say Y here and choose architecture "586" or |
@@ -732,6 +733,7 @@ config APB_TIMER
732 | # The code disables itself when not needed. | 733 | # The code disables itself when not needed. |
733 | config DMI | 734 | config DMI |
734 | default y | 735 | default y |
736 | select DMI_SCAN_MACHINE_NON_EFI_FALLBACK | ||
735 | bool "Enable DMI scanning" if EXPERT | 737 | bool "Enable DMI scanning" if EXPERT |
736 | ---help--- | 738 | ---help--- |
737 | Enabled scanning of DMI to identify machine quirks. Say Y | 739 | Enabled scanning of DMI to identify machine quirks. Say Y |
@@ -939,7 +941,7 @@ config X86_ANCIENT_MCE
939 | depends on X86_32 && X86_MCE | 941 | depends on X86_32 && X86_MCE |
940 | ---help--- | 942 | ---help--- |
941 | Include support for machine check handling on old Pentium 5 or WinChip | 943 | Include support for machine check handling on old Pentium 5 or WinChip |
942 | systems. These typically need to be enabled explicitely on the command | 944 | systems. These typically need to be enabled explicitly on the command |
943 | line. | 945 | line. |
944 | 946 | ||
945 | config X86_MCE_THRESHOLD | 947 | config X86_MCE_THRESHOLD |
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 13b22e0f681d..eeda43abed6e 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -11,6 +11,28 @@ else
11 | KBUILD_DEFCONFIG := $(ARCH)_defconfig | 11 | KBUILD_DEFCONFIG := $(ARCH)_defconfig |
12 | endif | 12 | endif |
13 | 13 | ||
14 | # How to compile the 16-bit code. Note we always compile for -march=i386; | ||
15 | # that way we can complain to the user if the CPU is insufficient. | ||
16 | # | ||
17 | # The -m16 option is supported by GCC >= 4.9 and clang >= 3.5. For | ||
18 | # older versions of GCC, we need to play evil and unreliable tricks to | ||
19 | # attempt to ensure that our asm(".code16gcc") is first in the asm | ||
20 | # output. | ||
21 | CODE16GCC_CFLAGS := -m32 -include $(srctree)/arch/x86/boot/code16gcc.h \ | ||
22 | $(call cc-option, -fno-toplevel-reorder,\ | ||
23 | $(call cc-option, -fno-unit-at-a-time)) | ||
24 | M16_CFLAGS := $(call cc-option, -m16, $(CODE16GCC_CFLAGS)) | ||
25 | |||
26 | REALMODE_CFLAGS := $(M16_CFLAGS) -g -Os -D__KERNEL__ \ | ||
27 | -DDISABLE_BRANCH_PROFILING \ | ||
28 | -Wall -Wstrict-prototypes -march=i386 -mregparm=3 \ | ||
29 | -fno-strict-aliasing -fomit-frame-pointer -fno-pic \ | ||
30 | -mno-mmx -mno-sse \ | ||
31 | $(call cc-option, -ffreestanding) \ | ||
32 | $(call cc-option, -fno-stack-protector) \ | ||
33 | $(call cc-option, -mpreferred-stack-boundary=2) | ||
34 | export REALMODE_CFLAGS | ||
35 | |||
14 | # BITS is used as extension for files which are available in a 32 bit | 36 | # BITS is used as extension for files which are available in a 32 bit |
15 | # and a 64 bit version to simplify shared Makefiles. | 37 | # and a 64 bit version to simplify shared Makefiles. |
16 | # e.g.: obj-y += foo_$(BITS).o | 38 | # e.g.: obj-y += foo_$(BITS).o |
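The new comment above describes two ways to get 16-bit setup code out of a 32-bit compiler: the -m16 option (GCC >= 4.9, clang >= 3.5), or force-including a header whose only job is to emit ".code16gcc" ahead of all other assembler output. A minimal sketch of that fallback, assuming a header along the lines of arch/x86/boot/code16gcc.h (its exact contents are not part of this diff; the asm() line restates the trick named in the comment):

	/*
	 * Sketch of the .code16gcc fallback for compilers without -m16.
	 * The header is pulled in with -include so this statement is the
	 * first thing in the assembler output; -fno-toplevel-reorder (or
	 * the older -fno-unit-at-a-time) keeps the compiler from moving it.
	 */
	asm(".code16gcc");	/* have gas encode the 32-bit-compiled code for 16-bit mode */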
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index de7066918005..878df7e88cd4 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -51,20 +51,7 @@ $(obj)/cpustr.h: $(obj)/mkcpustr FORCE
51 | 51 | ||
52 | # --------------------------------------------------------------------------- | 52 | # --------------------------------------------------------------------------- |
53 | 53 | ||
54 | # How to compile the 16-bit code. Note we always compile for -march=i386, | 54 | KBUILD_CFLAGS := $(USERINCLUDE) $(REALMODE_CFLAGS) -D_SETUP |
55 | # that way we can complain to the user if the CPU is insufficient. | ||
56 | KBUILD_CFLAGS := $(USERINCLUDE) -m32 -g -Os -D_SETUP -D__KERNEL__ \ | ||
57 | -DDISABLE_BRANCH_PROFILING \ | ||
58 | -Wall -Wstrict-prototypes \ | ||
59 | -march=i386 -mregparm=3 \ | ||
60 | -include $(srctree)/$(src)/code16gcc.h \ | ||
61 | -fno-strict-aliasing -fomit-frame-pointer -fno-pic \ | ||
62 | -mno-mmx -mno-sse \ | ||
63 | $(call cc-option, -ffreestanding) \ | ||
64 | $(call cc-option, -fno-toplevel-reorder,\ | ||
65 | $(call cc-option, -fno-unit-at-a-time)) \ | ||
66 | $(call cc-option, -fno-stack-protector) \ | ||
67 | $(call cc-option, -mpreferred-stack-boundary=2) | ||
68 | KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ | 55 | KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ |
69 | GCOV_PROFILE := n | 56 | GCOV_PROFILE := n |
70 | 57 | ||
diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c
index a9fcb7cfb241..431fa5f84537 100644
--- a/arch/x86/boot/cpuflags.c
+++ b/arch/x86/boot/cpuflags.c
@@ -28,20 +28,35 @@ static int has_fpu(void)
28 | return fsw == 0 && (fcw & 0x103f) == 0x003f; | 28 | return fsw == 0 && (fcw & 0x103f) == 0x003f; |
29 | } | 29 | } |
30 | 30 | ||
31 | /* | ||
32 | * For building the 16-bit code we want to explicitly specify 32-bit | ||
33 | * push/pop operations, rather than just saying 'pushf' or 'popf' and | ||
34 | * letting the compiler choose. But this is also included from the | ||
35 | * compressed/ directory where it may be 64-bit code, and thus needs | ||
36 | * to be 'pushfq' or 'popfq' in that case. | ||
37 | */ | ||
38 | #ifdef __x86_64__ | ||
39 | #define PUSHF "pushfq" | ||
40 | #define POPF "popfq" | ||
41 | #else | ||
42 | #define PUSHF "pushfl" | ||
43 | #define POPF "popfl" | ||
44 | #endif | ||
45 | |||
31 | int has_eflag(unsigned long mask) | 46 | int has_eflag(unsigned long mask) |
32 | { | 47 | { |
33 | unsigned long f0, f1; | 48 | unsigned long f0, f1; |
34 | 49 | ||
35 | asm volatile("pushf \n\t" | 50 | asm volatile(PUSHF " \n\t" |
36 | "pushf \n\t" | 51 | PUSHF " \n\t" |
37 | "pop %0 \n\t" | 52 | "pop %0 \n\t" |
38 | "mov %0,%1 \n\t" | 53 | "mov %0,%1 \n\t" |
39 | "xor %2,%1 \n\t" | 54 | "xor %2,%1 \n\t" |
40 | "push %1 \n\t" | 55 | "push %1 \n\t" |
41 | "popf \n\t" | 56 | POPF " \n\t" |
42 | "pushf \n\t" | 57 | PUSHF " \n\t" |
43 | "pop %1 \n\t" | 58 | "pop %1 \n\t" |
44 | "popf" | 59 | POPF |
45 | : "=&r" (f0), "=&r" (f1) | 60 | : "=&r" (f0), "=&r" (f1) |
46 | : "ri" (mask)); | 61 | : "ri" (mask)); |
47 | 62 | ||
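The change above makes has_eflag() use an explicitly sized flags push/pop so the same source also builds correctly as 64-bit code under compressed/. For context, a hedged usage sketch: the helper answers "can this EFLAGS bit be toggled?", and its classic caller tests the ID flag (bit 21) to decide whether CPUID exists. The X86_EFLAGS_ID constant and the has_cpuid() wrapper below are not part of this hunk; they restate how the kernel typically uses the function.

	extern int has_eflag(unsigned long mask);	/* declared as in the hunk above */

	#define X86_EFLAGS_ID	0x00200000UL		/* EFLAGS bit 21, the ID flag */

	/* CPUs that cannot toggle the ID flag predate the CPUID instruction. */
	static int has_cpuid(void)
	{
		return has_eflag(X86_EFLAGS_ID);
	}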
diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h
index ff339c5db311..0bb25491262d 100644
--- a/arch/x86/boot/video.h
+++ b/arch/x86/boot/video.h
@@ -80,7 +80,7 @@ struct card_info {
80 | u16 xmode_n; /* Size of unprobed mode range */ | 80 | u16 xmode_n; /* Size of unprobed mode range */ |
81 | }; | 81 | }; |
82 | 82 | ||
83 | #define __videocard struct card_info __attribute__((section(".videocards"))) | 83 | #define __videocard struct card_info __attribute__((used,section(".videocards"))) |
84 | extern struct card_info video_cards[], video_cards_end[]; | 84 | extern struct card_info video_cards[], video_cards_end[]; |
85 | 85 | ||
86 | int mode_defined(u16 mode); /* video.c */ | 86 | int mode_defined(u16 mode); /* video.c */ |
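The only change here adds the "used" attribute: card_info objects are never referenced by name, only found by walking the .videocards section between the video_cards[]/video_cards_end[] bounds declared above, so without "used" the compiler may discard them as dead static data. A minimal sketch of that pattern, assuming video.h as shown; the object name, initializer field, and probe_cards() helper are illustrative, not code from this patch:

	#include "video.h"

	/* Registers itself just by existing in .videocards; nothing refers to it. */
	static __videocard video_vga = {
		.card_name = "VGA",		/* field/driver names illustrative */
	};

	static void probe_cards(void)
	{
		struct card_info *card;

		for (card = video_cards; card < video_cards_end; card++)
			/* probe *card ... */ ;
	}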
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index e0fc24db234a..6ba54d640383 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -76,6 +76,7 @@ ifeq ($(avx2_supported),yes)
76 | endif | 76 | endif |
77 | 77 | ||
78 | aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o | 78 | aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o |
79 | aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o | ||
79 | ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o | 80 | ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o |
80 | sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o | 81 | sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o |
81 | crc32c-intel-y := crc32c-intel_glue.o | 82 | crc32c-intel-y := crc32c-intel_glue.o |
diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S
new file mode 100644
index 000000000000..522ab68d1c88
--- /dev/null
+++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S
@@ -0,0 +1,2811 @@
1 | ######################################################################## | ||
2 | # Copyright (c) 2013, Intel Corporation | ||
3 | # | ||
4 | # This software is available to you under a choice of one of two | ||
5 | # licenses. You may choose to be licensed under the terms of the GNU | ||
6 | # General Public License (GPL) Version 2, available from the file | ||
7 | # COPYING in the main directory of this source tree, or the | ||
8 | # OpenIB.org BSD license below: | ||
9 | # | ||
10 | # Redistribution and use in source and binary forms, with or without | ||
11 | # modification, are permitted provided that the following conditions are | ||
12 | # met: | ||
13 | # | ||
14 | # * Redistributions of source code must retain the above copyright | ||
15 | # notice, this list of conditions and the following disclaimer. | ||
16 | # | ||
17 | # * Redistributions in binary form must reproduce the above copyright | ||
18 | # notice, this list of conditions and the following disclaimer in the | ||
19 | # documentation and/or other materials provided with the | ||
20 | # distribution. | ||
21 | # | ||
22 | # * Neither the name of the Intel Corporation nor the names of its | ||
23 | # contributors may be used to endorse or promote products derived from | ||
24 | # this software without specific prior written permission. | ||
25 | # | ||
26 | # | ||
27 | # THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY | ||
28 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
29 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | ||
30 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR | ||
31 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
32 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
33 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES# LOSS OF USE, DATA, OR | ||
34 | # PROFITS# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
35 | # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
36 | # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
37 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
38 | ######################################################################## | ||
39 | ## | ||
40 | ## Authors: | ||
41 | ## Erdinc Ozturk <erdinc.ozturk@intel.com> | ||
42 | ## Vinodh Gopal <vinodh.gopal@intel.com> | ||
43 | ## James Guilford <james.guilford@intel.com> | ||
44 | ## Tim Chen <tim.c.chen@linux.intel.com> | ||
45 | ## | ||
46 | ## References: | ||
47 | ## This code was derived and highly optimized from the code described in paper: | ||
48 | ## Vinodh Gopal et. al. Optimized Galois-Counter-Mode Implementation | ||
49 | ## on Intel Architecture Processors. August, 2010 | ||
50 | ## The details of the implementation is explained in: | ||
51 | ## Erdinc Ozturk et. al. Enabling High-Performance Galois-Counter-Mode | ||
52 | ## on Intel Architecture Processors. October, 2012. | ||
53 | ## | ||
54 | ## Assumptions: | ||
55 | ## | ||
56 | ## | ||
57 | ## | ||
58 | ## iv: | ||
59 | ## 0 1 2 3 | ||
60 | ## 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||
61 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
62 | ## | Salt (From the SA) | | ||
63 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
64 | ## | Initialization Vector | | ||
65 | ## | (This is the sequence number from IPSec header) | | ||
66 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
67 | ## | 0x1 | | ||
68 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
69 | ## | ||
70 | ## | ||
71 | ## | ||
72 | ## AAD: | ||
73 | ## AAD padded to 128 bits with 0 | ||
74 | ## for example, assume AAD is a u32 vector | ||
75 | ## | ||
76 | ## if AAD is 8 bytes: | ||
77 | ## AAD[3] = {A0, A1}# | ||
78 | ## padded AAD in xmm register = {A1 A0 0 0} | ||
79 | ## | ||
80 | ## 0 1 2 3 | ||
81 | ## 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||
82 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
83 | ## | SPI (A1) | | ||
84 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
85 | ## | 32-bit Sequence Number (A0) | | ||
86 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
87 | ## | 0x0 | | ||
88 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
89 | ## | ||
90 | ## AAD Format with 32-bit Sequence Number | ||
91 | ## | ||
92 | ## if AAD is 12 bytes: | ||
93 | ## AAD[3] = {A0, A1, A2}# | ||
94 | ## padded AAD in xmm register = {A2 A1 A0 0} | ||
95 | ## | ||
96 | ## 0 1 2 3 | ||
97 | ## 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||
98 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
99 | ## | SPI (A2) | | ||
100 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
101 | ## | 64-bit Extended Sequence Number {A1,A0} | | ||
102 | ## | | | ||
103 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
104 | ## | 0x0 | | ||
105 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
106 | ## | ||
107 | ## AAD Format with 64-bit Extended Sequence Number | ||
108 | ## | ||
109 | ## | ||
110 | ## aadLen: | ||
111 | ## from the definition of the spec, aadLen can only be 8 or 12 bytes. | ||
112 | ## The code additionally supports aadLen of length 16 bytes. | ||
113 | ## | ||
114 | ## TLen: | ||
115 | ## from the definition of the spec, TLen can only be 8, 12 or 16 bytes. | ||
116 | ## | ||
117 | ## poly = x^128 + x^127 + x^126 + x^121 + 1 | ||
118 | ## throughout the code, one tab and two tab indentations are used. one tab is | ||
119 | ## for GHASH part, two tabs is for AES part. | ||
120 | ## | ||
121 | |||
122 | #include <linux/linkage.h> | ||
123 | #include <asm/inst.h> | ||
124 | |||
125 | .data | ||
126 | .align 16 | ||
127 | |||
128 | POLY: .octa 0xC2000000000000000000000000000001 | ||
129 | POLY2: .octa 0xC20000000000000000000001C2000000 | ||
130 | TWOONE: .octa 0x00000001000000000000000000000001 | ||
131 | |||
132 | # order of these constants should not change. | ||
133 | # more specifically, ALL_F should follow SHIFT_MASK, and ZERO should follow ALL_F | ||
134 | |||
135 | SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F | ||
136 | SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100 | ||
137 | ALL_F: .octa 0xffffffffffffffffffffffffffffffff | ||
138 | ZERO: .octa 0x00000000000000000000000000000000 | ||
139 | ONE: .octa 0x00000000000000000000000000000001 | ||
140 | ONEf: .octa 0x01000000000000000000000000000000 | ||
141 | |||
142 | .text | ||
143 | |||
144 | |||
145 | ##define the fields of the gcm aes context | ||
146 | #{ | ||
147 | # u8 expanded_keys[16*11] store expanded keys | ||
148 | # u8 shifted_hkey_1[16] store HashKey <<1 mod poly here | ||
149 | # u8 shifted_hkey_2[16] store HashKey^2 <<1 mod poly here | ||
150 | # u8 shifted_hkey_3[16] store HashKey^3 <<1 mod poly here | ||
151 | # u8 shifted_hkey_4[16] store HashKey^4 <<1 mod poly here | ||
152 | # u8 shifted_hkey_5[16] store HashKey^5 <<1 mod poly here | ||
153 | # u8 shifted_hkey_6[16] store HashKey^6 <<1 mod poly here | ||
154 | # u8 shifted_hkey_7[16] store HashKey^7 <<1 mod poly here | ||
155 | # u8 shifted_hkey_8[16] store HashKey^8 <<1 mod poly here | ||
156 | # u8 shifted_hkey_1_k[16] store XOR HashKey <<1 mod poly here (for Karatsuba purposes) | ||
157 | # u8 shifted_hkey_2_k[16] store XOR HashKey^2 <<1 mod poly here (for Karatsuba purposes) | ||
158 | # u8 shifted_hkey_3_k[16] store XOR HashKey^3 <<1 mod poly here (for Karatsuba purposes) | ||
159 | # u8 shifted_hkey_4_k[16] store XOR HashKey^4 <<1 mod poly here (for Karatsuba purposes) | ||
160 | # u8 shifted_hkey_5_k[16] store XOR HashKey^5 <<1 mod poly here (for Karatsuba purposes) | ||
161 | # u8 shifted_hkey_6_k[16] store XOR HashKey^6 <<1 mod poly here (for Karatsuba purposes) | ||
162 | # u8 shifted_hkey_7_k[16] store XOR HashKey^7 <<1 mod poly here (for Karatsuba purposes) | ||
163 | # u8 shifted_hkey_8_k[16] store XOR HashKey^8 <<1 mod poly here (for Karatsuba purposes) | ||
164 | #} gcm_ctx# | ||
165 | |||
166 | HashKey = 16*11 # store HashKey <<1 mod poly here | ||
167 | HashKey_2 = 16*12 # store HashKey^2 <<1 mod poly here | ||
168 | HashKey_3 = 16*13 # store HashKey^3 <<1 mod poly here | ||
169 | HashKey_4 = 16*14 # store HashKey^4 <<1 mod poly here | ||
170 | HashKey_5 = 16*15 # store HashKey^5 <<1 mod poly here | ||
171 | HashKey_6 = 16*16 # store HashKey^6 <<1 mod poly here | ||
172 | HashKey_7 = 16*17 # store HashKey^7 <<1 mod poly here | ||
173 | HashKey_8 = 16*18 # store HashKey^8 <<1 mod poly here | ||
174 | HashKey_k = 16*19 # store XOR of HashKey <<1 mod poly here (for Karatsuba purposes) | ||
175 | HashKey_2_k = 16*20 # store XOR of HashKey^2 <<1 mod poly here (for Karatsuba purposes) | ||
176 | HashKey_3_k = 16*21 # store XOR of HashKey^3 <<1 mod poly here (for Karatsuba purposes) | ||
177 | HashKey_4_k = 16*22 # store XOR of HashKey^4 <<1 mod poly here (for Karatsuba purposes) | ||
178 | HashKey_5_k = 16*23 # store XOR of HashKey^5 <<1 mod poly here (for Karatsuba purposes) | ||
179 | HashKey_6_k = 16*24 # store XOR of HashKey^6 <<1 mod poly here (for Karatsuba purposes) | ||
180 | HashKey_7_k = 16*25 # store XOR of HashKey^7 <<1 mod poly here (for Karatsuba purposes) | ||
181 | HashKey_8_k = 16*26 # store XOR of HashKey^8 <<1 mod poly here (for Karatsuba purposes) | ||
182 | |||
183 | #define arg1 %rdi | ||
184 | #define arg2 %rsi | ||
185 | #define arg3 %rdx | ||
186 | #define arg4 %rcx | ||
187 | #define arg5 %r8 | ||
188 | #define arg6 %r9 | ||
189 | #define arg7 STACK_OFFSET+8*1(%r14) | ||
190 | #define arg8 STACK_OFFSET+8*2(%r14) | ||
191 | #define arg9 STACK_OFFSET+8*3(%r14) | ||
192 | |||
193 | i = 0 | ||
194 | j = 0 | ||
195 | |||
196 | out_order = 0 | ||
197 | in_order = 1 | ||
198 | DEC = 0 | ||
199 | ENC = 1 | ||
200 | |||
201 | .macro define_reg r n | ||
202 | reg_\r = %xmm\n | ||
203 | .endm | ||
204 | |||
205 | .macro setreg | ||
206 | .altmacro | ||
207 | define_reg i %i | ||
208 | define_reg j %j | ||
209 | .noaltmacro | ||
210 | .endm | ||
211 | |||
212 | # need to push 4 registers into stack to maintain | ||
213 | STACK_OFFSET = 8*4 | ||
214 | |||
215 | TMP1 = 16*0 # Temporary storage for AAD | ||
216 | TMP2 = 16*1 # Temporary storage for AES State 2 (State 1 is stored in an XMM register) | ||
217 | TMP3 = 16*2 # Temporary storage for AES State 3 | ||
218 | TMP4 = 16*3 # Temporary storage for AES State 4 | ||
219 | TMP5 = 16*4 # Temporary storage for AES State 5 | ||
220 | TMP6 = 16*5 # Temporary storage for AES State 6 | ||
221 | TMP7 = 16*6 # Temporary storage for AES State 7 | ||
222 | TMP8 = 16*7 # Temporary storage for AES State 8 | ||
223 | |||
224 | VARIABLE_OFFSET = 16*8 | ||
225 | |||
226 | ################################ | ||
227 | # Utility Macros | ||
228 | ################################ | ||
229 | |||
230 | # Encryption of a single block | ||
231 | .macro ENCRYPT_SINGLE_BLOCK XMM0 | ||
232 | vpxor (arg1), \XMM0, \XMM0 | ||
233 | i = 1 | ||
234 | setreg | ||
235 | .rep 9 | ||
236 | vaesenc 16*i(arg1), \XMM0, \XMM0 | ||
237 | i = (i+1) | ||
238 | setreg | ||
239 | .endr | ||
240 | vaesenclast 16*10(arg1), \XMM0, \XMM0 | ||
241 | .endm | ||
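ENCRYPT_SINGLE_BLOCK above is plain AES-128 on one block: a whitening XOR with the round key at (arg1), nine vaesenc rounds over the keys at successive 16-byte offsets, and a final vaesenclast with key 10. The same flow in intrinsics, as a hedged reference sketch (the function below is not part of this patch):

	#include <immintrin.h>

	/* AES-128 on one block; rk points at the 11 expanded round keys,
	 * laid out contiguously 16 bytes apart, as the macro assumes.   */
	static __m128i aes128_encrypt_block(const __m128i *rk, __m128i blk)
	{
		int r;

		blk = _mm_xor_si128(blk, rk[0]);		/* whitening   */
		for (r = 1; r <= 9; r++)
			blk = _mm_aesenc_si128(blk, rk[r]);	/* rounds 1-9  */
		return _mm_aesenclast_si128(blk, rk[10]);	/* final round */
	}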
242 | |||
243 | #ifdef CONFIG_AS_AVX | ||
244 | ############################################################################### | ||
245 | # GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0) | ||
246 | # Input: A and B (128-bits each, bit-reflected) | ||
247 | # Output: C = A*B*x mod poly, (i.e. >>1 ) | ||
248 | # To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input | ||
249 | # GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly. | ||
250 | ############################################################################### | ||
251 | .macro GHASH_MUL_AVX GH HK T1 T2 T3 T4 T5 | ||
252 | |||
253 | vpshufd $0b01001110, \GH, \T2 | ||
254 | vpshufd $0b01001110, \HK, \T3 | ||
255 | vpxor \GH , \T2, \T2 # T2 = (a1+a0) | ||
256 | vpxor \HK , \T3, \T3 # T3 = (b1+b0) | ||
257 | |||
258 | vpclmulqdq $0x11, \HK, \GH, \T1 # T1 = a1*b1 | ||
259 | vpclmulqdq $0x00, \HK, \GH, \GH # GH = a0*b0 | ||
260 | vpclmulqdq $0x00, \T3, \T2, \T2 # T2 = (a1+a0)*(b1+b0) | ||
261 | vpxor \GH, \T2,\T2 | ||
262 | vpxor \T1, \T2,\T2 # T2 = a0*b1+a1*b0 | ||
263 | |||
264 | vpslldq $8, \T2,\T3 # shift-L T3 2 DWs | ||
265 | vpsrldq $8, \T2,\T2 # shift-R T2 2 DWs | ||
266 | vpxor \T3, \GH, \GH | ||
267 | vpxor \T2, \T1, \T1 # <T1:GH> = GH x HK | ||
268 | |||
269 | #first phase of the reduction | ||
270 | vpslld $31, \GH, \T2 # packed right shifting << 31 | ||
271 | vpslld $30, \GH, \T3 # packed right shifting shift << 30 | ||
272 | vpslld $25, \GH, \T4 # packed right shifting shift << 25 | ||
273 | |||
274 | vpxor \T3, \T2, \T2 # xor the shifted versions | ||
275 | vpxor \T4, \T2, \T2 | ||
276 | |||
277 | vpsrldq $4, \T2, \T5 # shift-R T5 1 DW | ||
278 | |||
279 | vpslldq $12, \T2, \T2 # shift-L T2 3 DWs | ||
280 | vpxor \T2, \GH, \GH # first phase of the reduction complete | ||
281 | |||
282 | #second phase of the reduction | ||
283 | |||
284 | vpsrld $1,\GH, \T2 # packed left shifting >> 1 | ||
285 | vpsrld $2,\GH, \T3 # packed left shifting >> 2 | ||
286 | vpsrld $7,\GH, \T4 # packed left shifting >> 7 | ||
287 | vpxor \T3, \T2, \T2 # xor the shifted versions | ||
288 | vpxor \T4, \T2, \T2 | ||
289 | |||
290 | vpxor \T5, \T2, \T2 | ||
291 | vpxor \T2, \GH, \GH | ||
292 | vpxor \T1, \GH, \GH # the result is in GH | ||
293 | |||
294 | |||
295 | .endm | ||
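GHASH_MUL_AVX follows the one-level Karatsuba split used throughout this file: the 128x128 carry-less product is assembled from three 64x64 vpclmulqdq products, then folded back modulo the GHASH polynomial in the two reduction phases commented above. Restating the identity the comments rely on (a hedged recap, not new material; every "+" is XOR because coefficients live in GF(2), and the routine actually returns A*B*x mod g(x) because operands are bit-reflected and the hash key is pre-shifted):

	A = a_1 x^{64} + a_0, \qquad B = b_1 x^{64} + b_0
	A \cdot B \equiv a_1 b_1\, x^{128}
	        + \bigl[(a_1 \oplus a_0)(b_1 \oplus b_0) \oplus a_1 b_1 \oplus a_0 b_0\bigr] x^{64}
	        + a_0 b_0 \pmod{g(x)}
	g(x) = x^{128} + x^{127} + x^{126} + x^{121} + 1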
296 | |||
297 | .macro PRECOMPUTE_AVX HK T1 T2 T3 T4 T5 T6 | ||
298 | |||
299 | # Haskey_i_k holds XORed values of the low and high parts of the Haskey_i | ||
300 | vmovdqa \HK, \T5 | ||
301 | |||
302 | vpshufd $0b01001110, \T5, \T1 | ||
303 | vpxor \T5, \T1, \T1 | ||
304 | vmovdqa \T1, HashKey_k(arg1) | ||
305 | |||
306 | GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^2<<1 mod poly | ||
307 | vmovdqa \T5, HashKey_2(arg1) # [HashKey_2] = HashKey^2<<1 mod poly | ||
308 | vpshufd $0b01001110, \T5, \T1 | ||
309 | vpxor \T5, \T1, \T1 | ||
310 | vmovdqa \T1, HashKey_2_k(arg1) | ||
311 | |||
312 | GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^3<<1 mod poly | ||
313 | vmovdqa \T5, HashKey_3(arg1) | ||
314 | vpshufd $0b01001110, \T5, \T1 | ||
315 | vpxor \T5, \T1, \T1 | ||
316 | vmovdqa \T1, HashKey_3_k(arg1) | ||
317 | |||
318 | GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^4<<1 mod poly | ||
319 | vmovdqa \T5, HashKey_4(arg1) | ||
320 | vpshufd $0b01001110, \T5, \T1 | ||
321 | vpxor \T5, \T1, \T1 | ||
322 | vmovdqa \T1, HashKey_4_k(arg1) | ||
323 | |||
324 | GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^5<<1 mod poly | ||
325 | vmovdqa \T5, HashKey_5(arg1) | ||
326 | vpshufd $0b01001110, \T5, \T1 | ||
327 | vpxor \T5, \T1, \T1 | ||
328 | vmovdqa \T1, HashKey_5_k(arg1) | ||
329 | |||
330 | GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^6<<1 mod poly | ||
331 | vmovdqa \T5, HashKey_6(arg1) | ||
332 | vpshufd $0b01001110, \T5, \T1 | ||
333 | vpxor \T5, \T1, \T1 | ||
334 | vmovdqa \T1, HashKey_6_k(arg1) | ||
335 | |||
336 | GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^7<<1 mod poly | ||
337 | vmovdqa \T5, HashKey_7(arg1) | ||
338 | vpshufd $0b01001110, \T5, \T1 | ||
339 | vpxor \T5, \T1, \T1 | ||
340 | vmovdqa \T1, HashKey_7_k(arg1) | ||
341 | |||
342 | GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^8<<1 mod poly | ||
343 | vmovdqa \T5, HashKey_8(arg1) | ||
344 | vpshufd $0b01001110, \T5, \T1 | ||
345 | vpxor \T5, \T1, \T1 | ||
346 | vmovdqa \T1, HashKey_8_k(arg1) | ||
347 | |||
348 | .endm | ||
349 | |||
350 | ## if a = number of total plaintext bytes | ||
351 | ## b = floor(a/16) | ||
352 | ## num_initial_blocks = b mod 4# | ||
353 | ## encrypt the initial num_initial_blocks blocks and apply ghash on the ciphertext | ||
354 | ## r10, r11, r12, rax are clobbered | ||
355 | ## arg1, arg2, arg3, r14 are used as a pointer only, not modified | ||
356 | |||
357 | .macro INITIAL_BLOCKS_AVX num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC | ||
358 | i = (8-\num_initial_blocks) | ||
359 | setreg | ||
360 | |||
361 | mov arg6, %r10 # r10 = AAD | ||
362 | mov arg7, %r12 # r12 = aadLen | ||
363 | |||
364 | |||
365 | mov %r12, %r11 | ||
366 | |||
367 | vpxor reg_i, reg_i, reg_i | ||
368 | _get_AAD_loop\@: | ||
369 | vmovd (%r10), \T1 | ||
370 | vpslldq $12, \T1, \T1 | ||
371 | vpsrldq $4, reg_i, reg_i | ||
372 | vpxor \T1, reg_i, reg_i | ||
373 | |||
374 | add $4, %r10 | ||
375 | sub $4, %r12 | ||
376 | jg _get_AAD_loop\@ | ||
377 | |||
378 | |||
379 | cmp $16, %r11 | ||
380 | je _get_AAD_loop2_done\@ | ||
381 | mov $16, %r12 | ||
382 | |||
383 | _get_AAD_loop2\@: | ||
384 | vpsrldq $4, reg_i, reg_i | ||
385 | sub $4, %r12 | ||
386 | cmp %r11, %r12 | ||
387 | jg _get_AAD_loop2\@ | ||
388 | |||
389 | _get_AAD_loop2_done\@: | ||
390 | |||
391 | #byte-reflect the AAD data | ||
392 | vpshufb SHUF_MASK(%rip), reg_i, reg_i | ||
393 | |||
394 | # initialize the data pointer offset as zero | ||
395 | xor %r11, %r11 | ||
396 | |||
397 | # start AES for num_initial_blocks blocks | ||
398 | mov arg5, %rax # rax = *Y0 | ||
399 | vmovdqu (%rax), \CTR # CTR = Y0 | ||
400 | vpshufb SHUF_MASK(%rip), \CTR, \CTR | ||
401 | |||
402 | |||
403 | i = (9-\num_initial_blocks) | ||
404 | setreg | ||
405 | .rep \num_initial_blocks | ||
406 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
407 | vmovdqa \CTR, reg_i | ||
408 | vpshufb SHUF_MASK(%rip), reg_i, reg_i # perform a 16Byte swap | ||
409 | i = (i+1) | ||
410 | setreg | ||
411 | .endr | ||
412 | |||
413 | vmovdqa (arg1), \T_key | ||
414 | i = (9-\num_initial_blocks) | ||
415 | setreg | ||
416 | .rep \num_initial_blocks | ||
417 | vpxor \T_key, reg_i, reg_i | ||
418 | i = (i+1) | ||
419 | setreg | ||
420 | .endr | ||
421 | |||
422 | j = 1 | ||
423 | setreg | ||
424 | .rep 9 | ||
425 | vmovdqa 16*j(arg1), \T_key | ||
426 | i = (9-\num_initial_blocks) | ||
427 | setreg | ||
428 | .rep \num_initial_blocks | ||
429 | vaesenc \T_key, reg_i, reg_i | ||
430 | i = (i+1) | ||
431 | setreg | ||
432 | .endr | ||
433 | |||
434 | j = (j+1) | ||
435 | setreg | ||
436 | .endr | ||
437 | |||
438 | |||
439 | vmovdqa 16*10(arg1), \T_key | ||
440 | i = (9-\num_initial_blocks) | ||
441 | setreg | ||
442 | .rep \num_initial_blocks | ||
443 | vaesenclast \T_key, reg_i, reg_i | ||
444 | i = (i+1) | ||
445 | setreg | ||
446 | .endr | ||
447 | |||
448 | i = (9-\num_initial_blocks) | ||
449 | setreg | ||
450 | .rep \num_initial_blocks | ||
451 | vmovdqu (arg3, %r11), \T1 | ||
452 | vpxor \T1, reg_i, reg_i | ||
453 | vmovdqu reg_i, (arg2 , %r11) # write back ciphertext for num_initial_blocks blocks | ||
454 | add $16, %r11 | ||
455 | .if \ENC_DEC == DEC | ||
456 | vmovdqa \T1, reg_i | ||
457 | .endif | ||
458 | vpshufb SHUF_MASK(%rip), reg_i, reg_i # prepare ciphertext for GHASH computations | ||
459 | i = (i+1) | ||
460 | setreg | ||
461 | .endr | ||
462 | |||
463 | |||
464 | i = (8-\num_initial_blocks) | ||
465 | j = (9-\num_initial_blocks) | ||
466 | setreg | ||
467 | GHASH_MUL_AVX reg_i, \T2, \T1, \T3, \T4, \T5, \T6 | ||
468 | |||
469 | .rep \num_initial_blocks | ||
470 | vpxor reg_i, reg_j, reg_j | ||
471 | GHASH_MUL_AVX reg_j, \T2, \T1, \T3, \T4, \T5, \T6 # apply GHASH on num_initial_blocks blocks | ||
472 | i = (i+1) | ||
473 | j = (j+1) | ||
474 | setreg | ||
475 | .endr | ||
476 | # XMM8 has the combined result here | ||
477 | |||
478 | vmovdqa \XMM8, TMP1(%rsp) | ||
479 | vmovdqa \XMM8, \T3 | ||
480 | |||
481 | cmp $128, %r13 | ||
482 | jl _initial_blocks_done\@ # no need for precomputed constants | ||
483 | |||
484 | ############################################################################### | ||
485 | # Haskey_i_k holds XORed values of the low and high parts of the Haskey_i | ||
486 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
487 | vmovdqa \CTR, \XMM1 | ||
488 | vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap | ||
489 | |||
490 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
491 | vmovdqa \CTR, \XMM2 | ||
492 | vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap | ||
493 | |||
494 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
495 | vmovdqa \CTR, \XMM3 | ||
496 | vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap | ||
497 | |||
498 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
499 | vmovdqa \CTR, \XMM4 | ||
500 | vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap | ||
501 | |||
502 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
503 | vmovdqa \CTR, \XMM5 | ||
504 | vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap | ||
505 | |||
506 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
507 | vmovdqa \CTR, \XMM6 | ||
508 | vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap | ||
509 | |||
510 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
511 | vmovdqa \CTR, \XMM7 | ||
512 | vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap | ||
513 | |||
514 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
515 | vmovdqa \CTR, \XMM8 | ||
516 | vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap | ||
517 | |||
518 | vmovdqa (arg1), \T_key | ||
519 | vpxor \T_key, \XMM1, \XMM1 | ||
520 | vpxor \T_key, \XMM2, \XMM2 | ||
521 | vpxor \T_key, \XMM3, \XMM3 | ||
522 | vpxor \T_key, \XMM4, \XMM4 | ||
523 | vpxor \T_key, \XMM5, \XMM5 | ||
524 | vpxor \T_key, \XMM6, \XMM6 | ||
525 | vpxor \T_key, \XMM7, \XMM7 | ||
526 | vpxor \T_key, \XMM8, \XMM8 | ||
527 | |||
528 | i = 1 | ||
529 | setreg | ||
530 | .rep 9 # do 9 rounds | ||
531 | vmovdqa 16*i(arg1), \T_key | ||
532 | vaesenc \T_key, \XMM1, \XMM1 | ||
533 | vaesenc \T_key, \XMM2, \XMM2 | ||
534 | vaesenc \T_key, \XMM3, \XMM3 | ||
535 | vaesenc \T_key, \XMM4, \XMM4 | ||
536 | vaesenc \T_key, \XMM5, \XMM5 | ||
537 | vaesenc \T_key, \XMM6, \XMM6 | ||
538 | vaesenc \T_key, \XMM7, \XMM7 | ||
539 | vaesenc \T_key, \XMM8, \XMM8 | ||
540 | i = (i+1) | ||
541 | setreg | ||
542 | .endr | ||
543 | |||
544 | |||
545 | vmovdqa 16*i(arg1), \T_key | ||
546 | vaesenclast \T_key, \XMM1, \XMM1 | ||
547 | vaesenclast \T_key, \XMM2, \XMM2 | ||
548 | vaesenclast \T_key, \XMM3, \XMM3 | ||
549 | vaesenclast \T_key, \XMM4, \XMM4 | ||
550 | vaesenclast \T_key, \XMM5, \XMM5 | ||
551 | vaesenclast \T_key, \XMM6, \XMM6 | ||
552 | vaesenclast \T_key, \XMM7, \XMM7 | ||
553 | vaesenclast \T_key, \XMM8, \XMM8 | ||
554 | |||
555 | vmovdqu (arg3, %r11), \T1 | ||
556 | vpxor \T1, \XMM1, \XMM1 | ||
557 | vmovdqu \XMM1, (arg2 , %r11) | ||
558 | .if \ENC_DEC == DEC | ||
559 | vmovdqa \T1, \XMM1 | ||
560 | .endif | ||
561 | |||
562 | vmovdqu 16*1(arg3, %r11), \T1 | ||
563 | vpxor \T1, \XMM2, \XMM2 | ||
564 | vmovdqu \XMM2, 16*1(arg2 , %r11) | ||
565 | .if \ENC_DEC == DEC | ||
566 | vmovdqa \T1, \XMM2 | ||
567 | .endif | ||
568 | |||
569 | vmovdqu 16*2(arg3, %r11), \T1 | ||
570 | vpxor \T1, \XMM3, \XMM3 | ||
571 | vmovdqu \XMM3, 16*2(arg2 , %r11) | ||
572 | .if \ENC_DEC == DEC | ||
573 | vmovdqa \T1, \XMM3 | ||
574 | .endif | ||
575 | |||
576 | vmovdqu 16*3(arg3, %r11), \T1 | ||
577 | vpxor \T1, \XMM4, \XMM4 | ||
578 | vmovdqu \XMM4, 16*3(arg2 , %r11) | ||
579 | .if \ENC_DEC == DEC | ||
580 | vmovdqa \T1, \XMM4 | ||
581 | .endif | ||
582 | |||
583 | vmovdqu 16*4(arg3, %r11), \T1 | ||
584 | vpxor \T1, \XMM5, \XMM5 | ||
585 | vmovdqu \XMM5, 16*4(arg2 , %r11) | ||
586 | .if \ENC_DEC == DEC | ||
587 | vmovdqa \T1, \XMM5 | ||
588 | .endif | ||
589 | |||
590 | vmovdqu 16*5(arg3, %r11), \T1 | ||
591 | vpxor \T1, \XMM6, \XMM6 | ||
592 | vmovdqu \XMM6, 16*5(arg2 , %r11) | ||
593 | .if \ENC_DEC == DEC | ||
594 | vmovdqa \T1, \XMM6 | ||
595 | .endif | ||
596 | |||
597 | vmovdqu 16*6(arg3, %r11), \T1 | ||
598 | vpxor \T1, \XMM7, \XMM7 | ||
599 | vmovdqu \XMM7, 16*6(arg2 , %r11) | ||
600 | .if \ENC_DEC == DEC | ||
601 | vmovdqa \T1, \XMM7 | ||
602 | .endif | ||
603 | |||
604 | vmovdqu 16*7(arg3, %r11), \T1 | ||
605 | vpxor \T1, \XMM8, \XMM8 | ||
606 | vmovdqu \XMM8, 16*7(arg2 , %r11) | ||
607 | .if \ENC_DEC == DEC | ||
608 | vmovdqa \T1, \XMM8 | ||
609 | .endif | ||
610 | |||
611 | add $128, %r11 | ||
612 | |||
613 | vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap | ||
614 | vpxor TMP1(%rsp), \XMM1, \XMM1 # combine GHASHed value with the corresponding ciphertext | ||
615 | vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap | ||
616 | vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap | ||
617 | vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap | ||
618 | vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap | ||
619 | vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap | ||
620 | vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap | ||
621 | vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap | ||
622 | |||
623 | ############################################################################### | ||
624 | |||
625 | _initial_blocks_done\@: | ||
626 | |||
627 | .endm | ||
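INITIAL_BLOCKS_AVX handles the 0-7 blocks that precede the main by-8 loop, in addition to absorbing the AAD and starting the counter. The dispatch in GCM_ENC_DEC_AVX further down chooses num_initial_blocks from the total length; a hedged C restatement of that arithmetic (the helper name is hypothetical, the operations mirror the "and $-16 / shr $4 / and $7" sequence in the caller):

	static unsigned long num_initial_blocks(unsigned long plaintext_len)
	{
		unsigned long whole_bytes = plaintext_len & ~15UL;	/* %r13 = len & -16 */
		return (whole_bytes >> 4) & 7;				/* %r12: 0..7       */
	}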
628 | |||
629 | # encrypt 8 blocks at a time | ||
630 | # ghash the 8 previously encrypted ciphertext blocks | ||
631 | # arg1, arg2, arg3 are used as pointers only, not modified | ||
632 | # r11 is the data offset value | ||
633 | .macro GHASH_8_ENCRYPT_8_PARALLEL_AVX T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC | ||
634 | |||
635 | vmovdqa \XMM1, \T2 | ||
636 | vmovdqa \XMM2, TMP2(%rsp) | ||
637 | vmovdqa \XMM3, TMP3(%rsp) | ||
638 | vmovdqa \XMM4, TMP4(%rsp) | ||
639 | vmovdqa \XMM5, TMP5(%rsp) | ||
640 | vmovdqa \XMM6, TMP6(%rsp) | ||
641 | vmovdqa \XMM7, TMP7(%rsp) | ||
642 | vmovdqa \XMM8, TMP8(%rsp) | ||
643 | |||
644 | .if \loop_idx == in_order | ||
645 | vpaddd ONE(%rip), \CTR, \XMM1 # INCR CNT | ||
646 | vpaddd ONE(%rip), \XMM1, \XMM2 | ||
647 | vpaddd ONE(%rip), \XMM2, \XMM3 | ||
648 | vpaddd ONE(%rip), \XMM3, \XMM4 | ||
649 | vpaddd ONE(%rip), \XMM4, \XMM5 | ||
650 | vpaddd ONE(%rip), \XMM5, \XMM6 | ||
651 | vpaddd ONE(%rip), \XMM6, \XMM7 | ||
652 | vpaddd ONE(%rip), \XMM7, \XMM8 | ||
653 | vmovdqa \XMM8, \CTR | ||
654 | |||
655 | vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap | ||
656 | vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap | ||
657 | vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap | ||
658 | vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap | ||
659 | vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap | ||
660 | vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap | ||
661 | vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap | ||
662 | vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap | ||
663 | .else | ||
664 | vpaddd ONEf(%rip), \CTR, \XMM1 # INCR CNT | ||
665 | vpaddd ONEf(%rip), \XMM1, \XMM2 | ||
666 | vpaddd ONEf(%rip), \XMM2, \XMM3 | ||
667 | vpaddd ONEf(%rip), \XMM3, \XMM4 | ||
668 | vpaddd ONEf(%rip), \XMM4, \XMM5 | ||
669 | vpaddd ONEf(%rip), \XMM5, \XMM6 | ||
670 | vpaddd ONEf(%rip), \XMM6, \XMM7 | ||
671 | vpaddd ONEf(%rip), \XMM7, \XMM8 | ||
672 | vmovdqa \XMM8, \CTR | ||
673 | .endif | ||
674 | |||
675 | |||
676 | ####################################################################### | ||
677 | |||
678 | vmovdqu (arg1), \T1 | ||
679 | vpxor \T1, \XMM1, \XMM1 | ||
680 | vpxor \T1, \XMM2, \XMM2 | ||
681 | vpxor \T1, \XMM3, \XMM3 | ||
682 | vpxor \T1, \XMM4, \XMM4 | ||
683 | vpxor \T1, \XMM5, \XMM5 | ||
684 | vpxor \T1, \XMM6, \XMM6 | ||
685 | vpxor \T1, \XMM7, \XMM7 | ||
686 | vpxor \T1, \XMM8, \XMM8 | ||
687 | |||
688 | ####################################################################### | ||
689 | |||
690 | |||
691 | |||
692 | |||
693 | |||
694 | vmovdqu 16*1(arg1), \T1 | ||
695 | vaesenc \T1, \XMM1, \XMM1 | ||
696 | vaesenc \T1, \XMM2, \XMM2 | ||
697 | vaesenc \T1, \XMM3, \XMM3 | ||
698 | vaesenc \T1, \XMM4, \XMM4 | ||
699 | vaesenc \T1, \XMM5, \XMM5 | ||
700 | vaesenc \T1, \XMM6, \XMM6 | ||
701 | vaesenc \T1, \XMM7, \XMM7 | ||
702 | vaesenc \T1, \XMM8, \XMM8 | ||
703 | |||
704 | vmovdqu 16*2(arg1), \T1 | ||
705 | vaesenc \T1, \XMM1, \XMM1 | ||
706 | vaesenc \T1, \XMM2, \XMM2 | ||
707 | vaesenc \T1, \XMM3, \XMM3 | ||
708 | vaesenc \T1, \XMM4, \XMM4 | ||
709 | vaesenc \T1, \XMM5, \XMM5 | ||
710 | vaesenc \T1, \XMM6, \XMM6 | ||
711 | vaesenc \T1, \XMM7, \XMM7 | ||
712 | vaesenc \T1, \XMM8, \XMM8 | ||
713 | |||
714 | |||
715 | ####################################################################### | ||
716 | |||
717 | vmovdqa HashKey_8(arg1), \T5 | ||
718 | vpclmulqdq $0x11, \T5, \T2, \T4 # T4 = a1*b1 | ||
719 | vpclmulqdq $0x00, \T5, \T2, \T7 # T7 = a0*b0 | ||
720 | |||
721 | vpshufd $0b01001110, \T2, \T6 | ||
722 | vpxor \T2, \T6, \T6 | ||
723 | |||
724 | vmovdqa HashKey_8_k(arg1), \T5 | ||
725 | vpclmulqdq $0x00, \T5, \T6, \T6 | ||
726 | |||
727 | vmovdqu 16*3(arg1), \T1 | ||
728 | vaesenc \T1, \XMM1, \XMM1 | ||
729 | vaesenc \T1, \XMM2, \XMM2 | ||
730 | vaesenc \T1, \XMM3, \XMM3 | ||
731 | vaesenc \T1, \XMM4, \XMM4 | ||
732 | vaesenc \T1, \XMM5, \XMM5 | ||
733 | vaesenc \T1, \XMM6, \XMM6 | ||
734 | vaesenc \T1, \XMM7, \XMM7 | ||
735 | vaesenc \T1, \XMM8, \XMM8 | ||
736 | |||
737 | vmovdqa TMP2(%rsp), \T1 | ||
738 | vmovdqa HashKey_7(arg1), \T5 | ||
739 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
740 | vpxor \T3, \T4, \T4 | ||
741 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
742 | vpxor \T3, \T7, \T7 | ||
743 | |||
744 | vpshufd $0b01001110, \T1, \T3 | ||
745 | vpxor \T1, \T3, \T3 | ||
746 | vmovdqa HashKey_7_k(arg1), \T5 | ||
747 | vpclmulqdq $0x10, \T5, \T3, \T3 | ||
748 | vpxor \T3, \T6, \T6 | ||
749 | |||
750 | vmovdqu 16*4(arg1), \T1 | ||
751 | vaesenc \T1, \XMM1, \XMM1 | ||
752 | vaesenc \T1, \XMM2, \XMM2 | ||
753 | vaesenc \T1, \XMM3, \XMM3 | ||
754 | vaesenc \T1, \XMM4, \XMM4 | ||
755 | vaesenc \T1, \XMM5, \XMM5 | ||
756 | vaesenc \T1, \XMM6, \XMM6 | ||
757 | vaesenc \T1, \XMM7, \XMM7 | ||
758 | vaesenc \T1, \XMM8, \XMM8 | ||
759 | |||
760 | ####################################################################### | ||
761 | |||
762 | vmovdqa TMP3(%rsp), \T1 | ||
763 | vmovdqa HashKey_6(arg1), \T5 | ||
764 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
765 | vpxor \T3, \T4, \T4 | ||
766 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
767 | vpxor \T3, \T7, \T7 | ||
768 | |||
769 | vpshufd $0b01001110, \T1, \T3 | ||
770 | vpxor \T1, \T3, \T3 | ||
771 | vmovdqa HashKey_6_k(arg1), \T5 | ||
772 | vpclmulqdq $0x10, \T5, \T3, \T3 | ||
773 | vpxor \T3, \T6, \T6 | ||
774 | |||
775 | vmovdqu 16*5(arg1), \T1 | ||
776 | vaesenc \T1, \XMM1, \XMM1 | ||
777 | vaesenc \T1, \XMM2, \XMM2 | ||
778 | vaesenc \T1, \XMM3, \XMM3 | ||
779 | vaesenc \T1, \XMM4, \XMM4 | ||
780 | vaesenc \T1, \XMM5, \XMM5 | ||
781 | vaesenc \T1, \XMM6, \XMM6 | ||
782 | vaesenc \T1, \XMM7, \XMM7 | ||
783 | vaesenc \T1, \XMM8, \XMM8 | ||
784 | |||
785 | vmovdqa TMP4(%rsp), \T1 | ||
786 | vmovdqa HashKey_5(arg1), \T5 | ||
787 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
788 | vpxor \T3, \T4, \T4 | ||
789 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
790 | vpxor \T3, \T7, \T7 | ||
791 | |||
792 | vpshufd $0b01001110, \T1, \T3 | ||
793 | vpxor \T1, \T3, \T3 | ||
794 | vmovdqa HashKey_5_k(arg1), \T5 | ||
795 | vpclmulqdq $0x10, \T5, \T3, \T3 | ||
796 | vpxor \T3, \T6, \T6 | ||
797 | |||
798 | vmovdqu 16*6(arg1), \T1 | ||
799 | vaesenc \T1, \XMM1, \XMM1 | ||
800 | vaesenc \T1, \XMM2, \XMM2 | ||
801 | vaesenc \T1, \XMM3, \XMM3 | ||
802 | vaesenc \T1, \XMM4, \XMM4 | ||
803 | vaesenc \T1, \XMM5, \XMM5 | ||
804 | vaesenc \T1, \XMM6, \XMM6 | ||
805 | vaesenc \T1, \XMM7, \XMM7 | ||
806 | vaesenc \T1, \XMM8, \XMM8 | ||
807 | |||
808 | |||
809 | vmovdqa TMP5(%rsp), \T1 | ||
810 | vmovdqa HashKey_4(arg1), \T5 | ||
811 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
812 | vpxor \T3, \T4, \T4 | ||
813 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
814 | vpxor \T3, \T7, \T7 | ||
815 | |||
816 | vpshufd $0b01001110, \T1, \T3 | ||
817 | vpxor \T1, \T3, \T3 | ||
818 | vmovdqa HashKey_4_k(arg1), \T5 | ||
819 | vpclmulqdq $0x10, \T5, \T3, \T3 | ||
820 | vpxor \T3, \T6, \T6 | ||
821 | |||
822 | vmovdqu 16*7(arg1), \T1 | ||
823 | vaesenc \T1, \XMM1, \XMM1 | ||
824 | vaesenc \T1, \XMM2, \XMM2 | ||
825 | vaesenc \T1, \XMM3, \XMM3 | ||
826 | vaesenc \T1, \XMM4, \XMM4 | ||
827 | vaesenc \T1, \XMM5, \XMM5 | ||
828 | vaesenc \T1, \XMM6, \XMM6 | ||
829 | vaesenc \T1, \XMM7, \XMM7 | ||
830 | vaesenc \T1, \XMM8, \XMM8 | ||
831 | |||
832 | vmovdqa TMP6(%rsp), \T1 | ||
833 | vmovdqa HashKey_3(arg1), \T5 | ||
834 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
835 | vpxor \T3, \T4, \T4 | ||
836 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
837 | vpxor \T3, \T7, \T7 | ||
838 | |||
839 | vpshufd $0b01001110, \T1, \T3 | ||
840 | vpxor \T1, \T3, \T3 | ||
841 | vmovdqa HashKey_3_k(arg1), \T5 | ||
842 | vpclmulqdq $0x10, \T5, \T3, \T3 | ||
843 | vpxor \T3, \T6, \T6 | ||
844 | |||
845 | |||
846 | vmovdqu 16*8(arg1), \T1 | ||
847 | vaesenc \T1, \XMM1, \XMM1 | ||
848 | vaesenc \T1, \XMM2, \XMM2 | ||
849 | vaesenc \T1, \XMM3, \XMM3 | ||
850 | vaesenc \T1, \XMM4, \XMM4 | ||
851 | vaesenc \T1, \XMM5, \XMM5 | ||
852 | vaesenc \T1, \XMM6, \XMM6 | ||
853 | vaesenc \T1, \XMM7, \XMM7 | ||
854 | vaesenc \T1, \XMM8, \XMM8 | ||
855 | |||
856 | vmovdqa TMP7(%rsp), \T1 | ||
857 | vmovdqa HashKey_2(arg1), \T5 | ||
858 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
859 | vpxor \T3, \T4, \T4 | ||
860 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
861 | vpxor \T3, \T7, \T7 | ||
862 | |||
863 | vpshufd $0b01001110, \T1, \T3 | ||
864 | vpxor \T1, \T3, \T3 | ||
865 | vmovdqa HashKey_2_k(arg1), \T5 | ||
866 | vpclmulqdq $0x10, \T5, \T3, \T3 | ||
867 | vpxor \T3, \T6, \T6 | ||
868 | |||
869 | ####################################################################### | ||
870 | |||
871 | vmovdqu 16*9(arg1), \T5 | ||
872 | vaesenc \T5, \XMM1, \XMM1 | ||
873 | vaesenc \T5, \XMM2, \XMM2 | ||
874 | vaesenc \T5, \XMM3, \XMM3 | ||
875 | vaesenc \T5, \XMM4, \XMM4 | ||
876 | vaesenc \T5, \XMM5, \XMM5 | ||
877 | vaesenc \T5, \XMM6, \XMM6 | ||
878 | vaesenc \T5, \XMM7, \XMM7 | ||
879 | vaesenc \T5, \XMM8, \XMM8 | ||
880 | |||
881 | vmovdqa TMP8(%rsp), \T1 | ||
882 | vmovdqa HashKey(arg1), \T5 | ||
883 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
884 | vpxor \T3, \T4, \T4 | ||
885 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
886 | vpxor \T3, \T7, \T7 | ||
887 | |||
888 | vpshufd $0b01001110, \T1, \T3 | ||
889 | vpxor \T1, \T3, \T3 | ||
890 | vmovdqa HashKey_k(arg1), \T5 | ||
891 | vpclmulqdq $0x10, \T5, \T3, \T3 | ||
892 | vpxor \T3, \T6, \T6 | ||
893 | |||
894 | vpxor \T4, \T6, \T6 | ||
895 | vpxor \T7, \T6, \T6 | ||
896 | |||
897 | vmovdqu 16*10(arg1), \T5 | ||
898 | |||
899 | i = 0 | ||
900 | j = 1 | ||
901 | setreg | ||
902 | .rep 8 | ||
903 | vpxor 16*i(arg3, %r11), \T5, \T2 | ||
904 | .if \ENC_DEC == ENC | ||
905 | vaesenclast \T2, reg_j, reg_j | ||
906 | .else | ||
907 | vaesenclast \T2, reg_j, \T3 | ||
908 | vmovdqu 16*i(arg3, %r11), reg_j | ||
909 | vmovdqu \T3, 16*i(arg2, %r11) | ||
910 | .endif | ||
911 | i = (i+1) | ||
912 | j = (j+1) | ||
913 | setreg | ||
914 | .endr | ||
915 | ####################################################################### | ||
916 | |||
917 | |||
918 | vpslldq $8, \T6, \T3 # shift-L T3 2 DWs | ||
919 | vpsrldq $8, \T6, \T6 # shift-R T2 2 DWs | ||
920 | vpxor \T3, \T7, \T7 | ||
921 | vpxor \T4, \T6, \T6 # accumulate the results in T6:T7 | ||
922 | |||
923 | |||
924 | |||
925 | ####################################################################### | ||
926 | #first phase of the reduction | ||
927 | ####################################################################### | ||
928 | vpslld $31, \T7, \T2 # packed right shifting << 31 | ||
929 | vpslld $30, \T7, \T3 # packed right shifting shift << 30 | ||
930 | vpslld $25, \T7, \T4 # packed right shifting shift << 25 | ||
931 | |||
932 | vpxor \T3, \T2, \T2 # xor the shifted versions | ||
933 | vpxor \T4, \T2, \T2 | ||
934 | |||
935 | vpsrldq $4, \T2, \T1 # shift-R T1 1 DW | ||
936 | |||
937 | vpslldq $12, \T2, \T2 # shift-L T2 3 DWs | ||
938 | vpxor \T2, \T7, \T7 # first phase of the reduction complete | ||
939 | ####################################################################### | ||
940 | .if \ENC_DEC == ENC | ||
941 | vmovdqu \XMM1, 16*0(arg2,%r11) # Write to the Ciphertext buffer | ||
942 | vmovdqu \XMM2, 16*1(arg2,%r11) # Write to the Ciphertext buffer | ||
943 | vmovdqu \XMM3, 16*2(arg2,%r11) # Write to the Ciphertext buffer | ||
944 | vmovdqu \XMM4, 16*3(arg2,%r11) # Write to the Ciphertext buffer | ||
945 | vmovdqu \XMM5, 16*4(arg2,%r11) # Write to the Ciphertext buffer | ||
946 | vmovdqu \XMM6, 16*5(arg2,%r11) # Write to the Ciphertext buffer | ||
947 | vmovdqu \XMM7, 16*6(arg2,%r11) # Write to the Ciphertext buffer | ||
948 | vmovdqu \XMM8, 16*7(arg2,%r11) # Write to the Ciphertext buffer | ||
949 | .endif | ||
950 | |||
951 | ####################################################################### | ||
952 | #second phase of the reduction | ||
953 | vpsrld $1, \T7, \T2 # packed left shifting >> 1 | ||
954 | vpsrld $2, \T7, \T3 # packed left shifting >> 2 | ||
955 | vpsrld $7, \T7, \T4 # packed left shifting >> 7 | ||
956 | vpxor \T3, \T2, \T2 # xor the shifted versions | ||
957 | vpxor \T4, \T2, \T2 | ||
958 | |||
959 | vpxor \T1, \T2, \T2 | ||
960 | vpxor \T2, \T7, \T7 | ||
961 | vpxor \T7, \T6, \T6 # the result is in T6 | ||
962 | ####################################################################### | ||
963 | |||
964 | vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap | ||
965 | vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap | ||
966 | vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap | ||
967 | vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap | ||
968 | vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap | ||
969 | vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap | ||
970 | vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap | ||
971 | vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap | ||
972 | |||
973 | |||
974 | vpxor \T6, \XMM1, \XMM1 | ||
975 | |||
976 | |||
977 | |||
978 | .endm | ||
979 | |||
980 | |||
981 | # GHASH the last 4 ciphertext blocks. | ||
982 | .macro GHASH_LAST_8_AVX T1 T2 T3 T4 T5 T6 T7 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 | ||
983 | |||
984 | ## Karatsuba Method | ||
985 | |||
986 | |||
987 | vpshufd $0b01001110, \XMM1, \T2 | ||
988 | vpxor \XMM1, \T2, \T2 | ||
989 | vmovdqa HashKey_8(arg1), \T5 | ||
990 | vpclmulqdq $0x11, \T5, \XMM1, \T6 | ||
991 | vpclmulqdq $0x00, \T5, \XMM1, \T7 | ||
992 | |||
993 | vmovdqa HashKey_8_k(arg1), \T3 | ||
994 | vpclmulqdq $0x00, \T3, \T2, \XMM1 | ||
995 | |||
996 | ###################### | ||
997 | |||
998 | vpshufd $0b01001110, \XMM2, \T2 | ||
999 | vpxor \XMM2, \T2, \T2 | ||
1000 | vmovdqa HashKey_7(arg1), \T5 | ||
1001 | vpclmulqdq $0x11, \T5, \XMM2, \T4 | ||
1002 | vpxor \T4, \T6, \T6 | ||
1003 | |||
1004 | vpclmulqdq $0x00, \T5, \XMM2, \T4 | ||
1005 | vpxor \T4, \T7, \T7 | ||
1006 | |||
1007 | vmovdqa HashKey_7_k(arg1), \T3 | ||
1008 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
1009 | vpxor \T2, \XMM1, \XMM1 | ||
1010 | |||
1011 | ###################### | ||
1012 | |||
1013 | vpshufd $0b01001110, \XMM3, \T2 | ||
1014 | vpxor \XMM3, \T2, \T2 | ||
1015 | vmovdqa HashKey_6(arg1), \T5 | ||
1016 | vpclmulqdq $0x11, \T5, \XMM3, \T4 | ||
1017 | vpxor \T4, \T6, \T6 | ||
1018 | |||
1019 | vpclmulqdq $0x00, \T5, \XMM3, \T4 | ||
1020 | vpxor \T4, \T7, \T7 | ||
1021 | |||
1022 | vmovdqa HashKey_6_k(arg1), \T3 | ||
1023 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
1024 | vpxor \T2, \XMM1, \XMM1 | ||
1025 | |||
1026 | ###################### | ||
1027 | |||
1028 | vpshufd $0b01001110, \XMM4, \T2 | ||
1029 | vpxor \XMM4, \T2, \T2 | ||
1030 | vmovdqa HashKey_5(arg1), \T5 | ||
1031 | vpclmulqdq $0x11, \T5, \XMM4, \T4 | ||
1032 | vpxor \T4, \T6, \T6 | ||
1033 | |||
1034 | vpclmulqdq $0x00, \T5, \XMM4, \T4 | ||
1035 | vpxor \T4, \T7, \T7 | ||
1036 | |||
1037 | vmovdqa HashKey_5_k(arg1), \T3 | ||
1038 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
1039 | vpxor \T2, \XMM1, \XMM1 | ||
1040 | |||
1041 | ###################### | ||
1042 | |||
1043 | vpshufd $0b01001110, \XMM5, \T2 | ||
1044 | vpxor \XMM5, \T2, \T2 | ||
1045 | vmovdqa HashKey_4(arg1), \T5 | ||
1046 | vpclmulqdq $0x11, \T5, \XMM5, \T4 | ||
1047 | vpxor \T4, \T6, \T6 | ||
1048 | |||
1049 | vpclmulqdq $0x00, \T5, \XMM5, \T4 | ||
1050 | vpxor \T4, \T7, \T7 | ||
1051 | |||
1052 | vmovdqa HashKey_4_k(arg1), \T3 | ||
1053 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
1054 | vpxor \T2, \XMM1, \XMM1 | ||
1055 | |||
1056 | ###################### | ||
1057 | |||
1058 | vpshufd $0b01001110, \XMM6, \T2 | ||
1059 | vpxor \XMM6, \T2, \T2 | ||
1060 | vmovdqa HashKey_3(arg1), \T5 | ||
1061 | vpclmulqdq $0x11, \T5, \XMM6, \T4 | ||
1062 | vpxor \T4, \T6, \T6 | ||
1063 | |||
1064 | vpclmulqdq $0x00, \T5, \XMM6, \T4 | ||
1065 | vpxor \T4, \T7, \T7 | ||
1066 | |||
1067 | vmovdqa HashKey_3_k(arg1), \T3 | ||
1068 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
1069 | vpxor \T2, \XMM1, \XMM1 | ||
1070 | |||
1071 | ###################### | ||
1072 | |||
1073 | vpshufd $0b01001110, \XMM7, \T2 | ||
1074 | vpxor \XMM7, \T2, \T2 | ||
1075 | vmovdqa HashKey_2(arg1), \T5 | ||
1076 | vpclmulqdq $0x11, \T5, \XMM7, \T4 | ||
1077 | vpxor \T4, \T6, \T6 | ||
1078 | |||
1079 | vpclmulqdq $0x00, \T5, \XMM7, \T4 | ||
1080 | vpxor \T4, \T7, \T7 | ||
1081 | |||
1082 | vmovdqa HashKey_2_k(arg1), \T3 | ||
1083 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
1084 | vpxor \T2, \XMM1, \XMM1 | ||
1085 | |||
1086 | ###################### | ||
1087 | |||
1088 | vpshufd $0b01001110, \XMM8, \T2 | ||
1089 | vpxor \XMM8, \T2, \T2 | ||
1090 | vmovdqa HashKey(arg1), \T5 | ||
1091 | vpclmulqdq $0x11, \T5, \XMM8, \T4 | ||
1092 | vpxor \T4, \T6, \T6 | ||
1093 | |||
1094 | vpclmulqdq $0x00, \T5, \XMM8, \T4 | ||
1095 | vpxor \T4, \T7, \T7 | ||
1096 | |||
1097 | vmovdqa HashKey_k(arg1), \T3 | ||
1098 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
1099 | |||
1100 | vpxor \T2, \XMM1, \XMM1 | ||
1101 | vpxor \T6, \XMM1, \XMM1 | ||
1102 | vpxor \T7, \XMM1, \T2 | ||
1103 | |||
1104 | |||
1105 | |||
1106 | |||
1107 | vpslldq $8, \T2, \T4 | ||
1108 | vpsrldq $8, \T2, \T2 | ||
1109 | |||
1110 | vpxor \T4, \T7, \T7 | ||
1111 | vpxor \T2, \T6, \T6 # <T6:T7> holds the result of | ||
1112 | # the accumulated carry-less multiplications | ||
1113 | |||
1114 | ####################################################################### | ||
1115 | #first phase of the reduction | ||
1116 | vpslld $31, \T7, \T2 # packed right shifting << 31 | ||
1117 | vpslld $30, \T7, \T3 # packed right shifting shift << 30 | ||
1118 | vpslld $25, \T7, \T4 # packed right shifting shift << 25 | ||
1119 | |||
1120 | vpxor \T3, \T2, \T2 # xor the shifted versions | ||
1121 | vpxor \T4, \T2, \T2 | ||
1122 | |||
1123 | vpsrldq $4, \T2, \T1 # shift-R T1 1 DW | ||
1124 | |||
1125 | vpslldq $12, \T2, \T2 # shift-L T2 3 DWs | ||
1126 | vpxor \T2, \T7, \T7 # first phase of the reduction complete | ||
1127 | ####################################################################### | ||
1128 | |||
1129 | |||
1130 | #second phase of the reduction | ||
1131 | vpsrld $1, \T7, \T2 # packed left shifting >> 1 | ||
1132 | vpsrld $2, \T7, \T3 # packed left shifting >> 2 | ||
1133 | vpsrld $7, \T7, \T4 # packed left shifting >> 7 | ||
1134 | vpxor \T3, \T2, \T2 # xor the shifted versions | ||
1135 | vpxor \T4, \T2, \T2 | ||
1136 | |||
1137 | vpxor \T1, \T2, \T2 | ||
1138 | vpxor \T2, \T7, \T7 | ||
1139 | vpxor \T7, \T6, \T6 # the result is in T6 | ||
1140 | |||
1141 | .endm | ||
1142 | |||
1143 | |||
1144 | # combined for GCM encrypt and decrypt functions | ||
1145 | # clobbering all xmm registers | ||
1146 | # clobbering r10, r11, r12, r13, r14, r15 | ||
1147 | .macro GCM_ENC_DEC_AVX ENC_DEC | ||
1148 | |||
1149 | #the number of pushes must equal STACK_OFFSET | ||
1150 | push %r12 | ||
1151 | push %r13 | ||
1152 | push %r14 | ||
1153 | push %r15 | ||
1154 | |||
1155 | mov %rsp, %r14 | ||
1156 | |||
1157 | |||
1158 | |||
1159 | |||
1160 | sub $VARIABLE_OFFSET, %rsp | ||
1161 | and $~63, %rsp # align rsp to 64 bytes | ||
1162 | |||
1163 | |||
1164 | vmovdqu HashKey(arg1), %xmm13 # xmm13 = HashKey | ||
1165 | |||
1166 | mov arg4, %r13 # save the number of bytes of plaintext/ciphertext | ||
1167 | and $-16, %r13 # r13 = r13 - (r13 mod 16) | ||
1168 | |||
1169 | mov %r13, %r12 | ||
1170 | shr $4, %r12 | ||
1171 | and $7, %r12 | ||
1172 | jz _initial_num_blocks_is_0\@ | ||
1173 | |||
1174 | cmp $7, %r12 | ||
1175 | je _initial_num_blocks_is_7\@ | ||
1176 | cmp $6, %r12 | ||
1177 | je _initial_num_blocks_is_6\@ | ||
1178 | cmp $5, %r12 | ||
1179 | je _initial_num_blocks_is_5\@ | ||
1180 | cmp $4, %r12 | ||
1181 | je _initial_num_blocks_is_4\@ | ||
1182 | cmp $3, %r12 | ||
1183 | je _initial_num_blocks_is_3\@ | ||
1184 | cmp $2, %r12 | ||
1185 | je _initial_num_blocks_is_2\@ | ||
1186 | |||
1187 | jmp _initial_num_blocks_is_1\@ | ||
1188 | |||
1189 | _initial_num_blocks_is_7\@: | ||
1190 | INITIAL_BLOCKS_AVX 7, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
1191 | sub $16*7, %r13 | ||
1192 | jmp _initial_blocks_encrypted\@ | ||
1193 | |||
1194 | _initial_num_blocks_is_6\@: | ||
1195 | INITIAL_BLOCKS_AVX 6, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
1196 | sub $16*6, %r13 | ||
1197 | jmp _initial_blocks_encrypted\@ | ||
1198 | |||
1199 | _initial_num_blocks_is_5\@: | ||
1200 | INITIAL_BLOCKS_AVX 5, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
1201 | sub $16*5, %r13 | ||
1202 | jmp _initial_blocks_encrypted\@ | ||
1203 | |||
1204 | _initial_num_blocks_is_4\@: | ||
1205 | INITIAL_BLOCKS_AVX 4, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
1206 | sub $16*4, %r13 | ||
1207 | jmp _initial_blocks_encrypted\@ | ||
1208 | |||
1209 | _initial_num_blocks_is_3\@: | ||
1210 | INITIAL_BLOCKS_AVX 3, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
1211 | sub $16*3, %r13 | ||
1212 | jmp _initial_blocks_encrypted\@ | ||
1213 | |||
1214 | _initial_num_blocks_is_2\@: | ||
1215 | INITIAL_BLOCKS_AVX 2, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
1216 | sub $16*2, %r13 | ||
1217 | jmp _initial_blocks_encrypted\@ | ||
1218 | |||
1219 | _initial_num_blocks_is_1\@: | ||
1220 | INITIAL_BLOCKS_AVX 1, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
1221 | sub $16*1, %r13 | ||
1222 | jmp _initial_blocks_encrypted\@ | ||
1223 | |||
1224 | _initial_num_blocks_is_0\@: | ||
1225 | INITIAL_BLOCKS_AVX 0, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
1226 | |||
1227 | |||
1228 | _initial_blocks_encrypted\@: | ||
1229 | cmp $0, %r13 | ||
1230 | je _zero_cipher_left\@ | ||
1231 | |||
1232 | sub $128, %r13 | ||
1233 | je _eight_cipher_left\@ | ||
1234 | |||
1235 | |||
1236 | |||
1237 | |||
1238 | vmovd %xmm9, %r15d | ||
1239 | and $255, %r15d | ||
1240 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
1241 | |||
1242 | |||
1243 | _encrypt_by_8_new\@: | ||
1244 | cmp $(255-8), %r15d | ||
1245 | jg _encrypt_by_8\@ | ||
1246 | |||
1247 | |||
1248 | |||
1249 | add $8, %r15b | ||
1250 | GHASH_8_ENCRYPT_8_PARALLEL_AVX %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, out_order, \ENC_DEC | ||
1251 | add $128, %r11 | ||
1252 | sub $128, %r13 | ||
1253 | jne _encrypt_by_8_new\@ | ||
1254 | |||
1255 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
1256 | jmp _eight_cipher_left\@ | ||
1257 | |||
1258 | _encrypt_by_8\@: | ||
1259 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
1260 | add $8, %r15b | ||
1261 | GHASH_8_ENCRYPT_8_PARALLEL_AVX %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, in_order, \ENC_DEC | ||
1262 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
1263 | add $128, %r11 | ||
1264 | sub $128, %r13 | ||
1265 | jne _encrypt_by_8_new\@ | ||
1266 | |||
1267 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
1268 | |||
1269 | |||
1270 | |||
1271 | |||
1272 | _eight_cipher_left\@: | ||
1273 | GHASH_LAST_8_AVX %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8 | ||
1274 | |||
1275 | |||
1276 | _zero_cipher_left\@: | ||
1277 | cmp $16, arg4 | ||
1278 | jl _only_less_than_16\@ | ||
1279 | |||
1280 | mov arg4, %r13 | ||
1281 | and $15, %r13 # r13 = (arg4 mod 16) | ||
1282 | |||
1283 | je _multiple_of_16_bytes\@ | ||
1284 | |||
1285 | # handle the last <16 Byte block separately | ||
1286 | |||
1287 | |||
1288 | vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn | ||
1289 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
1290 | ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Yn) | ||
1291 | |||
1292 | sub $16, %r11 | ||
1293 | add %r13, %r11 | ||
1294 | vmovdqu (arg3, %r11), %xmm1 # receive the last <16 Byte block | ||
1295 | |||
1296 | lea SHIFT_MASK+16(%rip), %r12 | ||
1297 | sub %r13, %r12 # adjust the shuffle mask pointer to be | ||
1298 | # able to shift 16-r13 bytes (r13 is the | ||
1299 | # number of bytes in plaintext mod 16) | ||
1300 | vmovdqu (%r12), %xmm2 # get the appropriate shuffle mask | ||
1301 | vpshufb %xmm2, %xmm1, %xmm1 # shift right 16-r13 bytes | ||
1302 | jmp _final_ghash_mul\@ | ||
1303 | |||
1304 | _only_less_than_16\@: | ||
1305 | # check for 0 length | ||
1306 | mov arg4, %r13 | ||
1307 | and $15, %r13 # r13 = (arg4 mod 16) | ||
1308 | |||
1309 | je _multiple_of_16_bytes\@ | ||
1310 | |||
1311 | # handle the last <16 Byte block separately | ||
1312 | |||
1313 | |||
1314 | vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn | ||
1315 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
1316 | ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Yn) | ||
1317 | |||
1318 | |||
1319 | lea SHIFT_MASK+16(%rip), %r12 | ||
1320 | sub %r13, %r12 # adjust the shuffle mask pointer to be | ||
1321 | # able to shift 16-r13 bytes (r13 is the | ||
1322 | # number of bytes in plaintext mod 16) | ||
1323 | |||
1324 | _get_last_16_byte_loop\@: | ||
1325 | movb (arg3, %r11), %al | ||
1326 | movb %al, TMP1 (%rsp , %r11) | ||
1327 | add $1, %r11 | ||
1328 | cmp %r13, %r11 | ||
1329 | jne _get_last_16_byte_loop\@ | ||
1330 | |||
1331 | vmovdqu TMP1(%rsp), %xmm1 | ||
1332 | |||
1333 | sub $16, %r11 | ||
1334 | |||
1335 | _final_ghash_mul\@: | ||
1336 | .if \ENC_DEC == DEC | ||
1337 | vmovdqa %xmm1, %xmm2 | ||
1338 | vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn) | ||
1339 | vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to | ||
1340 | # mask out top 16-r13 bytes of xmm9 | ||
1341 | vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9 | ||
1342 | vpand %xmm1, %xmm2, %xmm2 | ||
1343 | vpshufb SHUF_MASK(%rip), %xmm2, %xmm2 | ||
1344 | vpxor %xmm2, %xmm14, %xmm14 | ||
1345 | #GHASH computation for the last <16 Byte block | ||
1346 | GHASH_MUL_AVX %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 | ||
1347 | sub %r13, %r11 | ||
1348 | add $16, %r11 | ||
1349 | .else | ||
1350 | vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn) | ||
1351 | vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to | ||
1352 | # mask out top 16-r13 bytes of xmm9 | ||
1353 | vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9 | ||
1354 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
1355 | vpxor %xmm9, %xmm14, %xmm14 | ||
1356 | #GHASH computation for the last <16 Byte block | ||
1357 | GHASH_MUL_AVX %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 | ||
1358 | sub %r13, %r11 | ||
1359 | add $16, %r11 | ||
1360 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 # shuffle xmm9 back to output as ciphertext | ||
1361 | .endif | ||
1362 | |||
1363 | |||
1364 | ############################# | ||
1365 | # output r13 Bytes | ||
1366 | vmovq %xmm9, %rax | ||
1367 | cmp $8, %r13 | ||
1368 | jle _less_than_8_bytes_left\@ | ||
1369 | |||
1370 | mov %rax, (arg2 , %r11) | ||
1371 | add $8, %r11 | ||
1372 | vpsrldq $8, %xmm9, %xmm9 | ||
1373 | vmovq %xmm9, %rax | ||
1374 | sub $8, %r13 | ||
1375 | |||
1376 | _less_than_8_bytes_left\@: | ||
1377 | movb %al, (arg2 , %r11) | ||
1378 | add $1, %r11 | ||
1379 | shr $8, %rax | ||
1380 | sub $1, %r13 | ||
1381 | jne _less_than_8_bytes_left\@ | ||
1382 | ############################# | ||
1383 | |||
1384 | _multiple_of_16_bytes\@: | ||
1385 | mov arg7, %r12 # r12 = aadLen (number of bytes) | ||
1386 | shl $3, %r12 # convert into number of bits | ||
1387 | vmovd %r12d, %xmm15 # len(A) in xmm15 | ||
1388 | |||
1389 | shl $3, arg4 # len(C) in bits (*8) | ||
1390 | vmovq arg4, %xmm1 | ||
1391 | vpslldq $8, %xmm15, %xmm15 # xmm15 = len(A)|| 0x0000000000000000 | ||
1392 | vpxor %xmm1, %xmm15, %xmm15 # xmm15 = len(A)||len(C) | ||
1393 | |||
1394 | vpxor %xmm15, %xmm14, %xmm14 | ||
1395 | GHASH_MUL_AVX %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 # final GHASH computation | ||
1396 | vpshufb SHUF_MASK(%rip), %xmm14, %xmm14 # perform a 16Byte swap | ||
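# At this point xmm14 = GHASH(H, A || C || len(A)||len(C)); the tag computed
# just below is T = E(K, Y0) XOR xmm14, truncated to auth_tag_len bytes at
# _return_T.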
1397 | |||
1398 | mov arg5, %rax # rax = *Y0 | ||
1399 | vmovdqu (%rax), %xmm9 # xmm9 = Y0 | ||
1400 | |||
1401 | ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Y0) | ||
1402 | |||
1403 | vpxor %xmm14, %xmm9, %xmm9 | ||
1404 | |||
1405 | |||
1406 | |||
1407 | _return_T\@: | ||
1408 | mov arg8, %r10 # r10 = authTag | ||
1409 | mov arg9, %r11 # r11 = auth_tag_len | ||
1410 | |||
1411 | cmp $16, %r11 | ||
1412 | je _T_16\@ | ||
1413 | |||
1414 | cmp $12, %r11 | ||
1415 | je _T_12\@ | ||
1416 | |||
1417 | _T_8\@: | ||
1418 | vmovq %xmm9, %rax | ||
1419 | mov %rax, (%r10) | ||
1420 | jmp _return_T_done\@ | ||
1421 | _T_12\@: | ||
1422 | vmovq %xmm9, %rax | ||
1423 | mov %rax, (%r10) | ||
1424 | vpsrldq $8, %xmm9, %xmm9 | ||
1425 | vmovd %xmm9, %eax | ||
1426 | mov %eax, 8(%r10) | ||
1427 | jmp _return_T_done\@ | ||
1428 | |||
1429 | _T_16\@: | ||
1430 | vmovdqu %xmm9, (%r10) | ||
1431 | |||
1432 | _return_T_done\@: | ||
1433 | mov %r14, %rsp | ||
1434 | |||
1435 | pop %r15 | ||
1436 | pop %r14 | ||
1437 | pop %r13 | ||
1438 | pop %r12 | ||
1439 | .endm | ||
1440 | |||
1441 | |||
1442 | ############################################################# | ||
1443 | #void aesni_gcm_precomp_avx_gen2 | ||
1444 | # (gcm_data *my_ctx_data, | ||
1445 | # u8 *hash_subkey)# /* H, the Hash sub key input. Data starts on a 16-byte boundary. */ | ||
1446 | ############################################################# | ||
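# In GCM the hash subkey is H = AES_K(0^128); the glue code (not shown here)
# is assumed to pass that value in hash_subkey.  This routine byte-reflects H,
# computes HashKey = H<<1 mod g(x) with g(x) = x^128+x^127+x^126+x^121+1, and
# then precomputes HashKey^2 .. HashKey^8 (and related constants) for the
# 8-way parallel GHASH.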
1447 | ENTRY(aesni_gcm_precomp_avx_gen2) | ||
1448 | #the number of pushes must equal STACK_OFFSET | ||
1449 | push %r12 | ||
1450 | push %r13 | ||
1451 | push %r14 | ||
1452 | push %r15 | ||
1453 | |||
1454 | mov %rsp, %r14 | ||
1455 | |||
1456 | |||
1457 | |||
1458 | sub $VARIABLE_OFFSET, %rsp | ||
1459 | and $~63, %rsp # align rsp to 64 bytes | ||
1460 | |||
1461 | vmovdqu (arg2), %xmm6 # xmm6 = HashKey | ||
1462 | |||
1463 | vpshufb SHUF_MASK(%rip), %xmm6, %xmm6 | ||
1464 | ############### PRECOMPUTATION of HashKey<<1 mod poly from the HashKey | ||
1465 | vmovdqa %xmm6, %xmm2 | ||
1466 | vpsllq $1, %xmm6, %xmm6 | ||
1467 | vpsrlq $63, %xmm2, %xmm2 | ||
1468 | vmovdqa %xmm2, %xmm1 | ||
1469 | vpslldq $8, %xmm2, %xmm2 | ||
1470 | vpsrldq $8, %xmm1, %xmm1 | ||
1471 | vpor %xmm2, %xmm6, %xmm6 | ||
1472 | #reduction | ||
1473 | vpshufd $0b00100100, %xmm1, %xmm2 | ||
1474 | vpcmpeqd TWOONE(%rip), %xmm2, %xmm2 | ||
1475 | vpand POLY(%rip), %xmm2, %xmm2 | ||
1476 | vpxor %xmm2, %xmm6, %xmm6 # xmm6 holds the HashKey<<1 mod poly | ||
1477 | ####################################################################### | ||
1478 | vmovdqa %xmm6, HashKey(arg1) # store HashKey<<1 mod poly | ||
1479 | |||
1480 | |||
1481 | PRECOMPUTE_AVX %xmm6, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5 | ||
1482 | |||
1483 | mov %r14, %rsp | ||
1484 | |||
1485 | pop %r15 | ||
1486 | pop %r14 | ||
1487 | pop %r13 | ||
1488 | pop %r12 | ||
1489 | ret | ||
1490 | ENDPROC(aesni_gcm_precomp_avx_gen2) | ||
1491 | |||
1492 | ############################################################################### | ||
1493 | #void aesni_gcm_enc_avx_gen2( | ||
1494 | # gcm_data *my_ctx_data, /* aligned to 16 Bytes */ | ||
1495 | # u8 *out, /* Ciphertext output. Encrypt in-place is allowed. */ | ||
1496 | # const u8 *in, /* Plaintext input */ | ||
1497 | # u64 plaintext_len, /* Length of data in Bytes for encryption. */ | ||
1498 | # u8 *iv, /* Pre-counter block j0: 4 byte salt | ||
1499 | # (from Security Association) concatenated with 8 byte | ||
1500 | # Initialisation Vector (from IPSec ESP Payload) | ||
1501 | # concatenated with 0x00000001. 16-byte aligned pointer. */ | ||
1502 | # const u8 *aad, /* Additional Authentication Data (AAD)*/ | ||
1503 | # u64 aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */ | ||
1504 | # u8 *auth_tag, /* Authenticated Tag output. */ | ||
1505 | # u64 auth_tag_len)# /* Authenticated Tag Length in bytes. | ||
1506 | # Valid values are 16 (most likely), 12 or 8. */ | ||
1507 | ############################################################################### | ||
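# An illustrative C call from glue code (a sketch under assumed names and
# types, not the actual kernel glue):
#
#	u8 iv[16] __attribute__((aligned(16)));
#	u8 tag[16];
#	memcpy(iv, salt, 4);               /* 4-byte salt from the SA        */
#	memcpy(iv + 4, esp_iv, 8);         /* 8-byte IV from the ESP payload */
#	*(__be32 *)(iv + 12) = cpu_to_be32(1);
#	aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
#	aesni_gcm_enc_avx_gen2(ctx, dst, src, src_len, iv,
#	                       aad, aad_len, tag, 16);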
1508 | ENTRY(aesni_gcm_enc_avx_gen2) | ||
1509 | GCM_ENC_DEC_AVX ENC | ||
1510 | ret | ||
1511 | ENDPROC(aesni_gcm_enc_avx_gen2) | ||
1512 | |||
1513 | ############################################################################### | ||
1514 | #void aesni_gcm_dec_avx_gen2( | ||
1515 | # gcm_data *my_ctx_data, /* aligned to 16 Bytes */ | ||
1516 | # u8 *out, /* Plaintext output. Decrypt in-place is allowed. */ | ||
1517 | # const u8 *in, /* Ciphertext input */ | ||
1518 | # u64 plaintext_len, /* Length of data in Bytes for decryption. */ | ||
1519 | # u8 *iv, /* Pre-counter block j0: 4 byte salt | ||
1520 | # (from Security Association) concatenated with 8 byte | ||
1521 | # Initialisation Vector (from IPSec ESP Payload) | ||
1522 | # concatenated with 0x00000001. 16-byte aligned pointer. */ | ||
1523 | # const u8 *aad, /* Additional Authentication Data (AAD)*/ | ||
1524 | # u64 aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */ | ||
1525 | # u8 *auth_tag, /* Authenticated Tag output. */ | ||
1526 | # u64 auth_tag_len)# /* Authenticated Tag Length in bytes. | ||
1527 | # Valid values are 16 (most likely), 12 or 8. */ | ||
1528 | ############################################################################### | ||
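# Note: on decrypt this routine still just produces the tag; the caller (glue
# code, not shown here) is expected to compare auth_tag against the tag that
# came off the wire, preferably in constant time, e.g. (sketch, names assumed):
#
#	if (crypto_memneq(computed_tag, received_tag, auth_tag_len))
#		return -EBADMSG;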
1529 | ENTRY(aesni_gcm_dec_avx_gen2) | ||
1530 | GCM_ENC_DEC_AVX DEC | ||
1531 | ret | ||
1532 | ENDPROC(aesni_gcm_dec_avx_gen2) | ||
1533 | #endif /* CONFIG_AS_AVX */ | ||
1534 | |||
1535 | #ifdef CONFIG_AS_AVX2 | ||
1536 | ############################################################################### | ||
1537 | # GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0) | ||
1538 | # Input: A and B (128-bits each, bit-reflected) | ||
1539 | # Output: C = A*B*x mod poly, (i.e. >>1 ) | ||
1540 | # To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input | ||
1541 | # GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly. | ||
1542 | ############################################################################### | ||
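# Worked out: with the bit-reflected GHASH polynomial
#	g(x) = x^128 + x^127 + x^126 + x^121 + 1
# the macro forms the 256-bit carry-less product with four VPCLMULQDQs
# (hi*hi, lo*lo and the two cross terms), folds the cross terms into the
# middle 128 bits, and reduces mod g(x) in two phases using the POLY2
# constant.  Feeding HK = HashKey<<1 mod poly cancels the extra factor of x,
# so the result equals GH * HashKey mod g(x).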
1543 | .macro GHASH_MUL_AVX2 GH HK T1 T2 T3 T4 T5 | ||
1544 | |||
1545 | vpclmulqdq $0x11,\HK,\GH,\T1 # T1 = a1*b1 | ||
1546 | vpclmulqdq $0x00,\HK,\GH,\T2 # T2 = a0*b0 | ||
1547 | vpclmulqdq $0x01,\HK,\GH,\T3 # T3 = a1*b0 | ||
1548 | vpclmulqdq $0x10,\HK,\GH,\GH # GH = a0*b1 | ||
1549 | vpxor \T3, \GH, \GH | ||
1550 | |||
1551 | |||
1552 | vpsrldq $8 , \GH, \T3 # shift-R GH 2 DWs | ||
1553 | vpslldq $8 , \GH, \GH # shift-L GH 2 DWs | ||
1554 | |||
1555 | vpxor \T3, \T1, \T1 | ||
1556 | vpxor \T2, \GH, \GH | ||
1557 | |||
1558 | ####################################################################### | ||
1559 | #first phase of the reduction | ||
1560 | vmovdqa POLY2(%rip), \T3 | ||
1561 | |||
1562 | vpclmulqdq $0x01, \GH, \T3, \T2 | ||
1563 | vpslldq $8, \T2, \T2 # shift-L T2 2 DWs | ||
1564 | |||
1565 | vpxor \T2, \GH, \GH # first phase of the reduction complete | ||
1566 | ####################################################################### | ||
1567 | #second phase of the reduction | ||
1568 | vpclmulqdq $0x00, \GH, \T3, \T2 | ||
1569 | vpsrldq $4, \T2, \T2 # shift-R T2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R) | ||
1570 | |||
1571 | vpclmulqdq $0x10, \GH, \T3, \GH | ||
1572 | vpslldq $4, \GH, \GH # shift-L GH 1 DW (Shift-L 1-DW to obtain result with no shifts) | ||
1573 | |||
1574 | vpxor \T2, \GH, \GH # second phase of the reduction complete | ||
1575 | ####################################################################### | ||
1576 | vpxor \T1, \GH, \GH # the result is in GH | ||
1577 | |||
1578 | |||
1579 | .endm | ||
1580 | |||
1581 | .macro PRECOMPUTE_AVX2 HK T1 T2 T3 T4 T5 T6 | ||
1582 | |||
1583 | # precompute HashKey^2 through HashKey^8 (each <<1 mod poly) for the 8-block parallel GHASH | ||
1584 | vmovdqa \HK, \T5 | ||
1585 | GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^2<<1 mod poly | ||
1586 | vmovdqa \T5, HashKey_2(arg1) # [HashKey_2] = HashKey^2<<1 mod poly | ||
1587 | |||
1588 | GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^3<<1 mod poly | ||
1589 | vmovdqa \T5, HashKey_3(arg1) | ||
1590 | |||
1591 | GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^4<<1 mod poly | ||
1592 | vmovdqa \T5, HashKey_4(arg1) | ||
1593 | |||
1594 | GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^5<<1 mod poly | ||
1595 | vmovdqa \T5, HashKey_5(arg1) | ||
1596 | |||
1597 | GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^6<<1 mod poly | ||
1598 | vmovdqa \T5, HashKey_6(arg1) | ||
1599 | |||
1600 | GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^7<<1 mod poly | ||
1601 | vmovdqa \T5, HashKey_7(arg1) | ||
1602 | |||
1603 | GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^8<<1 mod poly | ||
1604 | vmovdqa \T5, HashKey_8(arg1) | ||
1605 | |||
1606 | .endm | ||
1607 | |||
1608 | |||
1609 | ## if a = number of total plaintext bytes | ||
1610 | ## b = floor(a/16) | ||
1611 | ## num_initial_blocks = b mod 8 | ||
1612 | ## encrypt the initial num_initial_blocks blocks and apply ghash on the ciphertext | ||
1613 | ## r10, r11, r12, rax are clobbered | ||
1614 | ## arg1, arg2, arg3, r14 are used as a pointer only, not modified | ||
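## In C terms (illustrative only):
##	num_initial_blocks = (plaintext_len / 16) % 8;
## so that once these blocks are done the remaining whole blocks are a
## multiple of 8 and can be handled by the 8-way parallel loop.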
1615 | |||
1616 | .macro INITIAL_BLOCKS_AVX2 num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC VER | ||
1617 | i = (8-\num_initial_blocks) | ||
1618 | setreg | ||
1619 | |||
1620 | mov arg6, %r10 # r10 = AAD | ||
1621 | mov arg7, %r12 # r12 = aadLen | ||
1622 | |||
1623 | |||
1624 | mov %r12, %r11 | ||
1625 | |||
1626 | vpxor reg_i, reg_i, reg_i | ||
1627 | _get_AAD_loop\@: | ||
1628 | vmovd (%r10), \T1 | ||
1629 | vpslldq $12, \T1, \T1 | ||
1630 | vpsrldq $4, reg_i, reg_i | ||
1631 | vpxor \T1, reg_i, reg_i | ||
1632 | |||
1633 | add $4, %r10 | ||
1634 | sub $4, %r12 | ||
1635 | jg _get_AAD_loop\@ | ||
1636 | |||
1637 | |||
1638 | cmp $16, %r11 | ||
1639 | je _get_AAD_loop2_done\@ | ||
1640 | mov $16, %r12 | ||
1641 | |||
1642 | _get_AAD_loop2\@: | ||
1643 | vpsrldq $4, reg_i, reg_i | ||
1644 | sub $4, %r12 | ||
1645 | cmp %r11, %r12 | ||
1646 | jg _get_AAD_loop2\@ | ||
1647 | |||
1648 | _get_AAD_loop2_done\@: | ||
1649 | |||
1650 | #byte-reflect the AAD data | ||
1651 | vpshufb SHUF_MASK(%rip), reg_i, reg_i | ||
1652 | |||
1653 | # initialize the data pointer offset as zero | ||
1654 | xor %r11, %r11 | ||
1655 | |||
1656 | # start AES for num_initial_blocks blocks | ||
1657 | mov arg5, %rax # rax = *Y0 | ||
1658 | vmovdqu (%rax), \CTR # CTR = Y0 | ||
1659 | vpshufb SHUF_MASK(%rip), \CTR, \CTR | ||
1660 | |||
1661 | |||
1662 | i = (9-\num_initial_blocks) | ||
1663 | setreg | ||
1664 | .rep \num_initial_blocks | ||
1665 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
1666 | vmovdqa \CTR, reg_i | ||
1667 | vpshufb SHUF_MASK(%rip), reg_i, reg_i # perform a 16Byte swap | ||
1668 | i = (i+1) | ||
1669 | setreg | ||
1670 | .endr | ||
1671 | |||
1672 | vmovdqa (arg1), \T_key | ||
1673 | i = (9-\num_initial_blocks) | ||
1674 | setreg | ||
1675 | .rep \num_initial_blocks | ||
1676 | vpxor \T_key, reg_i, reg_i | ||
1677 | i = (i+1) | ||
1678 | setreg | ||
1679 | .endr | ||
1680 | |||
1681 | j = 1 | ||
1682 | setreg | ||
1683 | .rep 9 | ||
1684 | vmovdqa 16*j(arg1), \T_key | ||
1685 | i = (9-\num_initial_blocks) | ||
1686 | setreg | ||
1687 | .rep \num_initial_blocks | ||
1688 | vaesenc \T_key, reg_i, reg_i | ||
1689 | i = (i+1) | ||
1690 | setreg | ||
1691 | .endr | ||
1692 | |||
1693 | j = (j+1) | ||
1694 | setreg | ||
1695 | .endr | ||
1696 | |||
1697 | |||
1698 | vmovdqa 16*10(arg1), \T_key | ||
1699 | i = (9-\num_initial_blocks) | ||
1700 | setreg | ||
1701 | .rep \num_initial_blocks | ||
1702 | vaesenclast \T_key, reg_i, reg_i | ||
1703 | i = (i+1) | ||
1704 | setreg | ||
1705 | .endr | ||
1706 | |||
1707 | i = (9-\num_initial_blocks) | ||
1708 | setreg | ||
1709 | .rep \num_initial_blocks | ||
1710 | vmovdqu (arg3, %r11), \T1 | ||
1711 | vpxor \T1, reg_i, reg_i | ||
1712 | vmovdqu reg_i, (arg2 , %r11) # write back ciphertext for | ||
1713 | # num_initial_blocks blocks | ||
1714 | add $16, %r11 | ||
1715 | .if \ENC_DEC == DEC | ||
1716 | vmovdqa \T1, reg_i | ||
1717 | .endif | ||
1718 | vpshufb SHUF_MASK(%rip), reg_i, reg_i # prepare ciphertext for GHASH computations | ||
1719 | i = (i+1) | ||
1720 | setreg | ||
1721 | .endr | ||
1722 | |||
1723 | |||
1724 | i = (8-\num_initial_blocks) | ||
1725 | j = (9-\num_initial_blocks) | ||
1726 | setreg | ||
1727 | GHASH_MUL_AVX2 reg_i, \T2, \T1, \T3, \T4, \T5, \T6 | ||
1728 | |||
1729 | .rep \num_initial_blocks | ||
1730 | vpxor reg_i, reg_j, reg_j | ||
1731 | GHASH_MUL_AVX2 reg_j, \T2, \T1, \T3, \T4, \T5, \T6 # apply GHASH on num_initial_blocks blocks | ||
1732 | i = (i+1) | ||
1733 | j = (j+1) | ||
1734 | setreg | ||
1735 | .endr | ||
1736 | # XMM8 has the combined result here | ||
1737 | |||
1738 | vmovdqa \XMM8, TMP1(%rsp) | ||
1739 | vmovdqa \XMM8, \T3 | ||
1740 | |||
1741 | cmp $128, %r13 | ||
1742 | jl _initial_blocks_done\@ # no need for precomputed constants | ||
1743 | |||
1744 | ############################################################################### | ||
1745 | # at least 128 bytes remain: prepare and encrypt the first 8 counter blocks for the main loop | ||
1746 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
1747 | vmovdqa \CTR, \XMM1 | ||
1748 | vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap | ||
1749 | |||
1750 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
1751 | vmovdqa \CTR, \XMM2 | ||
1752 | vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap | ||
1753 | |||
1754 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
1755 | vmovdqa \CTR, \XMM3 | ||
1756 | vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap | ||
1757 | |||
1758 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
1759 | vmovdqa \CTR, \XMM4 | ||
1760 | vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap | ||
1761 | |||
1762 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
1763 | vmovdqa \CTR, \XMM5 | ||
1764 | vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap | ||
1765 | |||
1766 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
1767 | vmovdqa \CTR, \XMM6 | ||
1768 | vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap | ||
1769 | |||
1770 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
1771 | vmovdqa \CTR, \XMM7 | ||
1772 | vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap | ||
1773 | |||
1774 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
1775 | vmovdqa \CTR, \XMM8 | ||
1776 | vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap | ||
1777 | |||
1778 | vmovdqa (arg1), \T_key | ||
1779 | vpxor \T_key, \XMM1, \XMM1 | ||
1780 | vpxor \T_key, \XMM2, \XMM2 | ||
1781 | vpxor \T_key, \XMM3, \XMM3 | ||
1782 | vpxor \T_key, \XMM4, \XMM4 | ||
1783 | vpxor \T_key, \XMM5, \XMM5 | ||
1784 | vpxor \T_key, \XMM6, \XMM6 | ||
1785 | vpxor \T_key, \XMM7, \XMM7 | ||
1786 | vpxor \T_key, \XMM8, \XMM8 | ||
1787 | |||
1788 | i = 1 | ||
1789 | setreg | ||
1790 | .rep 9 # do 9 rounds | ||
1791 | vmovdqa 16*i(arg1), \T_key | ||
1792 | vaesenc \T_key, \XMM1, \XMM1 | ||
1793 | vaesenc \T_key, \XMM2, \XMM2 | ||
1794 | vaesenc \T_key, \XMM3, \XMM3 | ||
1795 | vaesenc \T_key, \XMM4, \XMM4 | ||
1796 | vaesenc \T_key, \XMM5, \XMM5 | ||
1797 | vaesenc \T_key, \XMM6, \XMM6 | ||
1798 | vaesenc \T_key, \XMM7, \XMM7 | ||
1799 | vaesenc \T_key, \XMM8, \XMM8 | ||
1800 | i = (i+1) | ||
1801 | setreg | ||
1802 | .endr | ||
1803 | |||
1804 | |||
1805 | vmovdqa 16*i(arg1), \T_key | ||
1806 | vaesenclast \T_key, \XMM1, \XMM1 | ||
1807 | vaesenclast \T_key, \XMM2, \XMM2 | ||
1808 | vaesenclast \T_key, \XMM3, \XMM3 | ||
1809 | vaesenclast \T_key, \XMM4, \XMM4 | ||
1810 | vaesenclast \T_key, \XMM5, \XMM5 | ||
1811 | vaesenclast \T_key, \XMM6, \XMM6 | ||
1812 | vaesenclast \T_key, \XMM7, \XMM7 | ||
1813 | vaesenclast \T_key, \XMM8, \XMM8 | ||
1814 | |||
1815 | vmovdqu (arg3, %r11), \T1 | ||
1816 | vpxor \T1, \XMM1, \XMM1 | ||
1817 | vmovdqu \XMM1, (arg2 , %r11) | ||
1818 | .if \ENC_DEC == DEC | ||
1819 | vmovdqa \T1, \XMM1 | ||
1820 | .endif | ||
1821 | |||
1822 | vmovdqu 16*1(arg3, %r11), \T1 | ||
1823 | vpxor \T1, \XMM2, \XMM2 | ||
1824 | vmovdqu \XMM2, 16*1(arg2 , %r11) | ||
1825 | .if \ENC_DEC == DEC | ||
1826 | vmovdqa \T1, \XMM2 | ||
1827 | .endif | ||
1828 | |||
1829 | vmovdqu 16*2(arg3, %r11), \T1 | ||
1830 | vpxor \T1, \XMM3, \XMM3 | ||
1831 | vmovdqu \XMM3, 16*2(arg2 , %r11) | ||
1832 | .if \ENC_DEC == DEC | ||
1833 | vmovdqa \T1, \XMM3 | ||
1834 | .endif | ||
1835 | |||
1836 | vmovdqu 16*3(arg3, %r11), \T1 | ||
1837 | vpxor \T1, \XMM4, \XMM4 | ||
1838 | vmovdqu \XMM4, 16*3(arg2 , %r11) | ||
1839 | .if \ENC_DEC == DEC | ||
1840 | vmovdqa \T1, \XMM4 | ||
1841 | .endif | ||
1842 | |||
1843 | vmovdqu 16*4(arg3, %r11), \T1 | ||
1844 | vpxor \T1, \XMM5, \XMM5 | ||
1845 | vmovdqu \XMM5, 16*4(arg2 , %r11) | ||
1846 | .if \ENC_DEC == DEC | ||
1847 | vmovdqa \T1, \XMM5 | ||
1848 | .endif | ||
1849 | |||
1850 | vmovdqu 16*5(arg3, %r11), \T1 | ||
1851 | vpxor \T1, \XMM6, \XMM6 | ||
1852 | vmovdqu \XMM6, 16*5(arg2 , %r11) | ||
1853 | .if \ENC_DEC == DEC | ||
1854 | vmovdqa \T1, \XMM6 | ||
1855 | .endif | ||
1856 | |||
1857 | vmovdqu 16*6(arg3, %r11), \T1 | ||
1858 | vpxor \T1, \XMM7, \XMM7 | ||
1859 | vmovdqu \XMM7, 16*6(arg2 , %r11) | ||
1860 | .if \ENC_DEC == DEC | ||
1861 | vmovdqa \T1, \XMM7 | ||
1862 | .endif | ||
1863 | |||
1864 | vmovdqu 16*7(arg3, %r11), \T1 | ||
1865 | vpxor \T1, \XMM8, \XMM8 | ||
1866 | vmovdqu \XMM8, 16*7(arg2 , %r11) | ||
1867 | .if \ENC_DEC == DEC | ||
1868 | vmovdqa \T1, \XMM8 | ||
1869 | .endif | ||
1870 | |||
1871 | add $128, %r11 | ||
1872 | |||
1873 | vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap | ||
1874 | vpxor TMP1(%rsp), \XMM1, \XMM1 # combine GHASHed value with | ||
1875 | # the corresponding ciphertext | ||
1876 | vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap | ||
1877 | vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap | ||
1878 | vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap | ||
1879 | vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap | ||
1880 | vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap | ||
1881 | vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap | ||
1882 | vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap | ||
1883 | |||
1884 | ############################################################################### | ||
1885 | |||
1886 | _initial_blocks_done\@: | ||
1887 | |||
1888 | |||
1889 | .endm | ||
1890 | |||
1891 | |||
1892 | |||
1893 | # encrypt 8 blocks at a time | ||
1894 | # ghash the 8 previously encrypted ciphertext blocks | ||
1895 | # arg1, arg2, arg3 are used as pointers only, not modified | ||
1896 | # r11 is the data offset value | ||
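# The GHASH of the previous 8 ciphertext blocks C1..C8 is folded in one pass
# using the precomputed key powers (arithmetic in GF(2^128), '+' is XOR):
#	X_new = (X_old + C1)*H^8 + C2*H^7 + ... + C8*H
# with the partial products interleaved between the AES rounds of the next 8
# counter blocks, so the PCLMULQDQ and AES units are kept busy in parallel.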
1897 | .macro GHASH_8_ENCRYPT_8_PARALLEL_AVX2 T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC | ||
1898 | |||
1899 | vmovdqa \XMM1, \T2 | ||
1900 | vmovdqa \XMM2, TMP2(%rsp) | ||
1901 | vmovdqa \XMM3, TMP3(%rsp) | ||
1902 | vmovdqa \XMM4, TMP4(%rsp) | ||
1903 | vmovdqa \XMM5, TMP5(%rsp) | ||
1904 | vmovdqa \XMM6, TMP6(%rsp) | ||
1905 | vmovdqa \XMM7, TMP7(%rsp) | ||
1906 | vmovdqa \XMM8, TMP8(%rsp) | ||
1907 | |||
1908 | .if \loop_idx == in_order | ||
1909 | vpaddd ONE(%rip), \CTR, \XMM1 # INCR CNT | ||
1910 | vpaddd ONE(%rip), \XMM1, \XMM2 | ||
1911 | vpaddd ONE(%rip), \XMM2, \XMM3 | ||
1912 | vpaddd ONE(%rip), \XMM3, \XMM4 | ||
1913 | vpaddd ONE(%rip), \XMM4, \XMM5 | ||
1914 | vpaddd ONE(%rip), \XMM5, \XMM6 | ||
1915 | vpaddd ONE(%rip), \XMM6, \XMM7 | ||
1916 | vpaddd ONE(%rip), \XMM7, \XMM8 | ||
1917 | vmovdqa \XMM8, \CTR | ||
1918 | |||
1919 | vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap | ||
1920 | vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap | ||
1921 | vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap | ||
1922 | vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap | ||
1923 | vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap | ||
1924 | vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap | ||
1925 | vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap | ||
1926 | vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap | ||
1927 | .else | ||
1928 | vpaddd ONEf(%rip), \CTR, \XMM1 # INCR CNT | ||
1929 | vpaddd ONEf(%rip), \XMM1, \XMM2 | ||
1930 | vpaddd ONEf(%rip), \XMM2, \XMM3 | ||
1931 | vpaddd ONEf(%rip), \XMM3, \XMM4 | ||
1932 | vpaddd ONEf(%rip), \XMM4, \XMM5 | ||
1933 | vpaddd ONEf(%rip), \XMM5, \XMM6 | ||
1934 | vpaddd ONEf(%rip), \XMM6, \XMM7 | ||
1935 | vpaddd ONEf(%rip), \XMM7, \XMM8 | ||
1936 | vmovdqa \XMM8, \CTR | ||
1937 | .endif | ||
1938 | |||
1939 | |||
1940 | ####################################################################### | ||
1941 | |||
1942 | vmovdqu (arg1), \T1 | ||
1943 | vpxor \T1, \XMM1, \XMM1 | ||
1944 | vpxor \T1, \XMM2, \XMM2 | ||
1945 | vpxor \T1, \XMM3, \XMM3 | ||
1946 | vpxor \T1, \XMM4, \XMM4 | ||
1947 | vpxor \T1, \XMM5, \XMM5 | ||
1948 | vpxor \T1, \XMM6, \XMM6 | ||
1949 | vpxor \T1, \XMM7, \XMM7 | ||
1950 | vpxor \T1, \XMM8, \XMM8 | ||
1951 | |||
1952 | ####################################################################### | ||
1953 | |||
1954 | |||
1955 | |||
1956 | |||
1957 | |||
1958 | vmovdqu 16*1(arg1), \T1 | ||
1959 | vaesenc \T1, \XMM1, \XMM1 | ||
1960 | vaesenc \T1, \XMM2, \XMM2 | ||
1961 | vaesenc \T1, \XMM3, \XMM3 | ||
1962 | vaesenc \T1, \XMM4, \XMM4 | ||
1963 | vaesenc \T1, \XMM5, \XMM5 | ||
1964 | vaesenc \T1, \XMM6, \XMM6 | ||
1965 | vaesenc \T1, \XMM7, \XMM7 | ||
1966 | vaesenc \T1, \XMM8, \XMM8 | ||
1967 | |||
1968 | vmovdqu 16*2(arg1), \T1 | ||
1969 | vaesenc \T1, \XMM1, \XMM1 | ||
1970 | vaesenc \T1, \XMM2, \XMM2 | ||
1971 | vaesenc \T1, \XMM3, \XMM3 | ||
1972 | vaesenc \T1, \XMM4, \XMM4 | ||
1973 | vaesenc \T1, \XMM5, \XMM5 | ||
1974 | vaesenc \T1, \XMM6, \XMM6 | ||
1975 | vaesenc \T1, \XMM7, \XMM7 | ||
1976 | vaesenc \T1, \XMM8, \XMM8 | ||
1977 | |||
1978 | |||
1979 | ####################################################################### | ||
1980 | |||
1981 | vmovdqa HashKey_8(arg1), \T5 | ||
1982 | vpclmulqdq $0x11, \T5, \T2, \T4 # T4 = a1*b1 | ||
1983 | vpclmulqdq $0x00, \T5, \T2, \T7 # T7 = a0*b0 | ||
1984 | vpclmulqdq $0x01, \T5, \T2, \T6 # T6 = a1*b0 | ||
1985 | vpclmulqdq $0x10, \T5, \T2, \T5 # T5 = a0*b1 | ||
1986 | vpxor \T5, \T6, \T6 | ||
1987 | |||
1988 | vmovdqu 16*3(arg1), \T1 | ||
1989 | vaesenc \T1, \XMM1, \XMM1 | ||
1990 | vaesenc \T1, \XMM2, \XMM2 | ||
1991 | vaesenc \T1, \XMM3, \XMM3 | ||
1992 | vaesenc \T1, \XMM4, \XMM4 | ||
1993 | vaesenc \T1, \XMM5, \XMM5 | ||
1994 | vaesenc \T1, \XMM6, \XMM6 | ||
1995 | vaesenc \T1, \XMM7, \XMM7 | ||
1996 | vaesenc \T1, \XMM8, \XMM8 | ||
1997 | |||
1998 | vmovdqa TMP2(%rsp), \T1 | ||
1999 | vmovdqa HashKey_7(arg1), \T5 | ||
2000 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
2001 | vpxor \T3, \T4, \T4 | ||
2002 | |||
2003 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
2004 | vpxor \T3, \T7, \T7 | ||
2005 | |||
2006 | vpclmulqdq $0x01, \T5, \T1, \T3 | ||
2007 | vpxor \T3, \T6, \T6 | ||
2008 | |||
2009 | vpclmulqdq $0x10, \T5, \T1, \T3 | ||
2010 | vpxor \T3, \T6, \T6 | ||
2011 | |||
2012 | vmovdqu 16*4(arg1), \T1 | ||
2013 | vaesenc \T1, \XMM1, \XMM1 | ||
2014 | vaesenc \T1, \XMM2, \XMM2 | ||
2015 | vaesenc \T1, \XMM3, \XMM3 | ||
2016 | vaesenc \T1, \XMM4, \XMM4 | ||
2017 | vaesenc \T1, \XMM5, \XMM5 | ||
2018 | vaesenc \T1, \XMM6, \XMM6 | ||
2019 | vaesenc \T1, \XMM7, \XMM7 | ||
2020 | vaesenc \T1, \XMM8, \XMM8 | ||
2021 | |||
2022 | ####################################################################### | ||
2023 | |||
2024 | vmovdqa TMP3(%rsp), \T1 | ||
2025 | vmovdqa HashKey_6(arg1), \T5 | ||
2026 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
2027 | vpxor \T3, \T4, \T4 | ||
2028 | |||
2029 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
2030 | vpxor \T3, \T7, \T7 | ||
2031 | |||
2032 | vpclmulqdq $0x01, \T5, \T1, \T3 | ||
2033 | vpxor \T3, \T6, \T6 | ||
2034 | |||
2035 | vpclmulqdq $0x10, \T5, \T1, \T3 | ||
2036 | vpxor \T3, \T6, \T6 | ||
2037 | |||
2038 | vmovdqu 16*5(arg1), \T1 | ||
2039 | vaesenc \T1, \XMM1, \XMM1 | ||
2040 | vaesenc \T1, \XMM2, \XMM2 | ||
2041 | vaesenc \T1, \XMM3, \XMM3 | ||
2042 | vaesenc \T1, \XMM4, \XMM4 | ||
2043 | vaesenc \T1, \XMM5, \XMM5 | ||
2044 | vaesenc \T1, \XMM6, \XMM6 | ||
2045 | vaesenc \T1, \XMM7, \XMM7 | ||
2046 | vaesenc \T1, \XMM8, \XMM8 | ||
2047 | |||
2048 | vmovdqa TMP4(%rsp), \T1 | ||
2049 | vmovdqa HashKey_5(arg1), \T5 | ||
2050 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
2051 | vpxor \T3, \T4, \T4 | ||
2052 | |||
2053 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
2054 | vpxor \T3, \T7, \T7 | ||
2055 | |||
2056 | vpclmulqdq $0x01, \T5, \T1, \T3 | ||
2057 | vpxor \T3, \T6, \T6 | ||
2058 | |||
2059 | vpclmulqdq $0x10, \T5, \T1, \T3 | ||
2060 | vpxor \T3, \T6, \T6 | ||
2061 | |||
2062 | vmovdqu 16*6(arg1), \T1 | ||
2063 | vaesenc \T1, \XMM1, \XMM1 | ||
2064 | vaesenc \T1, \XMM2, \XMM2 | ||
2065 | vaesenc \T1, \XMM3, \XMM3 | ||
2066 | vaesenc \T1, \XMM4, \XMM4 | ||
2067 | vaesenc \T1, \XMM5, \XMM5 | ||
2068 | vaesenc \T1, \XMM6, \XMM6 | ||
2069 | vaesenc \T1, \XMM7, \XMM7 | ||
2070 | vaesenc \T1, \XMM8, \XMM8 | ||
2071 | |||
2072 | |||
2073 | vmovdqa TMP5(%rsp), \T1 | ||
2074 | vmovdqa HashKey_4(arg1), \T5 | ||
2075 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
2076 | vpxor \T3, \T4, \T4 | ||
2077 | |||
2078 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
2079 | vpxor \T3, \T7, \T7 | ||
2080 | |||
2081 | vpclmulqdq $0x01, \T5, \T1, \T3 | ||
2082 | vpxor \T3, \T6, \T6 | ||
2083 | |||
2084 | vpclmulqdq $0x10, \T5, \T1, \T3 | ||
2085 | vpxor \T3, \T6, \T6 | ||
2086 | |||
2087 | vmovdqu 16*7(arg1), \T1 | ||
2088 | vaesenc \T1, \XMM1, \XMM1 | ||
2089 | vaesenc \T1, \XMM2, \XMM2 | ||
2090 | vaesenc \T1, \XMM3, \XMM3 | ||
2091 | vaesenc \T1, \XMM4, \XMM4 | ||
2092 | vaesenc \T1, \XMM5, \XMM5 | ||
2093 | vaesenc \T1, \XMM6, \XMM6 | ||
2094 | vaesenc \T1, \XMM7, \XMM7 | ||
2095 | vaesenc \T1, \XMM8, \XMM8 | ||
2096 | |||
2097 | vmovdqa TMP6(%rsp), \T1 | ||
2098 | vmovdqa HashKey_3(arg1), \T5 | ||
2099 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
2100 | vpxor \T3, \T4, \T4 | ||
2101 | |||
2102 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
2103 | vpxor \T3, \T7, \T7 | ||
2104 | |||
2105 | vpclmulqdq $0x01, \T5, \T1, \T3 | ||
2106 | vpxor \T3, \T6, \T6 | ||
2107 | |||
2108 | vpclmulqdq $0x10, \T5, \T1, \T3 | ||
2109 | vpxor \T3, \T6, \T6 | ||
2110 | |||
2111 | vmovdqu 16*8(arg1), \T1 | ||
2112 | vaesenc \T1, \XMM1, \XMM1 | ||
2113 | vaesenc \T1, \XMM2, \XMM2 | ||
2114 | vaesenc \T1, \XMM3, \XMM3 | ||
2115 | vaesenc \T1, \XMM4, \XMM4 | ||
2116 | vaesenc \T1, \XMM5, \XMM5 | ||
2117 | vaesenc \T1, \XMM6, \XMM6 | ||
2118 | vaesenc \T1, \XMM7, \XMM7 | ||
2119 | vaesenc \T1, \XMM8, \XMM8 | ||
2120 | |||
2121 | vmovdqa TMP7(%rsp), \T1 | ||
2122 | vmovdqa HashKey_2(arg1), \T5 | ||
2123 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
2124 | vpxor \T3, \T4, \T4 | ||
2125 | |||
2126 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
2127 | vpxor \T3, \T7, \T7 | ||
2128 | |||
2129 | vpclmulqdq $0x01, \T5, \T1, \T3 | ||
2130 | vpxor \T3, \T6, \T6 | ||
2131 | |||
2132 | vpclmulqdq $0x10, \T5, \T1, \T3 | ||
2133 | vpxor \T3, \T6, \T6 | ||
2134 | |||
2135 | |||
2136 | ####################################################################### | ||
2137 | |||
2138 | vmovdqu 16*9(arg1), \T5 | ||
2139 | vaesenc \T5, \XMM1, \XMM1 | ||
2140 | vaesenc \T5, \XMM2, \XMM2 | ||
2141 | vaesenc \T5, \XMM3, \XMM3 | ||
2142 | vaesenc \T5, \XMM4, \XMM4 | ||
2143 | vaesenc \T5, \XMM5, \XMM5 | ||
2144 | vaesenc \T5, \XMM6, \XMM6 | ||
2145 | vaesenc \T5, \XMM7, \XMM7 | ||
2146 | vaesenc \T5, \XMM8, \XMM8 | ||
2147 | |||
2148 | vmovdqa TMP8(%rsp), \T1 | ||
2149 | vmovdqa HashKey(arg1), \T5 | ||
2150 | |||
2151 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
2152 | vpxor \T3, \T7, \T7 | ||
2153 | |||
2154 | vpclmulqdq $0x01, \T5, \T1, \T3 | ||
2155 | vpxor \T3, \T6, \T6 | ||
2156 | |||
2157 | vpclmulqdq $0x10, \T5, \T1, \T3 | ||
2158 | vpxor \T3, \T6, \T6 | ||
2159 | |||
2160 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
2161 | vpxor \T3, \T4, \T1 | ||
2162 | |||
2163 | |||
2164 | vmovdqu 16*10(arg1), \T5 | ||
2165 | |||
2166 | i = 0 | ||
2167 | j = 1 | ||
2168 | setreg | ||
2169 | .rep 8 | ||
2170 | vpxor 16*i(arg3, %r11), \T5, \T2 | ||
2171 | .if \ENC_DEC == ENC | ||
2172 | vaesenclast \T2, reg_j, reg_j | ||
2173 | .else | ||
2174 | vaesenclast \T2, reg_j, \T3 | ||
2175 | vmovdqu 16*i(arg3, %r11), reg_j | ||
2176 | vmovdqu \T3, 16*i(arg2, %r11) | ||
2177 | .endif | ||
2178 | i = (i+1) | ||
2179 | j = (j+1) | ||
2180 | setreg | ||
2181 | .endr | ||
2182 | ####################################################################### | ||
2183 | |||
2184 | |||
2185 | vpslldq $8, \T6, \T3 # shift-L T3 2 DWs | ||
2186 | vpsrldq $8, \T6, \T6 # shift-R T6 2 DWs | ||
2187 | vpxor \T3, \T7, \T7 | ||
2188 | vpxor \T6, \T1, \T1 # accumulate the results in T1:T7 | ||
2189 | |||
2190 | |||
2191 | |||
2192 | ####################################################################### | ||
2193 | #first phase of the reduction | ||
2194 | vmovdqa POLY2(%rip), \T3 | ||
2195 | |||
2196 | vpclmulqdq $0x01, \T7, \T3, \T2 | ||
2197 | vpslldq $8, \T2, \T2 # shift-L xmm2 2 DWs | ||
2198 | |||
2199 | vpxor \T2, \T7, \T7 # first phase of the reduction complete | ||
2200 | ####################################################################### | ||
2201 | .if \ENC_DEC == ENC | ||
2202 | vmovdqu \XMM1, 16*0(arg2,%r11) # Write to the Ciphertext buffer | ||
2203 | vmovdqu \XMM2, 16*1(arg2,%r11) # Write to the Ciphertext buffer | ||
2204 | vmovdqu \XMM3, 16*2(arg2,%r11) # Write to the Ciphertext buffer | ||
2205 | vmovdqu \XMM4, 16*3(arg2,%r11) # Write to the Ciphertext buffer | ||
2206 | vmovdqu \XMM5, 16*4(arg2,%r11) # Write to the Ciphertext buffer | ||
2207 | vmovdqu \XMM6, 16*5(arg2,%r11) # Write to the Ciphertext buffer | ||
2208 | vmovdqu \XMM7, 16*6(arg2,%r11) # Write to the Ciphertext buffer | ||
2209 | vmovdqu \XMM8, 16*7(arg2,%r11) # Write to the Ciphertext buffer | ||
2210 | .endif | ||
2211 | |||
2212 | ####################################################################### | ||
2213 | #second phase of the reduction | ||
2214 | vpclmulqdq $0x00, \T7, \T3, \T2 | ||
2215 | vpsrldq $4, \T2, \T2 # shift-R xmm2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R) | ||
2216 | |||
2217 | vpclmulqdq $0x10, \T7, \T3, \T4 | ||
2218 | vpslldq $4, \T4, \T4 # shift-L xmm0 1 DW (Shift-L 1-DW to obtain result with no shifts) | ||
2219 | |||
2220 | vpxor \T2, \T4, \T4 # second phase of the reduction complete | ||
2221 | ####################################################################### | ||
2222 | vpxor \T4, \T1, \T1 # the result is in T1 | ||
2223 | |||
2224 | vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap | ||
2225 | vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap | ||
2226 | vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap | ||
2227 | vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap | ||
2228 | vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap | ||
2229 | vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap | ||
2230 | vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap | ||
2231 | vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap | ||
2232 | |||
2233 | |||
2234 | vpxor \T1, \XMM1, \XMM1 | ||
2235 | |||
2236 | |||
2237 | |||
2238 | .endm | ||
2239 | |||
2240 | |||
2241 | # GHASH the last 8 ciphertext blocks. | ||
2242 | .macro GHASH_LAST_8_AVX2 T1 T2 T3 T4 T5 T6 T7 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 | ||
2243 | |||
2244 | ## Karatsuba Method | ||
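## For A = a1:a0 and a key power B = b1:b0, Karatsuba needs only three
## carry-less multiplies instead of four:
##	A*B = a1*b1 : a0*b0,  middle 128 bits = (a1^a0)*(b1^b0) ^ a1*b1 ^ a0*b0
## which is what the vpshufd/vpxor pairs below prepare (a1^a0 and b1^b0).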
2245 | |||
2246 | vmovdqa HashKey_8(arg1), \T5 | ||
2247 | |||
2248 | vpshufd $0b01001110, \XMM1, \T2 | ||
2249 | vpshufd $0b01001110, \T5, \T3 | ||
2250 | vpxor \XMM1, \T2, \T2 | ||
2251 | vpxor \T5, \T3, \T3 | ||
2252 | |||
2253 | vpclmulqdq $0x11, \T5, \XMM1, \T6 | ||
2254 | vpclmulqdq $0x00, \T5, \XMM1, \T7 | ||
2255 | |||
2256 | vpclmulqdq $0x00, \T3, \T2, \XMM1 | ||
2257 | |||
2258 | ###################### | ||
2259 | |||
2260 | vmovdqa HashKey_7(arg1), \T5 | ||
2261 | vpshufd $0b01001110, \XMM2, \T2 | ||
2262 | vpshufd $0b01001110, \T5, \T3 | ||
2263 | vpxor \XMM2, \T2, \T2 | ||
2264 | vpxor \T5, \T3, \T3 | ||
2265 | |||
2266 | vpclmulqdq $0x11, \T5, \XMM2, \T4 | ||
2267 | vpxor \T4, \T6, \T6 | ||
2268 | |||
2269 | vpclmulqdq $0x00, \T5, \XMM2, \T4 | ||
2270 | vpxor \T4, \T7, \T7 | ||
2271 | |||
2272 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
2273 | |||
2274 | vpxor \T2, \XMM1, \XMM1 | ||
2275 | |||
2276 | ###################### | ||
2277 | |||
2278 | vmovdqa HashKey_6(arg1), \T5 | ||
2279 | vpshufd $0b01001110, \XMM3, \T2 | ||
2280 | vpshufd $0b01001110, \T5, \T3 | ||
2281 | vpxor \XMM3, \T2, \T2 | ||
2282 | vpxor \T5, \T3, \T3 | ||
2283 | |||
2284 | vpclmulqdq $0x11, \T5, \XMM3, \T4 | ||
2285 | vpxor \T4, \T6, \T6 | ||
2286 | |||
2287 | vpclmulqdq $0x00, \T5, \XMM3, \T4 | ||
2288 | vpxor \T4, \T7, \T7 | ||
2289 | |||
2290 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
2291 | |||
2292 | vpxor \T2, \XMM1, \XMM1 | ||
2293 | |||
2294 | ###################### | ||
2295 | |||
2296 | vmovdqa HashKey_5(arg1), \T5 | ||
2297 | vpshufd $0b01001110, \XMM4, \T2 | ||
2298 | vpshufd $0b01001110, \T5, \T3 | ||
2299 | vpxor \XMM4, \T2, \T2 | ||
2300 | vpxor \T5, \T3, \T3 | ||
2301 | |||
2302 | vpclmulqdq $0x11, \T5, \XMM4, \T4 | ||
2303 | vpxor \T4, \T6, \T6 | ||
2304 | |||
2305 | vpclmulqdq $0x00, \T5, \XMM4, \T4 | ||
2306 | vpxor \T4, \T7, \T7 | ||
2307 | |||
2308 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
2309 | |||
2310 | vpxor \T2, \XMM1, \XMM1 | ||
2311 | |||
2312 | ###################### | ||
2313 | |||
2314 | vmovdqa HashKey_4(arg1), \T5 | ||
2315 | vpshufd $0b01001110, \XMM5, \T2 | ||
2316 | vpshufd $0b01001110, \T5, \T3 | ||
2317 | vpxor \XMM5, \T2, \T2 | ||
2318 | vpxor \T5, \T3, \T3 | ||
2319 | |||
2320 | vpclmulqdq $0x11, \T5, \XMM5, \T4 | ||
2321 | vpxor \T4, \T6, \T6 | ||
2322 | |||
2323 | vpclmulqdq $0x00, \T5, \XMM5, \T4 | ||
2324 | vpxor \T4, \T7, \T7 | ||
2325 | |||
2326 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
2327 | |||
2328 | vpxor \T2, \XMM1, \XMM1 | ||
2329 | |||
2330 | ###################### | ||
2331 | |||
2332 | vmovdqa HashKey_3(arg1), \T5 | ||
2333 | vpshufd $0b01001110, \XMM6, \T2 | ||
2334 | vpshufd $0b01001110, \T5, \T3 | ||
2335 | vpxor \XMM6, \T2, \T2 | ||
2336 | vpxor \T5, \T3, \T3 | ||
2337 | |||
2338 | vpclmulqdq $0x11, \T5, \XMM6, \T4 | ||
2339 | vpxor \T4, \T6, \T6 | ||
2340 | |||
2341 | vpclmulqdq $0x00, \T5, \XMM6, \T4 | ||
2342 | vpxor \T4, \T7, \T7 | ||
2343 | |||
2344 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
2345 | |||
2346 | vpxor \T2, \XMM1, \XMM1 | ||
2347 | |||
2348 | ###################### | ||
2349 | |||
2350 | vmovdqa HashKey_2(arg1), \T5 | ||
2351 | vpshufd $0b01001110, \XMM7, \T2 | ||
2352 | vpshufd $0b01001110, \T5, \T3 | ||
2353 | vpxor \XMM7, \T2, \T2 | ||
2354 | vpxor \T5, \T3, \T3 | ||
2355 | |||
2356 | vpclmulqdq $0x11, \T5, \XMM7, \T4 | ||
2357 | vpxor \T4, \T6, \T6 | ||
2358 | |||
2359 | vpclmulqdq $0x00, \T5, \XMM7, \T4 | ||
2360 | vpxor \T4, \T7, \T7 | ||
2361 | |||
2362 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
2363 | |||
2364 | vpxor \T2, \XMM1, \XMM1 | ||
2365 | |||
2366 | ###################### | ||
2367 | |||
2368 | vmovdqa HashKey(arg1), \T5 | ||
2369 | vpshufd $0b01001110, \XMM8, \T2 | ||
2370 | vpshufd $0b01001110, \T5, \T3 | ||
2371 | vpxor \XMM8, \T2, \T2 | ||
2372 | vpxor \T5, \T3, \T3 | ||
2373 | |||
2374 | vpclmulqdq $0x11, \T5, \XMM8, \T4 | ||
2375 | vpxor \T4, \T6, \T6 | ||
2376 | |||
2377 | vpclmulqdq $0x00, \T5, \XMM8, \T4 | ||
2378 | vpxor \T4, \T7, \T7 | ||
2379 | |||
2380 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
2381 | |||
2382 | vpxor \T2, \XMM1, \XMM1 | ||
2383 | vpxor \T6, \XMM1, \XMM1 | ||
2384 | vpxor \T7, \XMM1, \T2 | ||
2385 | |||
2386 | |||
2387 | |||
2388 | |||
2389 | vpslldq $8, \T2, \T4 | ||
2390 | vpsrldq $8, \T2, \T2 | ||
2391 | |||
2392 | vpxor \T4, \T7, \T7 | ||
2393 | vpxor \T2, \T6, \T6 # <T6:T7> holds the result of the | ||
2394 | # accumulated carry-less multiplications | ||
2395 | |||
2396 | ####################################################################### | ||
2397 | #first phase of the reduction | ||
2398 | vmovdqa POLY2(%rip), \T3 | ||
2399 | |||
2400 | vpclmulqdq $0x01, \T7, \T3, \T2 | ||
2401 | vpslldq $8, \T2, \T2 # shift-L xmm2 2 DWs | ||
2402 | |||
2403 | vpxor \T2, \T7, \T7 # first phase of the reduction complete | ||
2404 | ####################################################################### | ||
2405 | |||
2406 | |||
2407 | #second phase of the reduction | ||
2408 | vpclmulqdq $0x00, \T7, \T3, \T2 | ||
2409 | vpsrldq $4, \T2, \T2 # shift-R T2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R) | ||
2410 | |||
2411 | vpclmulqdq $0x10, \T7, \T3, \T4 | ||
2412 | vpslldq $4, \T4, \T4 # shift-L T4 1 DW (Shift-L 1-DW to obtain result with no shifts) | ||
2413 | |||
2414 | vpxor \T2, \T4, \T4 # second phase of the reduction complete | ||
2415 | ####################################################################### | ||
2416 | vpxor \T4, \T6, \T6 # the result is in T6 | ||
2417 | .endm | ||
2418 | |||
2419 | |||
2420 | |||
2421 | # combined for GCM encrypt and decrypt functions | ||
2422 | # clobbering all xmm registers | ||
2423 | # clobbering r10, r11, r12, r13, r14, r15 | ||
2424 | .macro GCM_ENC_DEC_AVX2 ENC_DEC | ||
2425 | |||
2426 | #the number of pushes must equal STACK_OFFSET | ||
2427 | push %r12 | ||
2428 | push %r13 | ||
2429 | push %r14 | ||
2430 | push %r15 | ||
2431 | |||
2432 | mov %rsp, %r14 | ||
2433 | |||
2434 | |||
2435 | |||
2436 | |||
2437 | sub $VARIABLE_OFFSET, %rsp | ||
2438 | and $~63, %rsp # align rsp to 64 bytes | ||
2439 | |||
2440 | |||
2441 | vmovdqu HashKey(arg1), %xmm13 # xmm13 = HashKey | ||
2442 | |||
2443 | mov arg4, %r13 # save the number of bytes of plaintext/ciphertext | ||
2444 | and $-16, %r13 # r13 = r13 - (r13 mod 16) | ||
2445 | |||
2446 | mov %r13, %r12 | ||
2447 | shr $4, %r12 | ||
2448 | and $7, %r12 | ||
2449 | jz _initial_num_blocks_is_0\@ | ||
2450 | |||
2451 | cmp $7, %r12 | ||
2452 | je _initial_num_blocks_is_7\@ | ||
2453 | cmp $6, %r12 | ||
2454 | je _initial_num_blocks_is_6\@ | ||
2455 | cmp $5, %r12 | ||
2456 | je _initial_num_blocks_is_5\@ | ||
2457 | cmp $4, %r12 | ||
2458 | je _initial_num_blocks_is_4\@ | ||
2459 | cmp $3, %r12 | ||
2460 | je _initial_num_blocks_is_3\@ | ||
2461 | cmp $2, %r12 | ||
2462 | je _initial_num_blocks_is_2\@ | ||
2463 | |||
2464 | jmp _initial_num_blocks_is_1\@ | ||
2465 | |||
2466 | _initial_num_blocks_is_7\@: | ||
2467 | INITIAL_BLOCKS_AVX2 7, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
2468 | sub $16*7, %r13 | ||
2469 | jmp _initial_blocks_encrypted\@ | ||
2470 | |||
2471 | _initial_num_blocks_is_6\@: | ||
2472 | INITIAL_BLOCKS_AVX2 6, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
2473 | sub $16*6, %r13 | ||
2474 | jmp _initial_blocks_encrypted\@ | ||
2475 | |||
2476 | _initial_num_blocks_is_5\@: | ||
2477 | INITIAL_BLOCKS_AVX2 5, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
2478 | sub $16*5, %r13 | ||
2479 | jmp _initial_blocks_encrypted\@ | ||
2480 | |||
2481 | _initial_num_blocks_is_4\@: | ||
2482 | INITIAL_BLOCKS_AVX2 4, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
2483 | sub $16*4, %r13 | ||
2484 | jmp _initial_blocks_encrypted\@ | ||
2485 | |||
2486 | _initial_num_blocks_is_3\@: | ||
2487 | INITIAL_BLOCKS_AVX2 3, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
2488 | sub $16*3, %r13 | ||
2489 | jmp _initial_blocks_encrypted\@ | ||
2490 | |||
2491 | _initial_num_blocks_is_2\@: | ||
2492 | INITIAL_BLOCKS_AVX2 2, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
2493 | sub $16*2, %r13 | ||
2494 | jmp _initial_blocks_encrypted\@ | ||
2495 | |||
2496 | _initial_num_blocks_is_1\@: | ||
2497 | INITIAL_BLOCKS_AVX2 1, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
2498 | sub $16*1, %r13 | ||
2499 | jmp _initial_blocks_encrypted\@ | ||
2500 | |||
2501 | _initial_num_blocks_is_0\@: | ||
2502 | INITIAL_BLOCKS_AVX2 0, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
2503 | |||
2504 | |||
2505 | _initial_blocks_encrypted\@: | ||
2506 | cmp $0, %r13 | ||
2507 | je _zero_cipher_left\@ | ||
2508 | |||
2509 | sub $128, %r13 | ||
2510 | je _eight_cipher_left\@ | ||
2511 | |||
2512 | |||
2513 | |||
2514 | |||
2515 | vmovd %xmm9, %r15d | ||
2516 | and $255, %r15d | ||
2517 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
2518 | |||
2519 | |||
2520 | _encrypt_by_8_new\@: | ||
2521 | cmp $(255-8), %r15d | ||
2522 | jg _encrypt_by_8\@ | ||
2523 | |||
2524 | |||
2525 | |||
2526 | add $8, %r15b | ||
2527 | GHASH_8_ENCRYPT_8_PARALLEL_AVX2 %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, out_order, \ENC_DEC | ||
2528 | add $128, %r11 | ||
2529 | sub $128, %r13 | ||
2530 | jne _encrypt_by_8_new\@ | ||
2531 | |||
2532 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
2533 | jmp _eight_cipher_left\@ | ||
2534 | |||
2535 | _encrypt_by_8\@: | ||
2536 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
2537 | add $8, %r15b | ||
2538 | GHASH_8_ENCRYPT_8_PARALLEL_AVX2 %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, in_order, \ENC_DEC | ||
2539 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
2540 | add $128, %r11 | ||
2541 | sub $128, %r13 | ||
2542 | jne _encrypt_by_8_new\@ | ||
2543 | |||
2544 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
2545 | |||
2546 | |||
2547 | |||
2548 | |||
2549 | _eight_cipher_left\@: | ||
2550 | GHASH_LAST_8_AVX2 %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8 | ||
2551 | |||
2552 | |||
2553 | _zero_cipher_left\@: | ||
2554 | cmp $16, arg4 | ||
2555 | jl _only_less_than_16\@ | ||
2556 | |||
2557 | mov arg4, %r13 | ||
2558 | and $15, %r13 # r13 = (arg4 mod 16) | ||
2559 | |||
2560 | je _multiple_of_16_bytes\@ | ||
2561 | |||
2562 | # handle the last <16 Byte block separately | ||
2563 | |||
2564 | |||
2565 | vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn | ||
2566 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
2567 | ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Yn) | ||
2568 | |||
2569 | sub $16, %r11 | ||
2570 | add %r13, %r11 | ||
2571 | vmovdqu (arg3, %r11), %xmm1 # receive the last <16 Byte block | ||
2572 | |||
2573 | lea SHIFT_MASK+16(%rip), %r12 | ||
2574 | sub %r13, %r12 # adjust the shuffle mask pointer | ||
2575 | # to be able to shift 16-r13 bytes | ||
2576 | # (r13 is the number of bytes in plaintext mod 16) | ||
2577 | vmovdqu (%r12), %xmm2 # get the appropriate shuffle mask | ||
2578 | vpshufb %xmm2, %xmm1, %xmm1 # shift right 16-r13 bytes | ||
2579 | jmp _final_ghash_mul\@ | ||
2580 | |||
2581 | _only_less_than_16\@: | ||
2582 | # check for 0 length | ||
2583 | mov arg4, %r13 | ||
2584 | and $15, %r13 # r13 = (arg4 mod 16) | ||
2585 | |||
2586 | je _multiple_of_16_bytes\@ | ||
2587 | |||
2588 | # handle the last <16 Byte block separately | ||
2589 | |||
2590 | |||
2591 | vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn | ||
2592 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
2593 | ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Yn) | ||
2594 | |||
2595 | |||
2596 | lea SHIFT_MASK+16(%rip), %r12 | ||
2597 | sub %r13, %r12 # adjust the shuffle mask pointer to be | ||
2598 | # able to shift 16-r13 bytes (r13 is the | ||
2599 | # number of bytes in plaintext mod 16) | ||
2600 | |||
2601 | _get_last_16_byte_loop\@: | ||
2602 | movb (arg3, %r11), %al | ||
2603 | movb %al, TMP1 (%rsp , %r11) | ||
2604 | add $1, %r11 | ||
2605 | cmp %r13, %r11 | ||
2606 | jne _get_last_16_byte_loop\@ | ||
2607 | |||
2608 | vmovdqu TMP1(%rsp), %xmm1 | ||
2609 | |||
2610 | sub $16, %r11 | ||
2611 | |||
2612 | _final_ghash_mul\@: | ||
2613 | .if \ENC_DEC == DEC | ||
2614 | vmovdqa %xmm1, %xmm2 | ||
2615 | vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn) | ||
2616 | vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to mask out top 16-r13 bytes of xmm9 | ||
2617 | vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9 | ||
2618 | vpand %xmm1, %xmm2, %xmm2 | ||
2619 | vpshufb SHUF_MASK(%rip), %xmm2, %xmm2 | ||
2620 | vpxor %xmm2, %xmm14, %xmm14 | ||
2621 | #GHASH computation for the last <16 Byte block | ||
2622 | GHASH_MUL_AVX2 %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 | ||
2623 | sub %r13, %r11 | ||
2624 | add $16, %r11 | ||
2625 | .else | ||
2626 | vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn) | ||
2627 | vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to mask out top 16-r13 bytes of xmm9 | ||
2628 | vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9 | ||
2629 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
2630 | vpxor %xmm9, %xmm14, %xmm14 | ||
2631 | #GHASH computation for the last <16 Byte block | ||
2632 | GHASH_MUL_AVX2 %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 | ||
2633 | sub %r13, %r11 | ||
2634 | add $16, %r11 | ||
2635 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 # shuffle xmm9 back to output as ciphertext | ||
2636 | .endif | ||
2637 | |||
2638 | |||
2639 | ############################# | ||
2640 | # output r13 Bytes | ||
2641 | vmovq %xmm9, %rax | ||
2642 | cmp $8, %r13 | ||
2643 | jle _less_than_8_bytes_left\@ | ||
2644 | |||
2645 | mov %rax, (arg2 , %r11) | ||
2646 | add $8, %r11 | ||
2647 | vpsrldq $8, %xmm9, %xmm9 | ||
2648 | vmovq %xmm9, %rax | ||
2649 | sub $8, %r13 | ||
2650 | |||
2651 | _less_than_8_bytes_left\@: | ||
2652 | movb %al, (arg2 , %r11) | ||
2653 | add $1, %r11 | ||
2654 | shr $8, %rax | ||
2655 | sub $1, %r13 | ||
2656 | jne _less_than_8_bytes_left\@ | ||
2657 | ############################# | ||
2658 | |||
2659 | _multiple_of_16_bytes\@: | ||
2660 | mov arg7, %r12 # r12 = aadLen (number of bytes) | ||
2661 | shl $3, %r12 # convert into number of bits | ||
2662 | vmovd %r12d, %xmm15 # len(A) in xmm15 | ||
2663 | |||
2664 | shl $3, arg4 # len(C) in bits (*8) | ||
2665 | vmovq arg4, %xmm1 | ||
2666 | vpslldq $8, %xmm15, %xmm15 # xmm15 = len(A)|| 0x0000000000000000 | ||
2667 | vpxor %xmm1, %xmm15, %xmm15 # xmm15 = len(A)||len(C) | ||
2668 | |||
2669 | vpxor %xmm15, %xmm14, %xmm14 | ||
2670 | GHASH_MUL_AVX2 %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 # final GHASH computation | ||
2671 | vpshufb SHUF_MASK(%rip), %xmm14, %xmm14 # perform a 16Byte swap | ||
2672 | |||
2673 | mov arg5, %rax # rax = *Y0 | ||
2674 | vmovdqu (%rax), %xmm9 # xmm9 = Y0 | ||
2675 | |||
2676 | ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Y0) | ||
2677 | |||
2678 | vpxor %xmm14, %xmm9, %xmm9 | ||
2679 | |||
2680 | |||
2681 | |||
2682 | _return_T\@: | ||
2683 | mov arg8, %r10 # r10 = authTag | ||
2684 | mov arg9, %r11 # r11 = auth_tag_len | ||
2685 | |||
2686 | cmp $16, %r11 | ||
2687 | je _T_16\@ | ||
2688 | |||
2689 | cmp $12, %r11 | ||
2690 | je _T_12\@ | ||
2691 | |||
2692 | _T_8\@: | ||
2693 | vmovq %xmm9, %rax | ||
2694 | mov %rax, (%r10) | ||
2695 | jmp _return_T_done\@ | ||
2696 | _T_12\@: | ||
2697 | vmovq %xmm9, %rax | ||
2698 | mov %rax, (%r10) | ||
2699 | vpsrldq $8, %xmm9, %xmm9 | ||
2700 | vmovd %xmm9, %eax | ||
2701 | mov %eax, 8(%r10) | ||
2702 | jmp _return_T_done\@ | ||
2703 | |||
2704 | _T_16\@: | ||
2705 | vmovdqu %xmm9, (%r10) | ||
2706 | |||
2707 | _return_T_done\@: | ||
2708 | mov %r14, %rsp | ||
2709 | |||
2710 | pop %r15 | ||
2711 | pop %r14 | ||
2712 | pop %r13 | ||
2713 | pop %r12 | ||
2714 | .endm | ||
2715 | |||
2716 | |||
2717 | ############################################################# | ||
2718 | #void aesni_gcm_precomp_avx_gen4 | ||
2719 | # (gcm_data *my_ctx_data, | ||
2720 | # u8 *hash_subkey)# /* H, the Hash sub key input. | ||
2721 | # Data starts on a 16-byte boundary. */ | ||
2722 | ############################################################# | ||
2723 | ENTRY(aesni_gcm_precomp_avx_gen4) | ||
2724 | #the number of pushes must equal STACK_OFFSET | ||
2725 | push %r12 | ||
2726 | push %r13 | ||
2727 | push %r14 | ||
2728 | push %r15 | ||
2729 | |||
2730 | mov %rsp, %r14 | ||
2731 | |||
2732 | |||
2733 | |||
2734 | sub $VARIABLE_OFFSET, %rsp | ||
2735 | and $~63, %rsp # align rsp to 64 bytes | ||
2736 | |||
2737 | vmovdqu (arg2), %xmm6 # xmm6 = HashKey | ||
2738 | |||
2739 | vpshufb SHUF_MASK(%rip), %xmm6, %xmm6 | ||
2740 | ############### PRECOMPUTATION of HashKey<<1 mod poly from the HashKey | ||
2741 | vmovdqa %xmm6, %xmm2 | ||
2742 | vpsllq $1, %xmm6, %xmm6 | ||
2743 | vpsrlq $63, %xmm2, %xmm2 | ||
2744 | vmovdqa %xmm2, %xmm1 | ||
2745 | vpslldq $8, %xmm2, %xmm2 | ||
2746 | vpsrldq $8, %xmm1, %xmm1 | ||
2747 | vpor %xmm2, %xmm6, %xmm6 | ||
2748 | #reduction | ||
2749 | vpshufd $0b00100100, %xmm1, %xmm2 | ||
2750 | vpcmpeqd TWOONE(%rip), %xmm2, %xmm2 | ||
2751 | vpand POLY(%rip), %xmm2, %xmm2 | ||
2752 | vpxor %xmm2, %xmm6, %xmm6 # xmm6 holds the HashKey<<1 mod poly | ||
2753 | ####################################################################### | ||
2754 | vmovdqa %xmm6, HashKey(arg1) # store HashKey<<1 mod poly | ||
2755 | |||
2756 | |||
2757 | PRECOMPUTE_AVX2 %xmm6, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5 | ||
2758 | |||
2759 | mov %r14, %rsp | ||
2760 | |||
2761 | pop %r15 | ||
2762 | pop %r14 | ||
2763 | pop %r13 | ||
2764 | pop %r12 | ||
2765 | ret | ||
2766 | ENDPROC(aesni_gcm_precomp_avx_gen4) | ||
2767 | |||
2768 | |||
2769 | ############################################################################### | ||
2770 | #void aesni_gcm_enc_avx_gen4( | ||
2771 | # gcm_data *my_ctx_data, /* aligned to 16 Bytes */ | ||
2772 | # u8 *out, /* Ciphertext output. Encrypt in-place is allowed. */ | ||
2773 | # const u8 *in, /* Plaintext input */ | ||
2774 | # u64 plaintext_len, /* Length of data in Bytes for encryption. */ | ||
2775 | # u8 *iv, /* Pre-counter block j0: 4 byte salt | ||
2776 | # (from Security Association) concatenated with 8 byte | ||
2777 | # Initialisation Vector (from IPSec ESP Payload) | ||
2778 | # concatenated with 0x00000001. 16-byte aligned pointer. */ | ||
2779 | # const u8 *aad, /* Additional Authentication Data (AAD)*/ | ||
2780 | # u64 aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */ | ||
2781 | # u8 *auth_tag, /* Authenticated Tag output. */ | ||
2782 | # u64 auth_tag_len)# /* Authenticated Tag Length in bytes. | ||
2783 | # Valid values are 16 (most likely), 12 or 8. */ | ||
2784 | ############################################################################### | ||
2785 | ENTRY(aesni_gcm_enc_avx_gen4) | ||
2786 | GCM_ENC_DEC_AVX2 ENC | ||
2787 | ret | ||
2788 | ENDPROC(aesni_gcm_enc_avx_gen4) | ||
2789 | |||
2790 | ############################################################################### | ||
2791 | #void aesni_gcm_dec_avx_gen4( | ||
2792 | # gcm_data *my_ctx_data, /* aligned to 16 Bytes */ | ||
2793 | # u8 *out, /* Plaintext output. Decrypt in-place is allowed. */ | ||
2794 | # const u8 *in, /* Ciphertext input */ | ||
2795 | # u64 plaintext_len, /* Length of data in Bytes for encryption. */ | ||
2796 | # u8 *iv, /* Pre-counter block j0: 4 byte salt | ||
2797 | # (from Security Association) concatenated with 8 byte | ||
2798 | # Initialisation Vector (from IPSec ESP Payload) | ||
2799 | # concatenated with 0x00000001. 16-byte aligned pointer. */ | ||
2800 | # const u8 *aad, /* Additional Authentication Data (AAD)*/ | ||
2801 | # u64 aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */ | ||
2802 | # u8 *auth_tag, /* Authenticated Tag output. */ | ||
2803 | # u64 auth_tag_len)# /* Authenticated Tag Length in bytes. | ||
2804 | # Valid values are 16 (most likely), 12 or 8. */ | ||
2805 | ############################################################################### | ||
2806 | ENTRY(aesni_gcm_dec_avx_gen4) | ||
2807 | GCM_ENC_DEC_AVX2 DEC | ||
2808 | ret | ||
2809 | ENDPROC(aesni_gcm_dec_avx_gen4) | ||
2810 | |||
2811 | #endif /* CONFIG_AS_AVX2 */ | ||
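As a point of reference for the gen4 entry points documented above, the calling sequence is the same one the glue changes below use: precompute the hash-subkey tables once per key, then hand the whole request to the one-shot encrypt. A minimal caller sketch (not part of the diff; the function and buffer names here are made up, the aesni_* prototypes are the ones declared above):

	static void example_gcm_encrypt_avx2(void *gcm_ctx, u8 *hash_subkey,
					     u8 *dst, const u8 *src,
					     unsigned long len,
					     u8 *iv, /* 4B salt || 8B IV || 0x00000001 */
					     const u8 *aad, unsigned long aad_len,
					     u8 *tag, unsigned long tag_len /* 16, 12 or 8 */)
	{
		/* Build H and the powers of H the multi-block loop needs. */
		aesni_gcm_precomp_avx_gen4(gcm_ctx, hash_subkey);

		/* One-shot encrypt + authenticate; in-place (dst == src) is allowed. */
		aesni_gcm_enc_avx_gen4(gcm_ctx, dst, src, len, iv,
				       aad, aad_len, tag, tag_len);
	}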
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 835488b745ee..948ad0e77741 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c | |||
@@ -101,6 +101,9 @@ asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, | |||
101 | int crypto_fpu_init(void); | 101 | int crypto_fpu_init(void); |
102 | void crypto_fpu_exit(void); | 102 | void crypto_fpu_exit(void); |
103 | 103 | ||
104 | #define AVX_GEN2_OPTSIZE 640 | ||
105 | #define AVX_GEN4_OPTSIZE 4096 | ||
106 | |||
104 | #ifdef CONFIG_X86_64 | 107 | #ifdef CONFIG_X86_64 |
105 | asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, | 108 | asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, |
106 | const u8 *in, unsigned int len, u8 *iv); | 109 | const u8 *in, unsigned int len, u8 *iv); |
@@ -150,6 +153,123 @@ asmlinkage void aesni_gcm_dec(void *ctx, u8 *out, | |||
150 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | 153 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, |
151 | u8 *auth_tag, unsigned long auth_tag_len); | 154 | u8 *auth_tag, unsigned long auth_tag_len); |
152 | 155 | ||
156 | |||
157 | #ifdef CONFIG_AS_AVX | ||
158 | /* | ||
159 | * asmlinkage void aesni_gcm_precomp_avx_gen2() | ||
160 | * gcm_data *my_ctx_data, context data | ||
161 | * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary. | ||
162 | */ | ||
163 | asmlinkage void aesni_gcm_precomp_avx_gen2(void *my_ctx_data, u8 *hash_subkey); | ||
164 | |||
165 | asmlinkage void aesni_gcm_enc_avx_gen2(void *ctx, u8 *out, | ||
166 | const u8 *in, unsigned long plaintext_len, u8 *iv, | ||
167 | const u8 *aad, unsigned long aad_len, | ||
168 | u8 *auth_tag, unsigned long auth_tag_len); | ||
169 | |||
170 | asmlinkage void aesni_gcm_dec_avx_gen2(void *ctx, u8 *out, | ||
171 | const u8 *in, unsigned long ciphertext_len, u8 *iv, | ||
172 | const u8 *aad, unsigned long aad_len, | ||
173 | u8 *auth_tag, unsigned long auth_tag_len); | ||
174 | |||
175 | static void aesni_gcm_enc_avx(void *ctx, u8 *out, | ||
176 | const u8 *in, unsigned long plaintext_len, u8 *iv, | ||
177 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | ||
178 | u8 *auth_tag, unsigned long auth_tag_len) | ||
179 | { | ||
180 | if (plaintext_len < AVX_GEN2_OPTSIZE) { | ||
181 | aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad, | ||
182 | aad_len, auth_tag, auth_tag_len); | ||
183 | } else { | ||
184 | aesni_gcm_precomp_avx_gen2(ctx, hash_subkey); | ||
185 | aesni_gcm_enc_avx_gen2(ctx, out, in, plaintext_len, iv, aad, | ||
186 | aad_len, auth_tag, auth_tag_len); | ||
187 | } | ||
188 | } | ||
189 | |||
190 | static void aesni_gcm_dec_avx(void *ctx, u8 *out, | ||
191 | const u8 *in, unsigned long ciphertext_len, u8 *iv, | ||
192 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | ||
193 | u8 *auth_tag, unsigned long auth_tag_len) | ||
194 | { | ||
195 | if (ciphertext_len < AVX_GEN2_OPTSIZE) { | ||
196 | aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey, aad, | ||
197 | aad_len, auth_tag, auth_tag_len); | ||
198 | } else { | ||
199 | aesni_gcm_precomp_avx_gen2(ctx, hash_subkey); | ||
200 | aesni_gcm_dec_avx_gen2(ctx, out, in, ciphertext_len, iv, aad, | ||
201 | aad_len, auth_tag, auth_tag_len); | ||
202 | } | ||
203 | } | ||
204 | #endif | ||
205 | |||
206 | #ifdef CONFIG_AS_AVX2 | ||
207 | /* | ||
208 | * asmlinkage void aesni_gcm_precomp_avx_gen4() | ||
209 | * gcm_data *my_ctx_data, context data | ||
210 | * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary. | ||
211 | */ | ||
212 | asmlinkage void aesni_gcm_precomp_avx_gen4(void *my_ctx_data, u8 *hash_subkey); | ||
213 | |||
214 | asmlinkage void aesni_gcm_enc_avx_gen4(void *ctx, u8 *out, | ||
215 | const u8 *in, unsigned long plaintext_len, u8 *iv, | ||
216 | const u8 *aad, unsigned long aad_len, | ||
217 | u8 *auth_tag, unsigned long auth_tag_len); | ||
218 | |||
219 | asmlinkage void aesni_gcm_dec_avx_gen4(void *ctx, u8 *out, | ||
220 | const u8 *in, unsigned long ciphertext_len, u8 *iv, | ||
221 | const u8 *aad, unsigned long aad_len, | ||
222 | u8 *auth_tag, unsigned long auth_tag_len); | ||
223 | |||
224 | static void aesni_gcm_enc_avx2(void *ctx, u8 *out, | ||
225 | const u8 *in, unsigned long plaintext_len, u8 *iv, | ||
226 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | ||
227 | u8 *auth_tag, unsigned long auth_tag_len) | ||
228 | { | ||
229 | if (plaintext_len < AVX_GEN2_OPTSIZE) { | ||
230 | aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad, | ||
231 | aad_len, auth_tag, auth_tag_len); | ||
232 | } else if (plaintext_len < AVX_GEN4_OPTSIZE) { | ||
233 | aesni_gcm_precomp_avx_gen2(ctx, hash_subkey); | ||
234 | aesni_gcm_enc_avx_gen2(ctx, out, in, plaintext_len, iv, aad, | ||
235 | aad_len, auth_tag, auth_tag_len); | ||
236 | } else { | ||
237 | aesni_gcm_precomp_avx_gen4(ctx, hash_subkey); | ||
238 | aesni_gcm_enc_avx_gen4(ctx, out, in, plaintext_len, iv, aad, | ||
239 | aad_len, auth_tag, auth_tag_len); | ||
240 | } | ||
241 | } | ||
242 | |||
243 | static void aesni_gcm_dec_avx2(void *ctx, u8 *out, | ||
244 | const u8 *in, unsigned long ciphertext_len, u8 *iv, | ||
245 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | ||
246 | u8 *auth_tag, unsigned long auth_tag_len) | ||
247 | { | ||
248 | if (ciphertext_len < AVX_GEN2_OPTSIZE) { | ||
249 | aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey, | ||
250 | aad, aad_len, auth_tag, auth_tag_len); | ||
251 | } else if (ciphertext_len < AVX_GEN4_OPTSIZE) { | ||
252 | aesni_gcm_precomp_avx_gen2(ctx, hash_subkey); | ||
253 | aesni_gcm_dec_avx_gen2(ctx, out, in, ciphertext_len, iv, aad, | ||
254 | aad_len, auth_tag, auth_tag_len); | ||
255 | } else { | ||
256 | aesni_gcm_precomp_avx_gen4(ctx, hash_subkey); | ||
257 | aesni_gcm_dec_avx_gen4(ctx, out, in, ciphertext_len, iv, aad, | ||
258 | aad_len, auth_tag, auth_tag_len); | ||
259 | } | ||
260 | } | ||
261 | #endif | ||
262 | |||
263 | static void (*aesni_gcm_enc_tfm)(void *ctx, u8 *out, | ||
264 | const u8 *in, unsigned long plaintext_len, u8 *iv, | ||
265 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | ||
266 | u8 *auth_tag, unsigned long auth_tag_len); | ||
267 | |||
268 | static void (*aesni_gcm_dec_tfm)(void *ctx, u8 *out, | ||
269 | const u8 *in, unsigned long ciphertext_len, u8 *iv, | ||
270 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | ||
271 | u8 *auth_tag, unsigned long auth_tag_len); | ||
272 | |||
153 | static inline struct | 273 | static inline struct |
154 | aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm) | 274 | aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm) |
155 | { | 275 | { |
@@ -915,7 +1035,7 @@ static int __driver_rfc4106_encrypt(struct aead_request *req) | |||
915 | dst = src; | 1035 | dst = src; |
916 | } | 1036 | } |
917 | 1037 | ||
918 | aesni_gcm_enc(aes_ctx, dst, src, (unsigned long)req->cryptlen, iv, | 1038 | aesni_gcm_enc_tfm(aes_ctx, dst, src, (unsigned long)req->cryptlen, iv, |
919 | ctx->hash_subkey, assoc, (unsigned long)req->assoclen, dst | 1039 | ctx->hash_subkey, assoc, (unsigned long)req->assoclen, dst |
920 | + ((unsigned long)req->cryptlen), auth_tag_len); | 1040 | + ((unsigned long)req->cryptlen), auth_tag_len); |
921 | 1041 | ||
@@ -996,12 +1116,12 @@ static int __driver_rfc4106_decrypt(struct aead_request *req) | |||
996 | dst = src; | 1116 | dst = src; |
997 | } | 1117 | } |
998 | 1118 | ||
999 | aesni_gcm_dec(aes_ctx, dst, src, tempCipherLen, iv, | 1119 | aesni_gcm_dec_tfm(aes_ctx, dst, src, tempCipherLen, iv, |
1000 | ctx->hash_subkey, assoc, (unsigned long)req->assoclen, | 1120 | ctx->hash_subkey, assoc, (unsigned long)req->assoclen, |
1001 | authTag, auth_tag_len); | 1121 | authTag, auth_tag_len); |
1002 | 1122 | ||
1003 | /* Compare generated tag with passed in tag. */ | 1123 | /* Compare generated tag with passed in tag. */ |
1004 | retval = memcmp(src + tempCipherLen, authTag, auth_tag_len) ? | 1124 | retval = crypto_memneq(src + tempCipherLen, authTag, auth_tag_len) ? |
1005 | -EBADMSG : 0; | 1125 | -EBADMSG : 0; |
1006 | 1126 | ||
1007 | if (one_entry_in_sg) { | 1127 | if (one_entry_in_sg) { |
@@ -1353,6 +1473,27 @@ static int __init aesni_init(void) | |||
1353 | 1473 | ||
1354 | if (!x86_match_cpu(aesni_cpu_id)) | 1474 | if (!x86_match_cpu(aesni_cpu_id)) |
1355 | return -ENODEV; | 1475 | return -ENODEV; |
1476 | #ifdef CONFIG_X86_64 | ||
1477 | #ifdef CONFIG_AS_AVX2 | ||
1478 | if (boot_cpu_has(X86_FEATURE_AVX2)) { | ||
1479 | pr_info("AVX2 version of gcm_enc/dec engaged.\n"); | ||
1480 | aesni_gcm_enc_tfm = aesni_gcm_enc_avx2; | ||
1481 | aesni_gcm_dec_tfm = aesni_gcm_dec_avx2; | ||
1482 | } else | ||
1483 | #endif | ||
1484 | #ifdef CONFIG_AS_AVX | ||
1485 | if (boot_cpu_has(X86_FEATURE_AVX)) { | ||
1486 | pr_info("AVX version of gcm_enc/dec engaged.\n"); | ||
1487 | aesni_gcm_enc_tfm = aesni_gcm_enc_avx; | ||
1488 | aesni_gcm_dec_tfm = aesni_gcm_dec_avx; | ||
1489 | } else | ||
1490 | #endif | ||
1491 | { | ||
1492 | pr_info("SSE version of gcm_enc/dec engaged.\n"); | ||
1493 | aesni_gcm_enc_tfm = aesni_gcm_enc; | ||
1494 | aesni_gcm_dec_tfm = aesni_gcm_dec; | ||
1495 | } | ||
1496 | #endif | ||
1356 | 1497 | ||
1357 | err = crypto_fpu_init(); | 1498 | err = crypto_fpu_init(); |
1358 | if (err) | 1499 | if (err) |
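The hunk above picks an implementation twice: once at module init from CPU features (AVX2, then AVX, then SSE) and once per request from the data length, using the two OPTSIZE cutoffs added at the top of the file. A standalone, illustrative-only sketch of just the length cutoffs, to make the thresholds explicit:

	#include <stdio.h>

	/* Same cutoffs as the hunk above. */
	#define AVX_GEN2_OPTSIZE  640
	#define AVX_GEN4_OPTSIZE 4096

	/* Below 640 bytes the plain SSE path wins; from 640 bytes the AVX
	 * (gen2) path is used; from 4 KiB up the AVX2 (gen4) path is used,
	 * assuming the CPU advertised the corresponding feature at init. */
	static const char *gcm_path_for_len(unsigned long len)
	{
		if (len < AVX_GEN2_OPTSIZE)
			return "sse";
		if (len < AVX_GEN4_OPTSIZE)
			return "avx (gen2)";
		return "avx2 (gen4)";
	}

	int main(void)
	{
		unsigned long lens[] = { 64, 640, 1500, 4096, 16384 };

		for (int i = 0; i < 5; i++)
			printf("%5lu bytes -> %s\n", lens[i], gcm_path_for_len(lens[i]));
		return 0;
	}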
diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h index fd8f9e2ca35f..535192f6bfad 100644 --- a/arch/x86/include/asm/dmi.h +++ b/arch/x86/include/asm/dmi.h | |||
@@ -13,7 +13,9 @@ static __always_inline __init void *dmi_alloc(unsigned len) | |||
13 | } | 13 | } |
14 | 14 | ||
15 | /* Use early IO mappings for DMI because it's initialized early */ | 15 | /* Use early IO mappings for DMI because it's initialized early */ |
16 | #define dmi_ioremap early_ioremap | 16 | #define dmi_early_remap early_ioremap |
17 | #define dmi_iounmap early_iounmap | 17 | #define dmi_early_unmap early_iounmap |
18 | #define dmi_remap ioremap | ||
19 | #define dmi_unmap iounmap | ||
18 | 20 | ||
19 | #endif /* _ASM_X86_DMI_H */ | 21 | #endif /* _ASM_X86_DMI_H */ |
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index e846225265ed..7252cd339175 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h | |||
@@ -175,64 +175,7 @@ static inline void __set_fixmap(enum fixed_addresses idx, | |||
175 | } | 175 | } |
176 | #endif | 176 | #endif |
177 | 177 | ||
178 | #define set_fixmap(idx, phys) \ | 178 | #include <asm-generic/fixmap.h> |
179 | __set_fixmap(idx, phys, PAGE_KERNEL) | ||
180 | |||
181 | /* | ||
182 | * Some hardware wants to get fixmapped without caching. | ||
183 | */ | ||
184 | #define set_fixmap_nocache(idx, phys) \ | ||
185 | __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE) | ||
186 | |||
187 | #define clear_fixmap(idx) \ | ||
188 | __set_fixmap(idx, 0, __pgprot(0)) | ||
189 | |||
190 | #define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) | ||
191 | #define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) | ||
192 | |||
193 | extern void __this_fixmap_does_not_exist(void); | ||
194 | |||
195 | /* | ||
196 | * 'index to address' translation. If anyone tries to use the idx | ||
197 | * directly without translation, we catch the bug with a NULL-deference | ||
198 | * kernel oops. Illegal ranges of incoming indices are caught too. | ||
199 | */ | ||
200 | static __always_inline unsigned long fix_to_virt(const unsigned int idx) | ||
201 | { | ||
202 | /* | ||
203 | * this branch gets completely eliminated after inlining, | ||
204 | * except when someone tries to use fixaddr indices in an | ||
205 | * illegal way. (such as mixing up address types or using | ||
206 | * out-of-range indices). | ||
207 | * | ||
208 | * If it doesn't get removed, the linker will complain | ||
209 | * loudly with a reasonably clear error message.. | ||
210 | */ | ||
211 | if (idx >= __end_of_fixed_addresses) | ||
212 | __this_fixmap_does_not_exist(); | ||
213 | |||
214 | return __fix_to_virt(idx); | ||
215 | } | ||
216 | |||
217 | static inline unsigned long virt_to_fix(const unsigned long vaddr) | ||
218 | { | ||
219 | BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); | ||
220 | return __virt_to_fix(vaddr); | ||
221 | } | ||
222 | |||
223 | /* Return an pointer with offset calculated */ | ||
224 | static __always_inline unsigned long | ||
225 | __set_fixmap_offset(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) | ||
226 | { | ||
227 | __set_fixmap(idx, phys, flags); | ||
228 | return fix_to_virt(idx) + (phys & (PAGE_SIZE - 1)); | ||
229 | } | ||
230 | |||
231 | #define set_fixmap_offset(idx, phys) \ | ||
232 | __set_fixmap_offset(idx, phys, PAGE_KERNEL) | ||
233 | |||
234 | #define set_fixmap_offset_nocache(idx, phys) \ | ||
235 | __set_fixmap_offset(idx, phys, PAGE_KERNEL_NOCACHE) | ||
236 | 179 | ||
237 | #endif /* !__ASSEMBLY__ */ | 180 | #endif /* !__ASSEMBLY__ */ |
238 | #endif /* _ASM_X86_FIXMAP_H */ | 181 | #endif /* _ASM_X86_FIXMAP_H */ |
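The helpers removed here are not lost, they move to the new asm-generic/fixmap.h; the index-to-address arithmetic stays the same. A tiny user-space sketch of that arithmetic, with toy values standing in for the real FIXADDR_TOP and PAGE_SHIFT:

	#include <stdio.h>

	/* Toy values purely for illustration; the real ones come from the
	 * architecture headers. */
	#define PAGE_SHIFT   12
	#define PAGE_SIZE    (1UL << PAGE_SHIFT)
	#define PAGE_MASK    (~(PAGE_SIZE - 1))
	#define FIXADDR_TOP  0xfffff000UL

	/* The moved helpers: each fixmap index maps to one page, growing
	 * downward from FIXADDR_TOP. */
	#define __fix_to_virt(x)  (FIXADDR_TOP - ((unsigned long)(x) << PAGE_SHIFT))
	#define __virt_to_fix(x)  ((FIXADDR_TOP - ((x) & PAGE_MASK)) >> PAGE_SHIFT)

	int main(void)
	{
		for (unsigned int idx = 0; idx < 3; idx++) {
			unsigned long va = __fix_to_virt(idx);
			printf("idx %u -> va %#lx -> idx %lu\n",
			       idx, va, __virt_to_fix(va));
		}
		return 0;
	}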
diff --git a/arch/x86/include/asm/hash.h b/arch/x86/include/asm/hash.h new file mode 100644 index 000000000000..e8c58f88b1d4 --- /dev/null +++ b/arch/x86/include/asm/hash.h | |||
@@ -0,0 +1,7 @@ | |||
1 | #ifndef _ASM_X86_HASH_H | ||
2 | #define _ASM_X86_HASH_H | ||
3 | |||
4 | struct fast_hash_ops; | ||
5 | extern void setup_arch_fast_hash(struct fast_hash_ops *ops); | ||
6 | |||
7 | #endif /* _ASM_X86_HASH_H */ | ||
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ae5d7830855c..fdf83afbb7d9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -605,6 +605,7 @@ struct kvm_arch { | |||
605 | /* fields used by HYPER-V emulation */ | 605 | /* fields used by HYPER-V emulation */ |
606 | u64 hv_guest_os_id; | 606 | u64 hv_guest_os_id; |
607 | u64 hv_hypercall; | 607 | u64 hv_hypercall; |
608 | u64 hv_tsc_page; | ||
608 | 609 | ||
609 | #ifdef CONFIG_KVM_MMU_AUDIT | 610 | #ifdef CONFIG_KVM_MMU_AUDIT |
610 | int audit_point; | 611 | int audit_point; |
@@ -699,6 +700,8 @@ struct kvm_x86_ops { | |||
699 | void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); | 700 | void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); |
700 | void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); | 701 | void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); |
701 | void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); | 702 | void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); |
703 | u64 (*get_dr6)(struct kvm_vcpu *vcpu); | ||
704 | void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value); | ||
702 | void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value); | 705 | void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value); |
703 | void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); | 706 | void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); |
704 | unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); | 707 | unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); |
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 1df115909758..c7678e43465b 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h | |||
@@ -85,28 +85,9 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, | |||
85 | return ret; | 85 | return ret; |
86 | } | 86 | } |
87 | 87 | ||
88 | static inline uint32_t kvm_cpuid_base(void) | ||
89 | { | ||
90 | if (boot_cpu_data.cpuid_level < 0) | ||
91 | return 0; /* So we don't blow up on old processors */ | ||
92 | |||
93 | if (cpu_has_hypervisor) | ||
94 | return hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0); | ||
95 | |||
96 | return 0; | ||
97 | } | ||
98 | |||
99 | static inline bool kvm_para_available(void) | ||
100 | { | ||
101 | return kvm_cpuid_base() != 0; | ||
102 | } | ||
103 | |||
104 | static inline unsigned int kvm_arch_para_features(void) | ||
105 | { | ||
106 | return cpuid_eax(KVM_CPUID_FEATURES); | ||
107 | } | ||
108 | |||
109 | #ifdef CONFIG_KVM_GUEST | 88 | #ifdef CONFIG_KVM_GUEST |
89 | bool kvm_para_available(void); | ||
90 | unsigned int kvm_arch_para_features(void); | ||
110 | void __init kvm_guest_init(void); | 91 | void __init kvm_guest_init(void); |
111 | void kvm_async_pf_task_wait(u32 token); | 92 | void kvm_async_pf_task_wait(u32 token); |
112 | void kvm_async_pf_task_wake(u32 token); | 93 | void kvm_async_pf_task_wake(u32 token); |
@@ -126,6 +107,16 @@ static inline void kvm_spinlock_init(void) | |||
126 | #define kvm_async_pf_task_wait(T) do {} while(0) | 107 | #define kvm_async_pf_task_wait(T) do {} while(0) |
127 | #define kvm_async_pf_task_wake(T) do {} while(0) | 108 | #define kvm_async_pf_task_wake(T) do {} while(0) |
128 | 109 | ||
110 | static inline bool kvm_para_available(void) | ||
111 | { | ||
112 | return 0; | ||
113 | } | ||
114 | |||
115 | static inline unsigned int kvm_arch_para_features(void) | ||
116 | { | ||
117 | return 0; | ||
118 | } | ||
119 | |||
129 | static inline u32 kvm_read_and_reset_pf_reason(void) | 120 | static inline u32 kvm_read_and_reset_pf_reason(void) |
130 | { | 121 | { |
131 | return 0; | 122 | return 0; |
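Callers keep the usual probe-then-test pattern, only the implementation of the two accessors moves out of line (see the arch/x86/kernel/kvm.c hunk further down). A hedged sketch of a typical guest-side user, with KVM_FEATURE_CLOCKSOURCE used purely as an example feature bit:

	/* Illustrative only, not part of the patch. */
	static bool example_have_kvm_feature(void)
	{
		if (!kvm_para_available())	/* "KVMKVMKVM" CPUID signature found? */
			return false;

		return kvm_arch_para_features() & (1 << KVM_FEATURE_CLOCKSOURCE);
	}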
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 401f350ef71b..cd6e1610e29e 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h | |||
@@ -781,9 +781,9 @@ static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock, | |||
781 | */ | 781 | */ |
782 | #define PV_CALLEE_SAVE_REGS_THUNK(func) \ | 782 | #define PV_CALLEE_SAVE_REGS_THUNK(func) \ |
783 | extern typeof(func) __raw_callee_save_##func; \ | 783 | extern typeof(func) __raw_callee_save_##func; \ |
784 | static void *__##func##__ __used = func; \ | ||
785 | \ | 784 | \ |
786 | asm(".pushsection .text;" \ | 785 | asm(".pushsection .text;" \ |
786 | ".globl __raw_callee_save_" #func " ; " \ | ||
787 | "__raw_callee_save_" #func ": " \ | 787 | "__raw_callee_save_" #func ": " \ |
788 | PV_SAVE_ALL_CALLER_REGS \ | 788 | PV_SAVE_ALL_CALLER_REGS \ |
789 | "call " #func ";" \ | 789 | "call " #func ";" \ |
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index aab8f671b523..7549b8b369e4 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h | |||
@@ -388,10 +388,11 @@ extern struct pv_lock_ops pv_lock_ops; | |||
388 | _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") | 388 | _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") |
389 | 389 | ||
390 | /* Simple instruction patching code. */ | 390 | /* Simple instruction patching code. */ |
391 | #define DEF_NATIVE(ops, name, code) \ | 391 | #define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t" |
392 | extern const char start_##ops##_##name[] __visible, \ | 392 | |
393 | end_##ops##_##name[] __visible; \ | 393 | #define DEF_NATIVE(ops, name, code) \ |
394 | asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") | 394 | __visible extern const char start_##ops##_##name[], end_##ops##_##name[]; \ |
395 | asm(NATIVE_LABEL("start_", ops, name) code NATIVE_LABEL("end_", ops, name)) | ||
395 | 396 | ||
396 | unsigned paravirt_patch_nop(void); | 397 | unsigned paravirt_patch_nop(void); |
397 | unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len); | 398 | unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len); |
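For a rough idea of what changes at a use site, here is the approximate expansion of an existing user, DEF_NATIVE(pv_irq_ops, irq_disable, "cli"), with the new NATIVE_LABEL helper; the point is that the start/end patch-site labels become global symbols rather than file-local ones:

	__visible extern const char start_pv_irq_ops_irq_disable[],
				    end_pv_irq_ops_irq_disable[];
	asm("\n\t.globl start_pv_irq_ops_irq_disable\n"
	    "start_pv_irq_ops_irq_disable:\n\t"
	    "cli"
	    "\n\t.globl end_pv_irq_ops_irq_disable\n"
	    "end_pv_irq_ops_irq_disable:\n\t");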
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 947b5c417e83..1ac6114c9ea5 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h | |||
@@ -104,7 +104,7 @@ extern void pci_iommu_alloc(void); | |||
104 | struct msi_desc; | 104 | struct msi_desc; |
105 | int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); | 105 | int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); |
106 | void native_teardown_msi_irq(unsigned int irq); | 106 | void native_teardown_msi_irq(unsigned int irq); |
107 | void native_restore_msi_irqs(struct pci_dev *dev, int irq); | 107 | void native_restore_msi_irqs(struct pci_dev *dev); |
108 | int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, | 108 | int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, |
109 | unsigned int irq_base, unsigned int irq_offset); | 109 | unsigned int irq_base, unsigned int irq_offset); |
110 | #else | 110 | #else |
@@ -125,7 +125,6 @@ int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, | |||
125 | 125 | ||
126 | /* generic pci stuff */ | 126 | /* generic pci stuff */ |
127 | #include <asm-generic/pci.h> | 127 | #include <asm-generic/pci.h> |
128 | #define PCIBIOS_MAX_MEM_32 0xffffffff | ||
129 | 128 | ||
130 | #ifdef CONFIG_NUMA | 129 | #ifdef CONFIG_NUMA |
131 | /* Returns the node based on pci bus */ | 130 | /* Returns the node based on pci bus */ |
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index a83aa44bb1fb..1aa9ccd43223 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h | |||
@@ -121,7 +121,8 @@ | |||
121 | 121 | ||
122 | /* Set of bits not changed in pte_modify */ | 122 | /* Set of bits not changed in pte_modify */ |
123 | #define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ | 123 | #define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ |
124 | _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY) | 124 | _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \ |
125 | _PAGE_SOFT_DIRTY) | ||
125 | #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) | 126 | #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) |
126 | 127 | ||
127 | #define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT) | 128 | #define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT) |
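_PAGE_CHG_MASK is the set of PTE bits that survive a protection change: roughly, pte_modify() keeps the masked bits and takes everything else from the new protection, so adding _PAGE_SOFT_DIRTY means a protection change no longer drops the soft-dirty tracking bit. A rough sketch of that behaviour (the real pte_modify() lives in pgtable.h and works on pte_t/pgprot_t):

	static inline unsigned long example_pte_modify(unsigned long pteval,
						       unsigned long newprot)
	{
		/* Bits in _PAGE_CHG_MASK (pfn, caching, accessed/dirty and now
		 * soft-dirty) are preserved; the rest comes from newprot. */
		return (pteval & _PAGE_CHG_MASK) | (newprot & ~_PAGE_CHG_MASK);
	}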
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 3ba3de457d05..e1940c06ed02 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h | |||
@@ -163,9 +163,11 @@ struct thread_info { | |||
163 | */ | 163 | */ |
164 | #ifndef __ASSEMBLY__ | 164 | #ifndef __ASSEMBLY__ |
165 | 165 | ||
166 | 166 | #define current_stack_pointer ({ \ | |
167 | /* how to get the current stack pointer from C */ | 167 | unsigned long sp; \ |
168 | register unsigned long current_stack_pointer asm("esp") __used; | 168 | asm("mov %%esp,%0" : "=g" (sp)); \ |
169 | sp; \ | ||
170 | }) | ||
169 | 171 | ||
170 | /* how to get the thread information struct from C */ | 172 | /* how to get the thread information struct from C */ |
171 | static inline struct thread_info *current_thread_info(void) | 173 | static inline struct thread_info *current_thread_info(void) |
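The register variable becomes a statement-expression macro that reads %esp explicitly; the main consumer is current_thread_info() just below, which (assuming thread_info still sits at the bottom of a power-of-two sized kernel stack) boils down to masking the stack pointer. A sketch of that consumer:

	static inline struct thread_info *example_current_thread_info(void)
	{
		/* Round the current stack pointer down to the start of the
		 * THREAD_SIZE-aligned stack, where thread_info lives. */
		return (struct thread_info *)
			(current_stack_pointer & ~(THREAD_SIZE - 1));
	}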
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index 6b964a0b86d1..062921ef34e9 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h | |||
@@ -12,7 +12,6 @@ extern enum uv_system_type get_uv_system_type(void); | |||
12 | extern int is_uv_system(void); | 12 | extern int is_uv_system(void); |
13 | extern void uv_cpu_init(void); | 13 | extern void uv_cpu_init(void); |
14 | extern void uv_nmi_init(void); | 14 | extern void uv_nmi_init(void); |
15 | extern void uv_register_nmi_notifier(void); | ||
16 | extern void uv_system_init(void); | 15 | extern void uv_system_init(void); |
17 | extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, | 16 | extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, |
18 | struct mm_struct *mm, | 17 | struct mm_struct *mm, |
@@ -26,7 +25,6 @@ static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; } | |||
26 | static inline int is_uv_system(void) { return 0; } | 25 | static inline int is_uv_system(void) { return 0; } |
27 | static inline void uv_cpu_init(void) { } | 26 | static inline void uv_cpu_init(void) { } |
28 | static inline void uv_system_init(void) { } | 27 | static inline void uv_system_init(void) { } |
29 | static inline void uv_register_nmi_notifier(void) { } | ||
30 | static inline const struct cpumask * | 28 | static inline const struct cpumask * |
31 | uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, | 29 | uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, |
32 | unsigned long start, unsigned long end, unsigned int cpu) | 30 | unsigned long start, unsigned long end, unsigned int cpu) |
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 966502d4682e..2067264fb7f5 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
@@ -100,6 +100,7 @@ | |||
100 | 100 | ||
101 | #define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f | 101 | #define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f |
102 | #define VMX_MISC_SAVE_EFER_LMA 0x00000020 | 102 | #define VMX_MISC_SAVE_EFER_LMA 0x00000020 |
103 | #define VMX_MISC_ACTIVITY_HLT 0x00000040 | ||
103 | 104 | ||
104 | /* VMCS Encodings */ | 105 | /* VMCS Encodings */ |
105 | enum vmcs_field { | 106 | enum vmcs_field { |
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 0f1be11e43d2..e45e4da96bf1 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h | |||
@@ -181,7 +181,7 @@ struct x86_msi_ops { | |||
181 | u8 hpet_id); | 181 | u8 hpet_id); |
182 | void (*teardown_msi_irq)(unsigned int irq); | 182 | void (*teardown_msi_irq)(unsigned int irq); |
183 | void (*teardown_msi_irqs)(struct pci_dev *dev); | 183 | void (*teardown_msi_irqs)(struct pci_dev *dev); |
184 | void (*restore_msi_irqs)(struct pci_dev *dev, int irq); | 184 | void (*restore_msi_irqs)(struct pci_dev *dev); |
185 | int (*setup_hpet_msi)(unsigned int irq, unsigned int id); | 185 | int (*setup_hpet_msi)(unsigned int irq, unsigned int id); |
186 | u32 (*msi_mask_irq)(struct msi_desc *desc, u32 mask, u32 flag); | 186 | u32 (*msi_mask_irq)(struct msi_desc *desc, u32 mask, u32 flag); |
187 | u32 (*msix_mask_irq)(struct msi_desc *desc, u32 flag); | 187 | u32 (*msix_mask_irq)(struct msi_desc *desc, u32 flag); |
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index b913915e8e63..787e1bb5aafc 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h | |||
@@ -52,7 +52,8 @@ extern unsigned long set_phys_range_identity(unsigned long pfn_s, | |||
52 | extern int m2p_add_override(unsigned long mfn, struct page *page, | 52 | extern int m2p_add_override(unsigned long mfn, struct page *page, |
53 | struct gnttab_map_grant_ref *kmap_op); | 53 | struct gnttab_map_grant_ref *kmap_op); |
54 | extern int m2p_remove_override(struct page *page, | 54 | extern int m2p_remove_override(struct page *page, |
55 | struct gnttab_map_grant_ref *kmap_op); | 55 | struct gnttab_map_grant_ref *kmap_op, |
56 | unsigned long mfn); | ||
56 | extern struct page *m2p_find_override(unsigned long mfn); | 57 | extern struct page *m2p_find_override(unsigned long mfn); |
57 | extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); | 58 | extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); |
58 | 59 | ||
@@ -121,7 +122,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) | |||
121 | pfn = m2p_find_override_pfn(mfn, ~0); | 122 | pfn = m2p_find_override_pfn(mfn, ~0); |
122 | } | 123 | } |
123 | 124 | ||
124 | /* | 125 | /* |
125 | * pfn is ~0 if there are no entries in the m2p for mfn or if the | 126 | * pfn is ~0 if there are no entries in the m2p for mfn or if the |
126 | * entry doesn't map back to the mfn and m2p_override doesn't have a | 127 | * entry doesn't map back to the mfn and m2p_override doesn't have a |
127 | * valid entry for it. | 128 | * valid entry for it. |
@@ -167,7 +168,12 @@ static inline xpaddr_t machine_to_phys(xmaddr_t machine) | |||
167 | */ | 168 | */ |
168 | static inline unsigned long mfn_to_local_pfn(unsigned long mfn) | 169 | static inline unsigned long mfn_to_local_pfn(unsigned long mfn) |
169 | { | 170 | { |
170 | unsigned long pfn = mfn_to_pfn(mfn); | 171 | unsigned long pfn; |
172 | |||
173 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
174 | return mfn; | ||
175 | |||
176 | pfn = mfn_to_pfn(mfn); | ||
171 | if (get_phys_to_machine(pfn) != mfn) | 177 | if (get_phys_to_machine(pfn) != mfn) |
172 | return -1; /* force !pfn_valid() */ | 178 | return -1; /* force !pfn_valid() */ |
173 | return pfn; | 179 | return pfn; |
@@ -222,5 +228,6 @@ void make_lowmem_page_readonly(void *vaddr); | |||
222 | void make_lowmem_page_readwrite(void *vaddr); | 228 | void make_lowmem_page_readwrite(void *vaddr); |
223 | 229 | ||
224 | #define xen_remap(cookie, size) ioremap((cookie), (size)); | 230 | #define xen_remap(cookie, size) ioremap((cookie), (size)); |
231 | #define xen_unmap(cookie) iounmap((cookie)) | ||
225 | 232 | ||
226 | #endif /* _ASM_X86_XEN_PAGE_H */ | 233 | #endif /* _ASM_X86_XEN_PAGE_H */ |
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index b8f1c0176cbc..462efe746d77 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h | |||
@@ -28,6 +28,9 @@ | |||
28 | /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ | 28 | /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ |
29 | #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) | 29 | #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) |
30 | 30 | ||
31 | /* A partition's reference time stamp counter (TSC) page */ | ||
32 | #define HV_X64_MSR_REFERENCE_TSC 0x40000021 | ||
33 | |||
31 | /* | 34 | /* |
32 | * There is a single feature flag that signifies the presence of the MSR | 35 | * There is a single feature flag that signifies the presence of the MSR |
33 | * that can be used to retrieve both the local APIC Timer frequency as | 36 | * that can be used to retrieve both the local APIC Timer frequency as |
@@ -198,6 +201,9 @@ | |||
198 | #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ | 201 | #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ |
199 | (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) | 202 | (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) |
200 | 203 | ||
204 | #define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 | ||
205 | #define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 | ||
206 | |||
201 | #define HV_PROCESSOR_POWER_STATE_C0 0 | 207 | #define HV_PROCESSOR_POWER_STATE_C0 0 |
202 | #define HV_PROCESSOR_POWER_STATE_C1 1 | 208 | #define HV_PROCESSOR_POWER_STATE_C1 1 |
203 | #define HV_PROCESSOR_POWER_STATE_C2 2 | 209 | #define HV_PROCESSOR_POWER_STATE_C2 2 |
@@ -210,4 +216,11 @@ | |||
210 | #define HV_STATUS_INVALID_ALIGNMENT 4 | 216 | #define HV_STATUS_INVALID_ALIGNMENT 4 |
211 | #define HV_STATUS_INSUFFICIENT_BUFFERS 19 | 217 | #define HV_STATUS_INSUFFICIENT_BUFFERS 19 |
212 | 218 | ||
219 | typedef struct _HV_REFERENCE_TSC_PAGE { | ||
220 | __u32 tsc_sequence; | ||
221 | __u32 res1; | ||
222 | __u64 tsc_scale; | ||
223 | __s64 tsc_offset; | ||
224 | } HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; | ||
225 | |||
213 | #endif | 226 | #endif |
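The new HV_REFERENCE_TSC_PAGE is the shared page a guest uses to turn a raw TSC read into Hyper-V reference time (100ns units) without an MSR exit. A sketch of the read side, following the usual sequence-counter pattern (assumptions: a 128-bit multiply is available and a sequence of 0 means the page is not usable yet):

	static __u64 example_read_ref_tsc(const volatile HV_REFERENCE_TSC_PAGE *pg,
					  __u64 (*read_tsc)(void))
	{
		__u32 seq;
		__u64 scale, tsc;
		__s64 offset;

		do {
			seq = pg->tsc_sequence;
			if (seq == 0)
				return 0;	/* fall back to HV_X64_MSR_TIME_REF_COUNT */
			scale  = pg->tsc_scale;
			offset = pg->tsc_offset;
			tsc    = read_tsc();
		} while (pg->tsc_sequence != seq);

		/* reference time = ((tsc * scale) >> 64) + offset */
		return (__u64)(((unsigned __int128)tsc * scale) >> 64) + offset;
	}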
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index 59cea185ad1d..c19fc60ff062 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h | |||
@@ -528,6 +528,7 @@ | |||
528 | #define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e | 528 | #define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e |
529 | #define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f | 529 | #define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f |
530 | #define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490 | 530 | #define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490 |
531 | #define MSR_IA32_VMX_VMFUNC 0x00000491 | ||
531 | 532 | ||
532 | /* VMX_BASIC bits and bitmasks */ | 533 | /* VMX_BASIC bits and bitmasks */ |
533 | #define VMX_BASIC_VMCS_SIZE_SHIFT 32 | 534 | #define VMX_BASIC_VMCS_SIZE_SHIFT 32 |
diff --git a/arch/x86/include/uapi/asm/sembuf.h b/arch/x86/include/uapi/asm/sembuf.h index ee50c801f7b7..cc2d6a3aeae7 100644 --- a/arch/x86/include/uapi/asm/sembuf.h +++ b/arch/x86/include/uapi/asm/sembuf.h | |||
@@ -13,12 +13,12 @@ | |||
13 | struct semid64_ds { | 13 | struct semid64_ds { |
14 | struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ | 14 | struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ |
15 | __kernel_time_t sem_otime; /* last semop time */ | 15 | __kernel_time_t sem_otime; /* last semop time */ |
16 | unsigned long __unused1; | 16 | __kernel_ulong_t __unused1; |
17 | __kernel_time_t sem_ctime; /* last change time */ | 17 | __kernel_time_t sem_ctime; /* last change time */ |
18 | unsigned long __unused2; | 18 | __kernel_ulong_t __unused2; |
19 | unsigned long sem_nsems; /* no. of semaphores in array */ | 19 | __kernel_ulong_t sem_nsems; /* no. of semaphores in array */ |
20 | unsigned long __unused3; | 20 | __kernel_ulong_t __unused3; |
21 | unsigned long __unused4; | 21 | __kernel_ulong_t __unused4; |
22 | }; | 22 | }; |
23 | 23 | ||
24 | #endif /* _ASM_X86_SEMBUF_H */ | 24 | #endif /* _ASM_X86_SEMBUF_H */ |
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 6c0b43bd024b..1dac94265b59 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
@@ -46,7 +46,6 @@ | |||
46 | 46 | ||
47 | #include "sleep.h" /* To include x86_acpi_suspend_lowlevel */ | 47 | #include "sleep.h" /* To include x86_acpi_suspend_lowlevel */ |
48 | static int __initdata acpi_force = 0; | 48 | static int __initdata acpi_force = 0; |
49 | u32 acpi_rsdt_forced; | ||
50 | int acpi_disabled; | 49 | int acpi_disabled; |
51 | EXPORT_SYMBOL(acpi_disabled); | 50 | EXPORT_SYMBOL(acpi_disabled); |
52 | 51 | ||
@@ -1034,9 +1033,7 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger, | |||
1034 | 1033 | ||
1035 | if (!acpi_ioapic) | 1034 | if (!acpi_ioapic) |
1036 | return 0; | 1035 | return 0; |
1037 | if (!dev) | 1036 | if (!dev || !dev_is_pci(dev)) |
1038 | return 0; | ||
1039 | if (dev->bus != &pci_bus_type) | ||
1040 | return 0; | 1037 | return 0; |
1041 | 1038 | ||
1042 | pdev = to_pci_dev(dev); | 1039 | pdev = to_pci_dev(dev); |
@@ -1564,7 +1561,7 @@ static int __init parse_acpi(char *arg) | |||
1564 | } | 1561 | } |
1565 | /* acpi=rsdt use RSDT instead of XSDT */ | 1562 | /* acpi=rsdt use RSDT instead of XSDT */ |
1566 | else if (strcmp(arg, "rsdt") == 0) { | 1563 | else if (strcmp(arg, "rsdt") == 0) { |
1567 | acpi_rsdt_forced = 1; | 1564 | acpi_gbl_do_not_use_xsdt = TRUE; |
1568 | } | 1565 | } |
1569 | /* "acpi=noirq" disables ACPI interrupt routing */ | 1566 | /* "acpi=noirq" disables ACPI interrupt routing */ |
1570 | else if (strcmp(arg, "noirq") == 0) { | 1567 | else if (strcmp(arg, "noirq") == 0) { |
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 5d5b9eb2b7a4..2c621a6b901a 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c | |||
@@ -20,9 +20,7 @@ | |||
20 | #include <asm/apic.h> | 20 | #include <asm/apic.h> |
21 | #include <asm/ipi.h> | 21 | #include <asm/ipi.h> |
22 | 22 | ||
23 | #ifdef CONFIG_ACPI | 23 | #include <linux/acpi.h> |
24 | #include <acpi/acpi_bus.h> | ||
25 | #endif | ||
26 | 24 | ||
27 | static struct apic apic_physflat; | 25 | static struct apic apic_physflat; |
28 | static struct apic apic_flat; | 26 | static struct apic apic_flat; |
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index a43f068ebec1..6ad4658de705 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
@@ -37,9 +37,6 @@ | |||
37 | #include <linux/kthread.h> | 37 | #include <linux/kthread.h> |
38 | #include <linux/jiffies.h> /* time_after() */ | 38 | #include <linux/jiffies.h> /* time_after() */ |
39 | #include <linux/slab.h> | 39 | #include <linux/slab.h> |
40 | #ifdef CONFIG_ACPI | ||
41 | #include <acpi/acpi_bus.h> | ||
42 | #endif | ||
43 | #include <linux/bootmem.h> | 40 | #include <linux/bootmem.h> |
44 | #include <linux/dmar.h> | 41 | #include <linux/dmar.h> |
45 | #include <linux/hpet.h> | 42 | #include <linux/hpet.h> |
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index ad0dc0428baf..d263b1307de1 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c | |||
@@ -980,7 +980,6 @@ void __init uv_system_init(void) | |||
980 | uv_nmi_setup(); | 980 | uv_nmi_setup(); |
981 | uv_cpu_init(); | 981 | uv_cpu_init(); |
982 | uv_scir_register_cpu_notifier(); | 982 | uv_scir_register_cpu_notifier(); |
983 | uv_register_nmi_notifier(); | ||
984 | proc_mkdir("sgi_uv", NULL); | 983 | proc_mkdir("sgi_uv", NULL); |
985 | 984 | ||
986 | /* register Legacy VGA I/O redirection handler */ | 985 | /* register Legacy VGA I/O redirection handler */ |
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c index e2dbcb7dabdd..83a7995625a6 100644 --- a/arch/x86/kernel/check.c +++ b/arch/x86/kernel/check.c | |||
@@ -91,7 +91,7 @@ void __init setup_bios_corruption_check(void) | |||
91 | 91 | ||
92 | corruption_check_size = round_up(corruption_check_size, PAGE_SIZE); | 92 | corruption_check_size = round_up(corruption_check_size, PAGE_SIZE); |
93 | 93 | ||
94 | for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL) { | 94 | for_each_free_mem_range(i, NUMA_NO_NODE, &start, &end, NULL) { |
95 | start = clamp_t(phys_addr_t, round_up(start, PAGE_SIZE), | 95 | start = clamp_t(phys_addr_t, round_up(start, PAGE_SIZE), |
96 | PAGE_SIZE, corruption_check_size); | 96 | PAGE_SIZE, corruption_check_size); |
97 | end = clamp_t(phys_addr_t, round_down(end, PAGE_SIZE), | 97 | end = clamp_t(phys_addr_t, round_down(end, PAGE_SIZE), |
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 4a6ff747aaad..8fffd845e22b 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c | |||
@@ -433,7 +433,7 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device, | |||
433 | if (c->x86 >= 0x15) | 433 | if (c->x86 >= 0x15) |
434 | snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86); | 434 | snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86); |
435 | 435 | ||
436 | if (request_firmware(&fw, (const char *)fw_name, device)) { | 436 | if (request_firmware_direct(&fw, (const char *)fw_name, device)) { |
437 | pr_debug("failed to load file %s\n", fw_name); | 437 | pr_debug("failed to load file %s\n", fw_name); |
438 | goto out; | 438 | goto out; |
439 | } | 439 | } |
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 5fb2cebf556b..a276fa75d9b5 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c | |||
@@ -278,7 +278,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device, | |||
278 | sprintf(name, "intel-ucode/%02x-%02x-%02x", | 278 | sprintf(name, "intel-ucode/%02x-%02x-%02x", |
279 | c->x86, c->x86_model, c->x86_mask); | 279 | c->x86, c->x86_model, c->x86_mask); |
280 | 280 | ||
281 | if (request_firmware(&firmware, name, device)) { | 281 | if (request_firmware_direct(&firmware, name, device)) { |
282 | pr_debug("data file %s load failed\n", name); | 282 | pr_debug("data file %s load failed\n", name); |
283 | return UCODE_NFOUND; | 283 | return UCODE_NFOUND; |
284 | } | 284 | } |
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 174da5fc5a7b..988c00a1f60d 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -1120,7 +1120,7 @@ void __init memblock_find_dma_reserve(void) | |||
1120 | nr_pages += end_pfn - start_pfn; | 1120 | nr_pages += end_pfn - start_pfn; |
1121 | } | 1121 | } |
1122 | 1122 | ||
1123 | for_each_free_mem_range(u, MAX_NUMNODES, &start, &end, NULL) { | 1123 | for_each_free_mem_range(u, NUMA_NO_NODE, &start, &end, NULL) { |
1124 | start_pfn = min_t(unsigned long, PFN_UP(start), MAX_DMA_PFN); | 1124 | start_pfn = min_t(unsigned long, PFN_UP(start), MAX_DMA_PFN); |
1125 | end_pfn = min_t(unsigned long, PFN_DOWN(end), MAX_DMA_PFN); | 1125 | end_pfn = min_t(unsigned long, PFN_DOWN(end), MAX_DMA_PFN); |
1126 | if (start_pfn < end_pfn) | 1126 | if (start_pfn < end_pfn) |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 6dd802c6d780..713f1b3bad52 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -500,6 +500,38 @@ void __init kvm_guest_init(void) | |||
500 | #endif | 500 | #endif |
501 | } | 501 | } |
502 | 502 | ||
503 | static noinline uint32_t __kvm_cpuid_base(void) | ||
504 | { | ||
505 | if (boot_cpu_data.cpuid_level < 0) | ||
506 | return 0; /* So we don't blow up on old processors */ | ||
507 | |||
508 | if (cpu_has_hypervisor) | ||
509 | return hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0); | ||
510 | |||
511 | return 0; | ||
512 | } | ||
513 | |||
514 | static inline uint32_t kvm_cpuid_base(void) | ||
515 | { | ||
516 | static int kvm_cpuid_base = -1; | ||
517 | |||
518 | if (kvm_cpuid_base == -1) | ||
519 | kvm_cpuid_base = __kvm_cpuid_base(); | ||
520 | |||
521 | return kvm_cpuid_base; | ||
522 | } | ||
523 | |||
524 | bool kvm_para_available(void) | ||
525 | { | ||
526 | return kvm_cpuid_base() != 0; | ||
527 | } | ||
528 | EXPORT_SYMBOL_GPL(kvm_para_available); | ||
529 | |||
530 | unsigned int kvm_arch_para_features(void) | ||
531 | { | ||
532 | return cpuid_eax(kvm_cpuid_base() | KVM_CPUID_FEATURES); | ||
533 | } | ||
534 | |||
503 | static uint32_t __init kvm_detect(void) | 535 | static uint32_t __init kvm_detect(void) |
504 | { | 536 | { |
505 | return kvm_cpuid_base(); | 537 | return kvm_cpuid_base(); |
@@ -673,7 +705,7 @@ static cpumask_t waiting_cpus; | |||
673 | /* Track spinlock on which a cpu is waiting */ | 705 | /* Track spinlock on which a cpu is waiting */ |
674 | static DEFINE_PER_CPU(struct kvm_lock_waiting, klock_waiting); | 706 | static DEFINE_PER_CPU(struct kvm_lock_waiting, klock_waiting); |
675 | 707 | ||
676 | static void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want) | 708 | __visible void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want) |
677 | { | 709 | { |
678 | struct kvm_lock_waiting *w; | 710 | struct kvm_lock_waiting *w; |
679 | int cpu; | 711 | int cpu; |
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index a3acbac2ee72..19e5adb49a27 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -180,7 +180,7 @@ static void cyc2ns_write_end(int cpu, struct cyc2ns_data *data) | |||
180 | 180 | ||
181 | static void cyc2ns_data_init(struct cyc2ns_data *data) | 181 | static void cyc2ns_data_init(struct cyc2ns_data *data) |
182 | { | 182 | { |
183 | data->cyc2ns_mul = 1U << CYC2NS_SCALE_FACTOR; | 183 | data->cyc2ns_mul = 0; |
184 | data->cyc2ns_shift = CYC2NS_SCALE_FACTOR; | 184 | data->cyc2ns_shift = CYC2NS_SCALE_FACTOR; |
185 | data->cyc2ns_offset = 0; | 185 | data->cyc2ns_offset = 0; |
186 | data->__count = 0; | 186 | data->__count = 0; |
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index 992f890283e9..f6584a90aba3 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c | |||
@@ -33,7 +33,7 @@ | |||
33 | * and vice versa. | 33 | * and vice versa. |
34 | */ | 34 | */ |
35 | 35 | ||
36 | static unsigned long vsmp_save_fl(void) | 36 | asmlinkage unsigned long vsmp_save_fl(void) |
37 | { | 37 | { |
38 | unsigned long flags = native_save_fl(); | 38 | unsigned long flags = native_save_fl(); |
39 | 39 | ||
@@ -43,7 +43,7 @@ static unsigned long vsmp_save_fl(void) | |||
43 | } | 43 | } |
44 | PV_CALLEE_SAVE_REGS_THUNK(vsmp_save_fl); | 44 | PV_CALLEE_SAVE_REGS_THUNK(vsmp_save_fl); |
45 | 45 | ||
46 | static void vsmp_restore_fl(unsigned long flags) | 46 | __visible void vsmp_restore_fl(unsigned long flags) |
47 | { | 47 | { |
48 | if (flags & X86_EFLAGS_IF) | 48 | if (flags & X86_EFLAGS_IF) |
49 | flags &= ~X86_EFLAGS_AC; | 49 | flags &= ~X86_EFLAGS_AC; |
@@ -53,7 +53,7 @@ static void vsmp_restore_fl(unsigned long flags) | |||
53 | } | 53 | } |
54 | PV_CALLEE_SAVE_REGS_THUNK(vsmp_restore_fl); | 54 | PV_CALLEE_SAVE_REGS_THUNK(vsmp_restore_fl); |
55 | 55 | ||
56 | static void vsmp_irq_disable(void) | 56 | asmlinkage void vsmp_irq_disable(void) |
57 | { | 57 | { |
58 | unsigned long flags = native_save_fl(); | 58 | unsigned long flags = native_save_fl(); |
59 | 59 | ||
@@ -61,7 +61,7 @@ static void vsmp_irq_disable(void) | |||
61 | } | 61 | } |
62 | PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_disable); | 62 | PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_disable); |
63 | 63 | ||
64 | static void vsmp_irq_enable(void) | 64 | asmlinkage void vsmp_irq_enable(void) |
65 | { | 65 | { |
66 | unsigned long flags = native_save_fl(); | 66 | unsigned long flags = native_save_fl(); |
67 | 67 | ||
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 021783b1f46a..e48b674639cc 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c | |||
@@ -136,9 +136,9 @@ void arch_teardown_msi_irq(unsigned int irq) | |||
136 | x86_msi.teardown_msi_irq(irq); | 136 | x86_msi.teardown_msi_irq(irq); |
137 | } | 137 | } |
138 | 138 | ||
139 | void arch_restore_msi_irqs(struct pci_dev *dev, int irq) | 139 | void arch_restore_msi_irqs(struct pci_dev *dev) |
140 | { | 140 | { |
141 | x86_msi.restore_msi_irqs(dev, irq); | 141 | x86_msi.restore_msi_irqs(dev); |
142 | } | 142 | } |
143 | u32 arch_msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag) | 143 | u32 arch_msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag) |
144 | { | 144 | { |
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index b89c5db2b832..287e4c85fff9 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig | |||
@@ -80,7 +80,7 @@ config KVM_MMU_AUDIT | |||
80 | depends on KVM && TRACEPOINTS | 80 | depends on KVM && TRACEPOINTS |
81 | ---help--- | 81 | ---help--- |
82 | This option adds a R/W kVM module parameter 'mmu_audit', which allows | 82 | This option adds a R/W kVM module parameter 'mmu_audit', which allows |
83 | audit KVM MMU at runtime. | 83 | auditing of KVM MMU events at runtime. |
84 | 84 | ||
85 | config KVM_DEVICE_ASSIGNMENT | 85 | config KVM_DEVICE_ASSIGNMENT |
86 | bool "KVM legacy PCI device assignment support" | 86 | bool "KVM legacy PCI device assignment support" |
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index f1e4895174b2..a2a1bb7ed8c1 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h | |||
@@ -72,4 +72,12 @@ static inline bool guest_cpuid_has_pcid(struct kvm_vcpu *vcpu) | |||
72 | return best && (best->ecx & bit(X86_FEATURE_PCID)); | 72 | return best && (best->ecx & bit(X86_FEATURE_PCID)); |
73 | } | 73 | } |
74 | 74 | ||
75 | static inline bool guest_cpuid_has_x2apic(struct kvm_vcpu *vcpu) | ||
76 | { | ||
77 | struct kvm_cpuid_entry2 *best; | ||
78 | |||
79 | best = kvm_find_cpuid_entry(vcpu, 1, 0); | ||
80 | return best && (best->ecx & bit(X86_FEATURE_X2APIC)); | ||
81 | } | ||
82 | |||
75 | #endif | 83 | #endif |
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 412a5aa0ef94..518d86471b76 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
@@ -37,6 +37,7 @@ | |||
37 | 37 | ||
38 | #include "irq.h" | 38 | #include "irq.h" |
39 | #include "i8254.h" | 39 | #include "i8254.h" |
40 | #include "x86.h" | ||
40 | 41 | ||
41 | #ifndef CONFIG_X86_64 | 42 | #ifndef CONFIG_X86_64 |
42 | #define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) | 43 | #define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) |
@@ -349,6 +350,23 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period) | |||
349 | atomic_set(&ps->pending, 0); | 350 | atomic_set(&ps->pending, 0); |
350 | ps->irq_ack = 1; | 351 | ps->irq_ack = 1; |
351 | 352 | ||
353 | /* | ||
354 | * Do not allow the guest to program periodic timers with small | ||
355 | * interval, since the hrtimers are not throttled by the host | ||
356 | * scheduler. | ||
357 | */ | ||
358 | if (ps->is_periodic) { | ||
359 | s64 min_period = min_timer_period_us * 1000LL; | ||
360 | |||
361 | if (ps->period < min_period) { | ||
362 | pr_info_ratelimited( | ||
363 | "kvm: requested %lld ns " | ||
364 | "i8254 timer period limited to %lld ns\n", | ||
365 | ps->period, min_period); | ||
366 | ps->period = min_period; | ||
367 | } | ||
368 | } | ||
369 | |||
352 | hrtimer_start(&ps->timer, ktime_add_ns(ktime_get(), interval), | 370 | hrtimer_start(&ps->timer, ktime_add_ns(ktime_get(), interval), |
353 | HRTIMER_MODE_ABS); | 371 | HRTIMER_MODE_ABS); |
354 | } | 372 | } |
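min_timer_period_us is the same knob the local APIC timer already honoured (its definition is dropped from lapic.c below and presumably shared via the x86.h include added above); its default is 500 us. A standalone illustration of the clamp this hunk applies to periodic PIT programming:

	#include <stdio.h>

	/* Default of the existing module parameter (500 us). */
	static unsigned int min_timer_period_us = 500;

	/* Same clamp as create_pit_timer() above, pulled out for illustration. */
	static long long clamp_pit_period(long long period_ns)
	{
		long long min_period = min_timer_period_us * 1000LL;

		return period_ns < min_period ? min_period : period_ns;
	}

	int main(void)
	{
		/* A guest asking for a 100 us periodic tick is throttled to 500 us. */
		printf("%lld ns -> %lld ns\n", 100000LL, clamp_pit_period(100000LL));
		/* A 1 ms request is left alone. */
		printf("%lld ns -> %lld ns\n", 1000000LL, clamp_pit_period(1000000LL));
		return 0;
	}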
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 775702f649ca..9736529ade08 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -71,9 +71,6 @@ | |||
71 | #define VEC_POS(v) ((v) & (32 - 1)) | 71 | #define VEC_POS(v) ((v) & (32 - 1)) |
72 | #define REG_POS(v) (((v) >> 5) << 4) | 72 | #define REG_POS(v) (((v) >> 5) << 4) |
73 | 73 | ||
74 | static unsigned int min_timer_period_us = 500; | ||
75 | module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); | ||
76 | |||
77 | static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val) | 74 | static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val) |
78 | { | 75 | { |
79 | *((u32 *) (apic->regs + reg_off)) = val; | 76 | *((u32 *) (apic->regs + reg_off)) = val; |
@@ -435,7 +432,7 @@ static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu) | |||
435 | u8 val; | 432 | u8 val; |
436 | if (pv_eoi_get_user(vcpu, &val) < 0) | 433 | if (pv_eoi_get_user(vcpu, &val) < 0) |
437 | apic_debug("Can't read EOI MSR value: 0x%llx\n", | 434 | apic_debug("Can't read EOI MSR value: 0x%llx\n", |
438 | (unsigned long long)vcpi->arch.pv_eoi.msr_val); | 435 | (unsigned long long)vcpu->arch.pv_eoi.msr_val); |
439 | return val & 0x1; | 436 | return val & 0x1; |
440 | } | 437 | } |
441 | 438 | ||
@@ -443,7 +440,7 @@ static void pv_eoi_set_pending(struct kvm_vcpu *vcpu) | |||
443 | { | 440 | { |
444 | if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) { | 441 | if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) { |
445 | apic_debug("Can't set EOI MSR value: 0x%llx\n", | 442 | apic_debug("Can't set EOI MSR value: 0x%llx\n", |
446 | (unsigned long long)vcpi->arch.pv_eoi.msr_val); | 443 | (unsigned long long)vcpu->arch.pv_eoi.msr_val); |
447 | return; | 444 | return; |
448 | } | 445 | } |
449 | __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); | 446 | __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); |
@@ -453,7 +450,7 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu) | |||
453 | { | 450 | { |
454 | if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) { | 451 | if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) { |
455 | apic_debug("Can't clear EOI MSR value: 0x%llx\n", | 452 | apic_debug("Can't clear EOI MSR value: 0x%llx\n", |
456 | (unsigned long long)vcpi->arch.pv_eoi.msr_val); | 453 | (unsigned long long)vcpu->arch.pv_eoi.msr_val); |
457 | return; | 454 | return; |
458 | } | 455 | } |
459 | __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); | 456 | __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); |
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index c8b0d0d2da5c..6a11845fd8b9 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
@@ -65,7 +65,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, | |||
65 | struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map); | 65 | struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map); |
66 | 66 | ||
67 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); | 67 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); |
68 | void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); | 68 | int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info); |
69 | void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, | 69 | void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, |
70 | struct kvm_lapic_state *s); | 70 | struct kvm_lapic_state *s); |
71 | int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); | 71 | int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 40772ef0f2b1..e50425d0f5f7 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -2659,6 +2659,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | |||
2659 | int emulate = 0; | 2659 | int emulate = 0; |
2660 | gfn_t pseudo_gfn; | 2660 | gfn_t pseudo_gfn; |
2661 | 2661 | ||
2662 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | ||
2663 | return 0; | ||
2664 | |||
2662 | for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { | 2665 | for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { |
2663 | if (iterator.level == level) { | 2666 | if (iterator.level == level) { |
2664 | mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, | 2667 | mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, |
@@ -2829,6 +2832,9 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, | |||
2829 | bool ret = false; | 2832 | bool ret = false; |
2830 | u64 spte = 0ull; | 2833 | u64 spte = 0ull; |
2831 | 2834 | ||
2835 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | ||
2836 | return false; | ||
2837 | |||
2832 | if (!page_fault_can_be_fast(error_code)) | 2838 | if (!page_fault_can_be_fast(error_code)) |
2833 | return false; | 2839 | return false; |
2834 | 2840 | ||
@@ -3224,6 +3230,9 @@ static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr) | |||
3224 | struct kvm_shadow_walk_iterator iterator; | 3230 | struct kvm_shadow_walk_iterator iterator; |
3225 | u64 spte = 0ull; | 3231 | u64 spte = 0ull; |
3226 | 3232 | ||
3233 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | ||
3234 | return spte; | ||
3235 | |||
3227 | walk_shadow_page_lockless_begin(vcpu); | 3236 | walk_shadow_page_lockless_begin(vcpu); |
3228 | for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) | 3237 | for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) |
3229 | if (!is_shadow_present_pte(spte)) | 3238 | if (!is_shadow_present_pte(spte)) |
@@ -4510,6 +4519,9 @@ int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]) | |||
4510 | u64 spte; | 4519 | u64 spte; |
4511 | int nr_sptes = 0; | 4520 | int nr_sptes = 0; |
4512 | 4521 | ||
4522 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | ||
4523 | return nr_sptes; | ||
4524 | |||
4513 | walk_shadow_page_lockless_begin(vcpu); | 4525 | walk_shadow_page_lockless_begin(vcpu); |
4514 | for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) { | 4526 | for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) { |
4515 | sptes[iterator.level-1] = spte; | 4527 | sptes[iterator.level-1] = spte; |
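Note: all four mmu.c hunks add the same guard — the shadow-walk entry points now bail out when the MMU root is no longer valid, instead of walking page tables that may already have been torn down by a parallel context. Below is a minimal userspace-style sketch of that guard pattern; it reuses the VALID_PAGE/root_hpa names from the hunks, but the surrounding scaffolding is invented purely for illustration.

    #include <stdint.h>
    #include <stdio.h>

    #define INVALID_PAGE ((uint64_t)-1)
    #define VALID_PAGE(p) ((p) != INVALID_PAGE)

    struct mmu { uint64_t root_hpa; };

    /* Any walker refuses to touch the tables once the root is gone. */
    static int walk_shadow(const struct mmu *mmu, uint64_t addr)
    {
        if (!VALID_PAGE(mmu->root_hpa))
            return 0;                       /* early return, as in the hunks */
        printf("walk root=%#llx addr=%#llx\n",
               (unsigned long long)mmu->root_hpa,
               (unsigned long long)addr);
        return 1;
    }

    int main(void)
    {
        struct mmu m = { .root_hpa = INVALID_PAGE };
        walk_shadow(&m, 0x1000);            /* refused: root torn down */
        m.root_hpa = 0x42000;
        walk_shadow(&m, 0x1000);            /* proceeds */
        return 0;
    }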
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index ad75d77999d0..cba218a2f08d 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -569,6 +569,9 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
569 | if (FNAME(gpte_changed)(vcpu, gw, top_level)) | 569 | if (FNAME(gpte_changed)(vcpu, gw, top_level)) |
570 | goto out_gpte_changed; | 570 | goto out_gpte_changed; |
571 | 571 | ||
572 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | ||
573 | goto out_gpte_changed; | ||
574 | |||
572 | for (shadow_walk_init(&it, vcpu, addr); | 575 | for (shadow_walk_init(&it, vcpu, addr); |
573 | shadow_walk_okay(&it) && it.level > gw->level; | 576 | shadow_walk_okay(&it) && it.level > gw->level; |
574 | shadow_walk_next(&it)) { | 577 | shadow_walk_next(&it)) { |
@@ -820,6 +823,11 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
820 | */ | 823 | */ |
821 | mmu_topup_memory_caches(vcpu); | 824 | mmu_topup_memory_caches(vcpu); |
822 | 825 | ||
826 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) { | ||
827 | WARN_ON(1); | ||
828 | return; | ||
829 | } | ||
830 | |||
823 | spin_lock(&vcpu->kvm->mmu_lock); | 831 | spin_lock(&vcpu->kvm->mmu_lock); |
824 | for_each_shadow_entry(vcpu, gva, iterator) { | 832 | for_each_shadow_entry(vcpu, gva, iterator) { |
825 | level = iterator.level; | 833 | level = iterator.level; |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index c7168a5cff1b..e81df8fce027 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -1671,6 +1671,19 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) | |||
1671 | mark_dirty(svm->vmcb, VMCB_ASID); | 1671 | mark_dirty(svm->vmcb, VMCB_ASID); |
1672 | } | 1672 | } |
1673 | 1673 | ||
1674 | static u64 svm_get_dr6(struct kvm_vcpu *vcpu) | ||
1675 | { | ||
1676 | return to_svm(vcpu)->vmcb->save.dr6; | ||
1677 | } | ||
1678 | |||
1679 | static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value) | ||
1680 | { | ||
1681 | struct vcpu_svm *svm = to_svm(vcpu); | ||
1682 | |||
1683 | svm->vmcb->save.dr6 = value; | ||
1684 | mark_dirty(svm->vmcb, VMCB_DR); | ||
1685 | } | ||
1686 | |||
1674 | static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) | 1687 | static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) |
1675 | { | 1688 | { |
1676 | struct vcpu_svm *svm = to_svm(vcpu); | 1689 | struct vcpu_svm *svm = to_svm(vcpu); |
@@ -4286,6 +4299,8 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
4286 | .set_idt = svm_set_idt, | 4299 | .set_idt = svm_set_idt, |
4287 | .get_gdt = svm_get_gdt, | 4300 | .get_gdt = svm_get_gdt, |
4288 | .set_gdt = svm_set_gdt, | 4301 | .set_gdt = svm_set_gdt, |
4302 | .get_dr6 = svm_get_dr6, | ||
4303 | .set_dr6 = svm_set_dr6, | ||
4289 | .set_dr7 = svm_set_dr7, | 4304 | .set_dr7 = svm_set_dr7, |
4290 | .cache_reg = svm_cache_reg, | 4305 | .cache_reg = svm_cache_reg, |
4291 | .get_rflags = svm_get_rflags, | 4306 | .get_rflags = svm_get_rflags, |
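Note: the new svm_get_dr6/svm_set_dr6 helpers read and write DR6 directly in the VMCB save area and are wired into the vendor ops table, so arch-neutral code can ask the vendor module where DR6 actually lives (the matching VMX callbacks appear further down and keep DR6 in software). A small self-contained sketch of that function-pointer indirection; the struct and field names here are illustrative stand-ins, not the kernel's kvm_x86_ops.

    #include <stdint.h>
    #include <stdio.h>

    struct vcpu { uint64_t soft_dr6; uint64_t vmcb_dr6; };

    struct debug_ops {
        uint64_t (*get_dr6)(struct vcpu *v);
        void     (*set_dr6)(struct vcpu *v, uint64_t val);
    };

    /* "SVM-like" backend: DR6 lives in hardware state (the VMCB here). */
    static uint64_t hw_get_dr6(struct vcpu *v)             { return v->vmcb_dr6; }
    static void     hw_set_dr6(struct vcpu *v, uint64_t x) { v->vmcb_dr6 = x; }

    /* "VMX-like" backend: DR6 stays in software, the setter is a no-op. */
    static uint64_t sw_get_dr6(struct vcpu *v)             { return v->soft_dr6; }
    static void     sw_set_dr6(struct vcpu *v, uint64_t x) { (void)v; (void)x; }

    static const struct debug_ops hw_ops = { hw_get_dr6, hw_set_dr6 };
    static const struct debug_ops sw_ops = { sw_get_dr6, sw_set_dr6 };

    int main(void)
    {
        struct vcpu v = { .soft_dr6 = 0xffff0ff0, .vmcb_dr6 = 0xffff0ff0 };
        hw_ops.set_dr6(&v, 0xffff4ff0);
        printf("hw dr6=%#llx sw dr6=%#llx\n",
               (unsigned long long)hw_ops.get_dr6(&v),
               (unsigned long long)sw_ops.get_dr6(&v));
        return 0;
    }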
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index da7837e1349d..a06f101ef64b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -418,6 +418,8 @@ struct vcpu_vmx { | |||
418 | u64 msr_host_kernel_gs_base; | 418 | u64 msr_host_kernel_gs_base; |
419 | u64 msr_guest_kernel_gs_base; | 419 | u64 msr_guest_kernel_gs_base; |
420 | #endif | 420 | #endif |
421 | u32 vm_entry_controls_shadow; | ||
422 | u32 vm_exit_controls_shadow; | ||
421 | /* | 423 | /* |
422 | * loaded_vmcs points to the VMCS currently used in this vcpu. For a | 424 | * loaded_vmcs points to the VMCS currently used in this vcpu. For a |
423 | * non-nested (L1) guest, it always points to vmcs01. For a nested | 425 | * non-nested (L1) guest, it always points to vmcs01. For a nested |
@@ -1056,7 +1058,9 @@ static inline bool is_exception(u32 intr_info) | |||
1056 | == (INTR_TYPE_HARD_EXCEPTION | INTR_INFO_VALID_MASK); | 1058 | == (INTR_TYPE_HARD_EXCEPTION | INTR_INFO_VALID_MASK); |
1057 | } | 1059 | } |
1058 | 1060 | ||
1059 | static void nested_vmx_vmexit(struct kvm_vcpu *vcpu); | 1061 | static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, |
1062 | u32 exit_intr_info, | ||
1063 | unsigned long exit_qualification); | ||
1060 | static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu, | 1064 | static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu, |
1061 | struct vmcs12 *vmcs12, | 1065 | struct vmcs12 *vmcs12, |
1062 | u32 reason, unsigned long qualification); | 1066 | u32 reason, unsigned long qualification); |
@@ -1326,6 +1330,62 @@ static void vmcs_set_bits(unsigned long field, u32 mask) | |||
1326 | vmcs_writel(field, vmcs_readl(field) | mask); | 1330 | vmcs_writel(field, vmcs_readl(field) | mask); |
1327 | } | 1331 | } |
1328 | 1332 | ||
1333 | static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val) | ||
1334 | { | ||
1335 | vmcs_write32(VM_ENTRY_CONTROLS, val); | ||
1336 | vmx->vm_entry_controls_shadow = val; | ||
1337 | } | ||
1338 | |||
1339 | static inline void vm_entry_controls_set(struct vcpu_vmx *vmx, u32 val) | ||
1340 | { | ||
1341 | if (vmx->vm_entry_controls_shadow != val) | ||
1342 | vm_entry_controls_init(vmx, val); | ||
1343 | } | ||
1344 | |||
1345 | static inline u32 vm_entry_controls_get(struct vcpu_vmx *vmx) | ||
1346 | { | ||
1347 | return vmx->vm_entry_controls_shadow; | ||
1348 | } | ||
1349 | |||
1350 | |||
1351 | static inline void vm_entry_controls_setbit(struct vcpu_vmx *vmx, u32 val) | ||
1352 | { | ||
1353 | vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) | val); | ||
1354 | } | ||
1355 | |||
1356 | static inline void vm_entry_controls_clearbit(struct vcpu_vmx *vmx, u32 val) | ||
1357 | { | ||
1358 | vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) & ~val); | ||
1359 | } | ||
1360 | |||
1361 | static inline void vm_exit_controls_init(struct vcpu_vmx *vmx, u32 val) | ||
1362 | { | ||
1363 | vmcs_write32(VM_EXIT_CONTROLS, val); | ||
1364 | vmx->vm_exit_controls_shadow = val; | ||
1365 | } | ||
1366 | |||
1367 | static inline void vm_exit_controls_set(struct vcpu_vmx *vmx, u32 val) | ||
1368 | { | ||
1369 | if (vmx->vm_exit_controls_shadow != val) | ||
1370 | vm_exit_controls_init(vmx, val); | ||
1371 | } | ||
1372 | |||
1373 | static inline u32 vm_exit_controls_get(struct vcpu_vmx *vmx) | ||
1374 | { | ||
1375 | return vmx->vm_exit_controls_shadow; | ||
1376 | } | ||
1377 | |||
1378 | |||
1379 | static inline void vm_exit_controls_setbit(struct vcpu_vmx *vmx, u32 val) | ||
1380 | { | ||
1381 | vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) | val); | ||
1382 | } | ||
1383 | |||
1384 | static inline void vm_exit_controls_clearbit(struct vcpu_vmx *vmx, u32 val) | ||
1385 | { | ||
1386 | vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) & ~val); | ||
1387 | } | ||
1388 | |||
1329 | static void vmx_segment_cache_clear(struct vcpu_vmx *vmx) | 1389 | static void vmx_segment_cache_clear(struct vcpu_vmx *vmx) |
1330 | { | 1390 | { |
1331 | vmx->segment_cache.bitmask = 0; | 1391 | vmx->segment_cache.bitmask = 0; |
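Note: these accessors keep a software shadow of the VM_ENTRY_CONTROLS and VM_EXIT_CONTROLS VMCS fields, so later code can read the current value without a VMREAD and can skip the VMWRITE entirely when nothing changed. A minimal sketch of that write-through-cache shape, with vmcs_write32() stubbed out since only the caching logic is of interest here.

    #include <stdint.h>
    #include <stdio.h>

    static unsigned vmwrites;               /* count how many "VMWRITEs" happen */

    static void vmcs_write32(const char *field, uint32_t val)
    {
        vmwrites++;
        printf("vmwrite %s = %#x\n", field, val);
    }

    struct vmx { uint32_t entry_shadow; };

    static void entry_controls_init(struct vmx *v, uint32_t val)
    {
        vmcs_write32("VM_ENTRY_CONTROLS", val);
        v->entry_shadow = val;
    }

    static void entry_controls_set(struct vmx *v, uint32_t val)
    {
        if (v->entry_shadow != val)         /* only hit the VMCS on change */
            entry_controls_init(v, val);
    }

    static void entry_controls_setbit(struct vmx *v, uint32_t bit)
    {
        entry_controls_set(v, v->entry_shadow | bit);
    }

    int main(void)
    {
        struct vmx v;
        entry_controls_init(&v, 0x11ff);
        entry_controls_setbit(&v, 0x0100);  /* bit already set: no VMWRITE */
        entry_controls_setbit(&v, 0x8000);  /* new bit: exactly one VMWRITE */
        printf("vmwrites = %u\n", vmwrites);
        return 0;
    }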
@@ -1410,11 +1470,11 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) | |||
1410 | vmcs_write32(EXCEPTION_BITMAP, eb); | 1470 | vmcs_write32(EXCEPTION_BITMAP, eb); |
1411 | } | 1471 | } |
1412 | 1472 | ||
1413 | static void clear_atomic_switch_msr_special(unsigned long entry, | 1473 | static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, |
1414 | unsigned long exit) | 1474 | unsigned long entry, unsigned long exit) |
1415 | { | 1475 | { |
1416 | vmcs_clear_bits(VM_ENTRY_CONTROLS, entry); | 1476 | vm_entry_controls_clearbit(vmx, entry); |
1417 | vmcs_clear_bits(VM_EXIT_CONTROLS, exit); | 1477 | vm_exit_controls_clearbit(vmx, exit); |
1418 | } | 1478 | } |
1419 | 1479 | ||
1420 | static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) | 1480 | static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) |
@@ -1425,14 +1485,15 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) | |||
1425 | switch (msr) { | 1485 | switch (msr) { |
1426 | case MSR_EFER: | 1486 | case MSR_EFER: |
1427 | if (cpu_has_load_ia32_efer) { | 1487 | if (cpu_has_load_ia32_efer) { |
1428 | clear_atomic_switch_msr_special(VM_ENTRY_LOAD_IA32_EFER, | 1488 | clear_atomic_switch_msr_special(vmx, |
1489 | VM_ENTRY_LOAD_IA32_EFER, | ||
1429 | VM_EXIT_LOAD_IA32_EFER); | 1490 | VM_EXIT_LOAD_IA32_EFER); |
1430 | return; | 1491 | return; |
1431 | } | 1492 | } |
1432 | break; | 1493 | break; |
1433 | case MSR_CORE_PERF_GLOBAL_CTRL: | 1494 | case MSR_CORE_PERF_GLOBAL_CTRL: |
1434 | if (cpu_has_load_perf_global_ctrl) { | 1495 | if (cpu_has_load_perf_global_ctrl) { |
1435 | clear_atomic_switch_msr_special( | 1496 | clear_atomic_switch_msr_special(vmx, |
1436 | VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, | 1497 | VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, |
1437 | VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL); | 1498 | VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL); |
1438 | return; | 1499 | return; |
@@ -1453,14 +1514,15 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) | |||
1453 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr); | 1514 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr); |
1454 | } | 1515 | } |
1455 | 1516 | ||
1456 | static void add_atomic_switch_msr_special(unsigned long entry, | 1517 | static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx, |
1457 | unsigned long exit, unsigned long guest_val_vmcs, | 1518 | unsigned long entry, unsigned long exit, |
1458 | unsigned long host_val_vmcs, u64 guest_val, u64 host_val) | 1519 | unsigned long guest_val_vmcs, unsigned long host_val_vmcs, |
1520 | u64 guest_val, u64 host_val) | ||
1459 | { | 1521 | { |
1460 | vmcs_write64(guest_val_vmcs, guest_val); | 1522 | vmcs_write64(guest_val_vmcs, guest_val); |
1461 | vmcs_write64(host_val_vmcs, host_val); | 1523 | vmcs_write64(host_val_vmcs, host_val); |
1462 | vmcs_set_bits(VM_ENTRY_CONTROLS, entry); | 1524 | vm_entry_controls_setbit(vmx, entry); |
1463 | vmcs_set_bits(VM_EXIT_CONTROLS, exit); | 1525 | vm_exit_controls_setbit(vmx, exit); |
1464 | } | 1526 | } |
1465 | 1527 | ||
1466 | static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, | 1528 | static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, |
@@ -1472,7 +1534,8 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, | |||
1472 | switch (msr) { | 1534 | switch (msr) { |
1473 | case MSR_EFER: | 1535 | case MSR_EFER: |
1474 | if (cpu_has_load_ia32_efer) { | 1536 | if (cpu_has_load_ia32_efer) { |
1475 | add_atomic_switch_msr_special(VM_ENTRY_LOAD_IA32_EFER, | 1537 | add_atomic_switch_msr_special(vmx, |
1538 | VM_ENTRY_LOAD_IA32_EFER, | ||
1476 | VM_EXIT_LOAD_IA32_EFER, | 1539 | VM_EXIT_LOAD_IA32_EFER, |
1477 | GUEST_IA32_EFER, | 1540 | GUEST_IA32_EFER, |
1478 | HOST_IA32_EFER, | 1541 | HOST_IA32_EFER, |
@@ -1482,7 +1545,7 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, | |||
1482 | break; | 1545 | break; |
1483 | case MSR_CORE_PERF_GLOBAL_CTRL: | 1546 | case MSR_CORE_PERF_GLOBAL_CTRL: |
1484 | if (cpu_has_load_perf_global_ctrl) { | 1547 | if (cpu_has_load_perf_global_ctrl) { |
1485 | add_atomic_switch_msr_special( | 1548 | add_atomic_switch_msr_special(vmx, |
1486 | VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, | 1549 | VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, |
1487 | VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL, | 1550 | VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL, |
1488 | GUEST_IA32_PERF_GLOBAL_CTRL, | 1551 | GUEST_IA32_PERF_GLOBAL_CTRL, |
@@ -1906,7 +1969,9 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr) | |||
1906 | if (!(vmcs12->exception_bitmap & (1u << nr))) | 1969 | if (!(vmcs12->exception_bitmap & (1u << nr))) |
1907 | return 0; | 1970 | return 0; |
1908 | 1971 | ||
1909 | nested_vmx_vmexit(vcpu); | 1972 | nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason, |
1973 | vmcs_read32(VM_EXIT_INTR_INFO), | ||
1974 | vmcs_readl(EXIT_QUALIFICATION)); | ||
1910 | return 1; | 1975 | return 1; |
1911 | } | 1976 | } |
1912 | 1977 | ||
@@ -2279,6 +2344,7 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
2279 | rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high); | 2344 | rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high); |
2280 | nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK | | 2345 | nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK | |
2281 | VMX_MISC_SAVE_EFER_LMA; | 2346 | VMX_MISC_SAVE_EFER_LMA; |
2347 | nested_vmx_misc_low |= VMX_MISC_ACTIVITY_HLT; | ||
2282 | nested_vmx_misc_high = 0; | 2348 | nested_vmx_misc_high = 0; |
2283 | } | 2349 | } |
2284 | 2350 | ||
@@ -2295,32 +2361,10 @@ static inline u64 vmx_control_msr(u32 low, u32 high) | |||
2295 | return low | ((u64)high << 32); | 2361 | return low | ((u64)high << 32); |
2296 | } | 2362 | } |
2297 | 2363 | ||
2298 | /* | 2364 | /* Returns 0 on success, non-0 otherwise. */ |
2299 | * If we allow our guest to use VMX instructions (i.e., nested VMX), we should | ||
2300 | * also let it use VMX-specific MSRs. | ||
2301 | * vmx_get_vmx_msr() and vmx_set_vmx_msr() return 1 when we handled a | ||
2302 | * VMX-specific MSR, or 0 when we haven't (and the caller should handle it | ||
2303 | * like all other MSRs). | ||
2304 | */ | ||
2305 | static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | 2365 | static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) |
2306 | { | 2366 | { |
2307 | if (!nested_vmx_allowed(vcpu) && msr_index >= MSR_IA32_VMX_BASIC && | ||
2308 | msr_index <= MSR_IA32_VMX_TRUE_ENTRY_CTLS) { | ||
2309 | /* | ||
2310 | * According to the spec, processors which do not support VMX | ||
2311 | * should throw a #GP(0) when VMX capability MSRs are read. | ||
2312 | */ | ||
2313 | kvm_queue_exception_e(vcpu, GP_VECTOR, 0); | ||
2314 | return 1; | ||
2315 | } | ||
2316 | |||
2317 | switch (msr_index) { | 2367 | switch (msr_index) { |
2318 | case MSR_IA32_FEATURE_CONTROL: | ||
2319 | if (nested_vmx_allowed(vcpu)) { | ||
2320 | *pdata = to_vmx(vcpu)->nested.msr_ia32_feature_control; | ||
2321 | break; | ||
2322 | } | ||
2323 | return 0; | ||
2324 | case MSR_IA32_VMX_BASIC: | 2368 | case MSR_IA32_VMX_BASIC: |
2325 | /* | 2369 | /* |
2326 | * This MSR reports some information about VMX support. We | 2370 | * This MSR reports some information about VMX support. We |
@@ -2387,34 +2431,9 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
2387 | *pdata = nested_vmx_ept_caps; | 2431 | *pdata = nested_vmx_ept_caps; |
2388 | break; | 2432 | break; |
2389 | default: | 2433 | default: |
2390 | return 0; | ||
2391 | } | ||
2392 | |||
2393 | return 1; | ||
2394 | } | ||
2395 | |||
2396 | static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | ||
2397 | { | ||
2398 | u32 msr_index = msr_info->index; | ||
2399 | u64 data = msr_info->data; | ||
2400 | bool host_initialized = msr_info->host_initiated; | ||
2401 | |||
2402 | if (!nested_vmx_allowed(vcpu)) | ||
2403 | return 0; | ||
2404 | |||
2405 | if (msr_index == MSR_IA32_FEATURE_CONTROL) { | ||
2406 | if (!host_initialized && | ||
2407 | to_vmx(vcpu)->nested.msr_ia32_feature_control | ||
2408 | & FEATURE_CONTROL_LOCKED) | ||
2409 | return 0; | ||
2410 | to_vmx(vcpu)->nested.msr_ia32_feature_control = data; | ||
2411 | return 1; | 2434 | return 1; |
2412 | } | 2435 | } |
2413 | 2436 | ||
2414 | /* | ||
2415 | * No need to treat VMX capability MSRs specially: If we don't handle | ||
2416 | * them, handle_wrmsr will #GP(0), which is correct (they are readonly) | ||
2417 | */ | ||
2418 | return 0; | 2437 | return 0; |
2419 | } | 2438 | } |
2420 | 2439 | ||
@@ -2460,13 +2479,20 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
2460 | case MSR_IA32_SYSENTER_ESP: | 2479 | case MSR_IA32_SYSENTER_ESP: |
2461 | data = vmcs_readl(GUEST_SYSENTER_ESP); | 2480 | data = vmcs_readl(GUEST_SYSENTER_ESP); |
2462 | break; | 2481 | break; |
2482 | case MSR_IA32_FEATURE_CONTROL: | ||
2483 | if (!nested_vmx_allowed(vcpu)) | ||
2484 | return 1; | ||
2485 | data = to_vmx(vcpu)->nested.msr_ia32_feature_control; | ||
2486 | break; | ||
2487 | case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: | ||
2488 | if (!nested_vmx_allowed(vcpu)) | ||
2489 | return 1; | ||
2490 | return vmx_get_vmx_msr(vcpu, msr_index, pdata); | ||
2463 | case MSR_TSC_AUX: | 2491 | case MSR_TSC_AUX: |
2464 | if (!to_vmx(vcpu)->rdtscp_enabled) | 2492 | if (!to_vmx(vcpu)->rdtscp_enabled) |
2465 | return 1; | 2493 | return 1; |
2466 | /* Otherwise falls through */ | 2494 | /* Otherwise falls through */ |
2467 | default: | 2495 | default: |
2468 | if (vmx_get_vmx_msr(vcpu, msr_index, pdata)) | ||
2469 | return 0; | ||
2470 | msr = find_msr_entry(to_vmx(vcpu), msr_index); | 2496 | msr = find_msr_entry(to_vmx(vcpu), msr_index); |
2471 | if (msr) { | 2497 | if (msr) { |
2472 | data = msr->data; | 2498 | data = msr->data; |
@@ -2479,6 +2505,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
2479 | return 0; | 2505 | return 0; |
2480 | } | 2506 | } |
2481 | 2507 | ||
2508 | static void vmx_leave_nested(struct kvm_vcpu *vcpu); | ||
2509 | |||
2482 | /* | 2510 | /* |
2483 | * Writes msr value into into the appropriate "register". | 2511 | * Writes msr value into into the appropriate "register". |
2484 | * Returns 0 on success, non-0 otherwise. | 2512 | * Returns 0 on success, non-0 otherwise. |
@@ -2533,6 +2561,17 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2533 | case MSR_IA32_TSC_ADJUST: | 2561 | case MSR_IA32_TSC_ADJUST: |
2534 | ret = kvm_set_msr_common(vcpu, msr_info); | 2562 | ret = kvm_set_msr_common(vcpu, msr_info); |
2535 | break; | 2563 | break; |
2564 | case MSR_IA32_FEATURE_CONTROL: | ||
2565 | if (!nested_vmx_allowed(vcpu) || | ||
2566 | (to_vmx(vcpu)->nested.msr_ia32_feature_control & | ||
2567 | FEATURE_CONTROL_LOCKED && !msr_info->host_initiated)) | ||
2568 | return 1; | ||
2569 | vmx->nested.msr_ia32_feature_control = data; | ||
2570 | if (msr_info->host_initiated && data == 0) | ||
2571 | vmx_leave_nested(vcpu); | ||
2572 | break; | ||
2573 | case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: | ||
2574 | return 1; /* they are read-only */ | ||
2536 | case MSR_TSC_AUX: | 2575 | case MSR_TSC_AUX: |
2537 | if (!vmx->rdtscp_enabled) | 2576 | if (!vmx->rdtscp_enabled) |
2538 | return 1; | 2577 | return 1; |
@@ -2541,8 +2580,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2541 | return 1; | 2580 | return 1; |
2542 | /* Otherwise falls through */ | 2581 | /* Otherwise falls through */ |
2543 | default: | 2582 | default: |
2544 | if (vmx_set_vmx_msr(vcpu, msr_info)) | ||
2545 | break; | ||
2546 | msr = find_msr_entry(vmx, msr_index); | 2583 | msr = find_msr_entry(vmx, msr_index); |
2547 | if (msr) { | 2584 | if (msr) { |
2548 | msr->data = data; | 2585 | msr->data = data; |
@@ -3182,14 +3219,10 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
3182 | vmx_load_host_state(to_vmx(vcpu)); | 3219 | vmx_load_host_state(to_vmx(vcpu)); |
3183 | vcpu->arch.efer = efer; | 3220 | vcpu->arch.efer = efer; |
3184 | if (efer & EFER_LMA) { | 3221 | if (efer & EFER_LMA) { |
3185 | vmcs_write32(VM_ENTRY_CONTROLS, | 3222 | vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); |
3186 | vmcs_read32(VM_ENTRY_CONTROLS) | | ||
3187 | VM_ENTRY_IA32E_MODE); | ||
3188 | msr->data = efer; | 3223 | msr->data = efer; |
3189 | } else { | 3224 | } else { |
3190 | vmcs_write32(VM_ENTRY_CONTROLS, | 3225 | vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); |
3191 | vmcs_read32(VM_ENTRY_CONTROLS) & | ||
3192 | ~VM_ENTRY_IA32E_MODE); | ||
3193 | 3226 | ||
3194 | msr->data = efer & ~EFER_LME; | 3227 | msr->data = efer & ~EFER_LME; |
3195 | } | 3228 | } |
@@ -3217,9 +3250,7 @@ static void enter_lmode(struct kvm_vcpu *vcpu) | |||
3217 | 3250 | ||
3218 | static void exit_lmode(struct kvm_vcpu *vcpu) | 3251 | static void exit_lmode(struct kvm_vcpu *vcpu) |
3219 | { | 3252 | { |
3220 | vmcs_write32(VM_ENTRY_CONTROLS, | 3253 | vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); |
3221 | vmcs_read32(VM_ENTRY_CONTROLS) | ||
3222 | & ~VM_ENTRY_IA32E_MODE); | ||
3223 | vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA); | 3254 | vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA); |
3224 | } | 3255 | } |
3225 | 3256 | ||
@@ -4346,10 +4377,11 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
4346 | ++vmx->nmsrs; | 4377 | ++vmx->nmsrs; |
4347 | } | 4378 | } |
4348 | 4379 | ||
4349 | vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); | 4380 | |
4381 | vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl); | ||
4350 | 4382 | ||
4351 | /* 22.2.1, 20.8.1 */ | 4383 | /* 22.2.1, 20.8.1 */ |
4352 | vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl); | 4384 | vm_entry_controls_init(vmx, vmcs_config.vmentry_ctrl); |
4353 | 4385 | ||
4354 | vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); | 4386 | vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); |
4355 | set_cr4_guest_host_mask(vmx); | 4387 | set_cr4_guest_host_mask(vmx); |
@@ -4360,7 +4392,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
4360 | static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) | 4392 | static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) |
4361 | { | 4393 | { |
4362 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 4394 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
4363 | u64 msr; | 4395 | struct msr_data apic_base_msr; |
4364 | 4396 | ||
4365 | vmx->rmode.vm86_active = 0; | 4397 | vmx->rmode.vm86_active = 0; |
4366 | 4398 | ||
@@ -4368,10 +4400,11 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
4368 | 4400 | ||
4369 | vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); | 4401 | vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); |
4370 | kvm_set_cr8(&vmx->vcpu, 0); | 4402 | kvm_set_cr8(&vmx->vcpu, 0); |
4371 | msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; | 4403 | apic_base_msr.data = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; |
4372 | if (kvm_vcpu_is_bsp(&vmx->vcpu)) | 4404 | if (kvm_vcpu_is_bsp(&vmx->vcpu)) |
4373 | msr |= MSR_IA32_APICBASE_BSP; | 4405 | apic_base_msr.data |= MSR_IA32_APICBASE_BSP; |
4374 | kvm_set_apic_base(&vmx->vcpu, msr); | 4406 | apic_base_msr.host_initiated = true; |
4407 | kvm_set_apic_base(&vmx->vcpu, &apic_base_msr); | ||
4375 | 4408 | ||
4376 | vmx_segment_cache_clear(vmx); | 4409 | vmx_segment_cache_clear(vmx); |
4377 | 4410 | ||
@@ -4588,15 +4621,12 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) | |||
4588 | static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) | 4621 | static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) |
4589 | { | 4622 | { |
4590 | if (is_guest_mode(vcpu)) { | 4623 | if (is_guest_mode(vcpu)) { |
4591 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | ||
4592 | |||
4593 | if (to_vmx(vcpu)->nested.nested_run_pending) | 4624 | if (to_vmx(vcpu)->nested.nested_run_pending) |
4594 | return 0; | 4625 | return 0; |
4595 | if (nested_exit_on_nmi(vcpu)) { | 4626 | if (nested_exit_on_nmi(vcpu)) { |
4596 | nested_vmx_vmexit(vcpu); | 4627 | nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, |
4597 | vmcs12->vm_exit_reason = EXIT_REASON_EXCEPTION_NMI; | 4628 | NMI_VECTOR | INTR_TYPE_NMI_INTR | |
4598 | vmcs12->vm_exit_intr_info = NMI_VECTOR | | 4629 | INTR_INFO_VALID_MASK, 0); |
4599 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK; | ||
4600 | /* | 4630 | /* |
4601 | * The NMI-triggered VM exit counts as injection: | 4631 | * The NMI-triggered VM exit counts as injection: |
4602 | * clear this one and block further NMIs. | 4632 | * clear this one and block further NMIs. |
@@ -4618,15 +4648,11 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) | |||
4618 | static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) | 4648 | static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) |
4619 | { | 4649 | { |
4620 | if (is_guest_mode(vcpu)) { | 4650 | if (is_guest_mode(vcpu)) { |
4621 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | ||
4622 | |||
4623 | if (to_vmx(vcpu)->nested.nested_run_pending) | 4651 | if (to_vmx(vcpu)->nested.nested_run_pending) |
4624 | return 0; | 4652 | return 0; |
4625 | if (nested_exit_on_intr(vcpu)) { | 4653 | if (nested_exit_on_intr(vcpu)) { |
4626 | nested_vmx_vmexit(vcpu); | 4654 | nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, |
4627 | vmcs12->vm_exit_reason = | 4655 | 0, 0); |
4628 | EXIT_REASON_EXTERNAL_INTERRUPT; | ||
4629 | vmcs12->vm_exit_intr_info = 0; | ||
4630 | /* | 4656 | /* |
4631 | * fall through to normal code, but now in L1, not L2 | 4657 | * fall through to normal code, but now in L1, not L2 |
4632 | */ | 4658 | */ |
@@ -4812,7 +4838,8 @@ static int handle_exception(struct kvm_vcpu *vcpu) | |||
4812 | dr6 = vmcs_readl(EXIT_QUALIFICATION); | 4838 | dr6 = vmcs_readl(EXIT_QUALIFICATION); |
4813 | if (!(vcpu->guest_debug & | 4839 | if (!(vcpu->guest_debug & |
4814 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { | 4840 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { |
4815 | vcpu->arch.dr6 = dr6 | DR6_FIXED_1; | 4841 | vcpu->arch.dr6 &= ~15; |
4842 | vcpu->arch.dr6 |= dr6; | ||
4816 | kvm_queue_exception(vcpu, DB_VECTOR); | 4843 | kvm_queue_exception(vcpu, DB_VECTOR); |
4817 | return 1; | 4844 | return 1; |
4818 | } | 4845 | } |
@@ -5080,14 +5107,27 @@ static int handle_dr(struct kvm_vcpu *vcpu) | |||
5080 | reg = DEBUG_REG_ACCESS_REG(exit_qualification); | 5107 | reg = DEBUG_REG_ACCESS_REG(exit_qualification); |
5081 | if (exit_qualification & TYPE_MOV_FROM_DR) { | 5108 | if (exit_qualification & TYPE_MOV_FROM_DR) { |
5082 | unsigned long val; | 5109 | unsigned long val; |
5083 | if (!kvm_get_dr(vcpu, dr, &val)) | 5110 | |
5084 | kvm_register_write(vcpu, reg, val); | 5111 | if (kvm_get_dr(vcpu, dr, &val)) |
5112 | return 1; | ||
5113 | kvm_register_write(vcpu, reg, val); | ||
5085 | } else | 5114 | } else |
5086 | kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg]); | 5115 | if (kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg])) |
5116 | return 1; | ||
5117 | |||
5087 | skip_emulated_instruction(vcpu); | 5118 | skip_emulated_instruction(vcpu); |
5088 | return 1; | 5119 | return 1; |
5089 | } | 5120 | } |
5090 | 5121 | ||
5122 | static u64 vmx_get_dr6(struct kvm_vcpu *vcpu) | ||
5123 | { | ||
5124 | return vcpu->arch.dr6; | ||
5125 | } | ||
5126 | |||
5127 | static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val) | ||
5128 | { | ||
5129 | } | ||
5130 | |||
5091 | static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) | 5131 | static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) |
5092 | { | 5132 | { |
5093 | vmcs_writel(GUEST_DR7, val); | 5133 | vmcs_writel(GUEST_DR7, val); |
@@ -6460,11 +6500,8 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, | |||
6460 | int size; | 6500 | int size; |
6461 | u8 b; | 6501 | u8 b; |
6462 | 6502 | ||
6463 | if (nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING)) | ||
6464 | return 1; | ||
6465 | |||
6466 | if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) | 6503 | if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) |
6467 | return 0; | 6504 | return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING); |
6468 | 6505 | ||
6469 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 6506 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
6470 | 6507 | ||
@@ -6628,6 +6665,13 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
6628 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | 6665 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); |
6629 | u32 exit_reason = vmx->exit_reason; | 6666 | u32 exit_reason = vmx->exit_reason; |
6630 | 6667 | ||
6668 | trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason, | ||
6669 | vmcs_readl(EXIT_QUALIFICATION), | ||
6670 | vmx->idt_vectoring_info, | ||
6671 | intr_info, | ||
6672 | vmcs_read32(VM_EXIT_INTR_ERROR_CODE), | ||
6673 | KVM_ISA_VMX); | ||
6674 | |||
6631 | if (vmx->nested.nested_run_pending) | 6675 | if (vmx->nested.nested_run_pending) |
6632 | return 0; | 6676 | return 0; |
6633 | 6677 | ||
@@ -6777,7 +6821,9 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) | |||
6777 | return handle_invalid_guest_state(vcpu); | 6821 | return handle_invalid_guest_state(vcpu); |
6778 | 6822 | ||
6779 | if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) { | 6823 | if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) { |
6780 | nested_vmx_vmexit(vcpu); | 6824 | nested_vmx_vmexit(vcpu, exit_reason, |
6825 | vmcs_read32(VM_EXIT_INTR_INFO), | ||
6826 | vmcs_readl(EXIT_QUALIFICATION)); | ||
6781 | return 1; | 6827 | return 1; |
6782 | } | 6828 | } |
6783 | 6829 | ||
@@ -7332,8 +7378,8 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) | |||
7332 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 7378 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
7333 | 7379 | ||
7334 | free_vpid(vmx); | 7380 | free_vpid(vmx); |
7335 | free_nested(vmx); | ||
7336 | free_loaded_vmcs(vmx->loaded_vmcs); | 7381 | free_loaded_vmcs(vmx->loaded_vmcs); |
7382 | free_nested(vmx); | ||
7337 | kfree(vmx->guest_msrs); | 7383 | kfree(vmx->guest_msrs); |
7338 | kvm_vcpu_uninit(vcpu); | 7384 | kvm_vcpu_uninit(vcpu); |
7339 | kmem_cache_free(kvm_vcpu_cache, vmx); | 7385 | kmem_cache_free(kvm_vcpu_cache, vmx); |
@@ -7518,15 +7564,14 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) | |||
7518 | static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, | 7564 | static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, |
7519 | struct x86_exception *fault) | 7565 | struct x86_exception *fault) |
7520 | { | 7566 | { |
7521 | struct vmcs12 *vmcs12; | 7567 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); |
7522 | nested_vmx_vmexit(vcpu); | 7568 | u32 exit_reason; |
7523 | vmcs12 = get_vmcs12(vcpu); | ||
7524 | 7569 | ||
7525 | if (fault->error_code & PFERR_RSVD_MASK) | 7570 | if (fault->error_code & PFERR_RSVD_MASK) |
7526 | vmcs12->vm_exit_reason = EXIT_REASON_EPT_MISCONFIG; | 7571 | exit_reason = EXIT_REASON_EPT_MISCONFIG; |
7527 | else | 7572 | else |
7528 | vmcs12->vm_exit_reason = EXIT_REASON_EPT_VIOLATION; | 7573 | exit_reason = EXIT_REASON_EPT_VIOLATION; |
7529 | vmcs12->exit_qualification = vcpu->arch.exit_qualification; | 7574 | nested_vmx_vmexit(vcpu, exit_reason, 0, vcpu->arch.exit_qualification); |
7530 | vmcs12->guest_physical_address = fault->address; | 7575 | vmcs12->guest_physical_address = fault->address; |
7531 | } | 7576 | } |
7532 | 7577 | ||
@@ -7564,7 +7609,9 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu, | |||
7564 | 7609 | ||
7565 | /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */ | 7610 | /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */ |
7566 | if (vmcs12->exception_bitmap & (1u << PF_VECTOR)) | 7611 | if (vmcs12->exception_bitmap & (1u << PF_VECTOR)) |
7567 | nested_vmx_vmexit(vcpu); | 7612 | nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason, |
7613 | vmcs_read32(VM_EXIT_INTR_INFO), | ||
7614 | vmcs_readl(EXIT_QUALIFICATION)); | ||
7568 | else | 7615 | else |
7569 | kvm_inject_page_fault(vcpu, fault); | 7616 | kvm_inject_page_fault(vcpu, fault); |
7570 | } | 7617 | } |
@@ -7706,6 +7753,11 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7706 | else | 7753 | else |
7707 | vmcs_write64(APIC_ACCESS_ADDR, | 7754 | vmcs_write64(APIC_ACCESS_ADDR, |
7708 | page_to_phys(vmx->nested.apic_access_page)); | 7755 | page_to_phys(vmx->nested.apic_access_page)); |
7756 | } else if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) { | ||
7757 | exec_control |= | ||
7758 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | ||
7759 | vmcs_write64(APIC_ACCESS_ADDR, | ||
7760 | page_to_phys(vcpu->kvm->arch.apic_access_page)); | ||
7709 | } | 7761 | } |
7710 | 7762 | ||
7711 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | 7763 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); |
@@ -7759,12 +7811,12 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7759 | exit_control = vmcs_config.vmexit_ctrl; | 7811 | exit_control = vmcs_config.vmexit_ctrl; |
7760 | if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) | 7812 | if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) |
7761 | exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; | 7813 | exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; |
7762 | vmcs_write32(VM_EXIT_CONTROLS, exit_control); | 7814 | vm_exit_controls_init(vmx, exit_control); |
7763 | 7815 | ||
7764 | /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are | 7816 | /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are |
7765 | * emulated by vmx_set_efer(), below. | 7817 | * emulated by vmx_set_efer(), below. |
7766 | */ | 7818 | */ |
7767 | vmcs_write32(VM_ENTRY_CONTROLS, | 7819 | vm_entry_controls_init(vmx, |
7768 | (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER & | 7820 | (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER & |
7769 | ~VM_ENTRY_IA32E_MODE) | | 7821 | ~VM_ENTRY_IA32E_MODE) | |
7770 | (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); | 7822 | (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); |
@@ -7882,7 +7934,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
7882 | return 1; | 7934 | return 1; |
7883 | } | 7935 | } |
7884 | 7936 | ||
7885 | if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE) { | 7937 | if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE && |
7938 | vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) { | ||
7886 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); | 7939 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); |
7887 | return 1; | 7940 | return 1; |
7888 | } | 7941 | } |
@@ -7994,8 +8047,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
7994 | 8047 | ||
7995 | enter_guest_mode(vcpu); | 8048 | enter_guest_mode(vcpu); |
7996 | 8049 | ||
7997 | vmx->nested.nested_run_pending = 1; | ||
7998 | |||
7999 | vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); | 8050 | vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); |
8000 | 8051 | ||
8001 | cpu = get_cpu(); | 8052 | cpu = get_cpu(); |
@@ -8011,6 +8062,11 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
8011 | 8062 | ||
8012 | prepare_vmcs02(vcpu, vmcs12); | 8063 | prepare_vmcs02(vcpu, vmcs12); |
8013 | 8064 | ||
8065 | if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) | ||
8066 | return kvm_emulate_halt(vcpu); | ||
8067 | |||
8068 | vmx->nested.nested_run_pending = 1; | ||
8069 | |||
8014 | /* | 8070 | /* |
8015 | * Note no nested_vmx_succeed or nested_vmx_fail here. At this point | 8071 | * Note no nested_vmx_succeed or nested_vmx_fail here. At this point |
8016 | * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet | 8072 | * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet |
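Note: with VMX_MISC_ACTIVITY_HLT advertised earlier in this patch, L1 may now launch L2 in the HLT activity state; instead of failing the entry on that control field, vmcs02 is prepared as usual and the vCPU is simply routed into halt emulation. A tiny sketch of that entry-time decision, with invented enum names standing in for the real activity-state constants and for kvm_emulate_halt().

    #include <stdio.h>

    enum activity { ACTIVITY_ACTIVE, ACTIVITY_HLT, ACTIVITY_SHUTDOWN };
    enum action   { RUN_L2, EMULATE_HALT, FAIL_ENTRY };

    /* Entry-time decision modelled on the two hunks above. */
    static enum action nested_entry_action(enum activity state)
    {
        if (state != ACTIVITY_ACTIVE && state != ACTIVITY_HLT)
            return FAIL_ENTRY;              /* invalid control field */
        if (state == ACTIVITY_HLT)
            return EMULATE_HALT;            /* prepare vmcs02, then halt */
        return RUN_L2;
    }

    int main(void)
    {
        printf("%d %d %d\n",
               nested_entry_action(ACTIVITY_ACTIVE),
               nested_entry_action(ACTIVITY_HLT),
               nested_entry_action(ACTIVITY_SHUTDOWN));
        return 0;
    }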
@@ -8110,7 +8166,9 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, | |||
8110 | * exit-information fields only. Other fields are modified by L1 with VMWRITE, | 8166 | * exit-information fields only. Other fields are modified by L1 with VMWRITE, |
8111 | * which already writes to vmcs12 directly. | 8167 | * which already writes to vmcs12 directly. |
8112 | */ | 8168 | */ |
8113 | static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | 8169 | static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, |
8170 | u32 exit_reason, u32 exit_intr_info, | ||
8171 | unsigned long exit_qualification) | ||
8114 | { | 8172 | { |
8115 | /* update guest state fields: */ | 8173 | /* update guest state fields: */ |
8116 | vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); | 8174 | vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); |
@@ -8162,6 +8220,10 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
8162 | vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); | 8220 | vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); |
8163 | vmcs12->guest_pending_dbg_exceptions = | 8221 | vmcs12->guest_pending_dbg_exceptions = |
8164 | vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); | 8222 | vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); |
8223 | if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) | ||
8224 | vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT; | ||
8225 | else | ||
8226 | vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE; | ||
8165 | 8227 | ||
8166 | if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) && | 8228 | if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) && |
8167 | (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) | 8229 | (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) |
@@ -8186,7 +8248,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
8186 | 8248 | ||
8187 | vmcs12->vm_entry_controls = | 8249 | vmcs12->vm_entry_controls = |
8188 | (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | | 8250 | (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | |
8189 | (vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE); | 8251 | (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE); |
8190 | 8252 | ||
8191 | /* TODO: These cannot have changed unless we have MSR bitmaps and | 8253 | /* TODO: These cannot have changed unless we have MSR bitmaps and |
8192 | * the relevant bit asks not to trap the change */ | 8254 | * the relevant bit asks not to trap the change */ |
@@ -8201,10 +8263,10 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
8201 | 8263 | ||
8202 | /* update exit information fields: */ | 8264 | /* update exit information fields: */ |
8203 | 8265 | ||
8204 | vmcs12->vm_exit_reason = to_vmx(vcpu)->exit_reason; | 8266 | vmcs12->vm_exit_reason = exit_reason; |
8205 | vmcs12->exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 8267 | vmcs12->exit_qualification = exit_qualification; |
8206 | 8268 | ||
8207 | vmcs12->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | 8269 | vmcs12->vm_exit_intr_info = exit_intr_info; |
8208 | if ((vmcs12->vm_exit_intr_info & | 8270 | if ((vmcs12->vm_exit_intr_info & |
8209 | (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) == | 8271 | (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) == |
8210 | (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) | 8272 | (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) |
@@ -8370,7 +8432,9 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, | |||
8370 | * and modify vmcs12 to make it see what it would expect to see there if | 8432 | * and modify vmcs12 to make it see what it would expect to see there if |
8371 | * L2 was its real guest. Must only be called when in L2 (is_guest_mode()) | 8433 | * L2 was its real guest. Must only be called when in L2 (is_guest_mode()) |
8372 | */ | 8434 | */ |
8373 | static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) | 8435 | static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, |
8436 | u32 exit_intr_info, | ||
8437 | unsigned long exit_qualification) | ||
8374 | { | 8438 | { |
8375 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 8439 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
8376 | int cpu; | 8440 | int cpu; |
@@ -8380,7 +8444,15 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) | |||
8380 | WARN_ON_ONCE(vmx->nested.nested_run_pending); | 8444 | WARN_ON_ONCE(vmx->nested.nested_run_pending); |
8381 | 8445 | ||
8382 | leave_guest_mode(vcpu); | 8446 | leave_guest_mode(vcpu); |
8383 | prepare_vmcs12(vcpu, vmcs12); | 8447 | prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info, |
8448 | exit_qualification); | ||
8449 | |||
8450 | trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason, | ||
8451 | vmcs12->exit_qualification, | ||
8452 | vmcs12->idt_vectoring_info_field, | ||
8453 | vmcs12->vm_exit_intr_info, | ||
8454 | vmcs12->vm_exit_intr_error_code, | ||
8455 | KVM_ISA_VMX); | ||
8384 | 8456 | ||
8385 | cpu = get_cpu(); | 8457 | cpu = get_cpu(); |
8386 | vmx->loaded_vmcs = &vmx->vmcs01; | 8458 | vmx->loaded_vmcs = &vmx->vmcs01; |
@@ -8389,6 +8461,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) | |||
8389 | vcpu->cpu = cpu; | 8461 | vcpu->cpu = cpu; |
8390 | put_cpu(); | 8462 | put_cpu(); |
8391 | 8463 | ||
8464 | vm_entry_controls_init(vmx, vmcs_read32(VM_ENTRY_CONTROLS)); | ||
8465 | vm_exit_controls_init(vmx, vmcs_read32(VM_EXIT_CONTROLS)); | ||
8392 | vmx_segment_cache_clear(vmx); | 8466 | vmx_segment_cache_clear(vmx); |
8393 | 8467 | ||
8394 | /* if no vmcs02 cache requested, remove the one we used */ | 8468 | /* if no vmcs02 cache requested, remove the one we used */ |
@@ -8424,6 +8498,16 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) | |||
8424 | } | 8498 | } |
8425 | 8499 | ||
8426 | /* | 8500 | /* |
8501 | * Forcibly leave nested mode in order to be able to reset the VCPU later on. | ||
8502 | */ | ||
8503 | static void vmx_leave_nested(struct kvm_vcpu *vcpu) | ||
8504 | { | ||
8505 | if (is_guest_mode(vcpu)) | ||
8506 | nested_vmx_vmexit(vcpu, -1, 0, 0); | ||
8507 | free_nested(to_vmx(vcpu)); | ||
8508 | } | ||
8509 | |||
8510 | /* | ||
8427 | * L1's failure to enter L2 is a subset of a normal exit, as explained in | 8511 | * L1's failure to enter L2 is a subset of a normal exit, as explained in |
8428 | * 23.7 "VM-entry failures during or after loading guest state" (this also | 8512 | * 23.7 "VM-entry failures during or after loading guest state" (this also |
8429 | * lists the acceptable exit-reason and exit-qualification parameters). | 8513 | * lists the acceptable exit-reason and exit-qualification parameters). |
@@ -8486,6 +8570,8 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
8486 | .set_idt = vmx_set_idt, | 8570 | .set_idt = vmx_set_idt, |
8487 | .get_gdt = vmx_get_gdt, | 8571 | .get_gdt = vmx_get_gdt, |
8488 | .set_gdt = vmx_set_gdt, | 8572 | .set_gdt = vmx_set_gdt, |
8573 | .get_dr6 = vmx_get_dr6, | ||
8574 | .set_dr6 = vmx_set_dr6, | ||
8489 | .set_dr7 = vmx_set_dr7, | 8575 | .set_dr7 = vmx_set_dr7, |
8490 | .cache_reg = vmx_cache_reg, | 8576 | .cache_reg = vmx_cache_reg, |
8491 | .get_rflags = vmx_get_rflags, | 8577 | .get_rflags = vmx_get_rflags, |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5d004da1e35d..39c28f09dfd5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -94,6 +94,9 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops); | |||
94 | static bool ignore_msrs = 0; | 94 | static bool ignore_msrs = 0; |
95 | module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); | 95 | module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); |
96 | 96 | ||
97 | unsigned int min_timer_period_us = 500; | ||
98 | module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); | ||
99 | |||
97 | bool kvm_has_tsc_control; | 100 | bool kvm_has_tsc_control; |
98 | EXPORT_SYMBOL_GPL(kvm_has_tsc_control); | 101 | EXPORT_SYMBOL_GPL(kvm_has_tsc_control); |
99 | u32 kvm_max_guest_tsc_khz; | 102 | u32 kvm_max_guest_tsc_khz; |
@@ -254,10 +257,26 @@ u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) | |||
254 | } | 257 | } |
255 | EXPORT_SYMBOL_GPL(kvm_get_apic_base); | 258 | EXPORT_SYMBOL_GPL(kvm_get_apic_base); |
256 | 259 | ||
257 | void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data) | 260 | int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info) |
258 | { | 261 | { |
259 | /* TODO: reserve bits check */ | 262 | u64 old_state = vcpu->arch.apic_base & |
260 | kvm_lapic_set_base(vcpu, data); | 263 | (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); |
264 | u64 new_state = msr_info->data & | ||
265 | (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); | ||
266 | u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) | | ||
267 | 0x2ff | (guest_cpuid_has_x2apic(vcpu) ? 0 : X2APIC_ENABLE); | ||
268 | |||
269 | if (!msr_info->host_initiated && | ||
270 | ((msr_info->data & reserved_bits) != 0 || | ||
271 | new_state == X2APIC_ENABLE || | ||
272 | (new_state == MSR_IA32_APICBASE_ENABLE && | ||
273 | old_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) || | ||
274 | (new_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE) && | ||
275 | old_state == 0))) | ||
276 | return 1; | ||
277 | |||
278 | kvm_lapic_set_base(vcpu, msr_info->data); | ||
279 | return 0; | ||
261 | } | 280 | } |
262 | EXPORT_SYMBOL_GPL(kvm_set_apic_base); | 281 | EXPORT_SYMBOL_GPL(kvm_set_apic_base); |
263 | 282 | ||
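Note: kvm_set_apic_base now returns an error instead of accepting any value — reserved bits must be clear, the x2APIC bit is invalid without the global enable bit, and guest-initiated writes may not jump straight from disabled to x2APIC or straight from x2APIC back to xAPIC. A small sketch of just that transition check, with the two mode bits reduced to booleans; the full reserved-bit mask from the hunk is omitted.

    #include <stdbool.h>
    #include <stdio.h>

    struct apic_state { bool enable; bool x2apic; };

    /* Legal MSR_IA32_APICBASE mode transitions, per the hunk above. */
    static bool apic_base_transition_ok(struct apic_state o, struct apic_state n)
    {
        if (n.x2apic && !n.enable)
            return false;                   /* x2APIC without global enable */
        if (n.enable && !n.x2apic && o.enable && o.x2apic)
            return false;                   /* x2APIC -> xAPIC: must disable first */
        if (n.enable && n.x2apic && !o.enable && !o.x2apic)
            return false;                   /* disabled -> x2APIC: must enable xAPIC first */
        return true;
    }

    int main(void)
    {
        struct apic_state off   = { false, false };
        struct apic_state xapic = { true,  false };
        struct apic_state x2    = { true,  true  };
        printf("%d %d %d\n",
               apic_base_transition_ok(off, xapic),   /* 1: allowed  */
               apic_base_transition_ok(off, x2),      /* 0: rejected */
               apic_base_transition_ok(x2, xapic));   /* 0: rejected */
        return 0;
    }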
@@ -719,6 +738,12 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) | |||
719 | } | 738 | } |
720 | EXPORT_SYMBOL_GPL(kvm_get_cr8); | 739 | EXPORT_SYMBOL_GPL(kvm_get_cr8); |
721 | 740 | ||
741 | static void kvm_update_dr6(struct kvm_vcpu *vcpu) | ||
742 | { | ||
743 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) | ||
744 | kvm_x86_ops->set_dr6(vcpu, vcpu->arch.dr6); | ||
745 | } | ||
746 | |||
722 | static void kvm_update_dr7(struct kvm_vcpu *vcpu) | 747 | static void kvm_update_dr7(struct kvm_vcpu *vcpu) |
723 | { | 748 | { |
724 | unsigned long dr7; | 749 | unsigned long dr7; |
@@ -747,6 +772,7 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) | |||
747 | if (val & 0xffffffff00000000ULL) | 772 | if (val & 0xffffffff00000000ULL) |
748 | return -1; /* #GP */ | 773 | return -1; /* #GP */ |
749 | vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; | 774 | vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; |
775 | kvm_update_dr6(vcpu); | ||
750 | break; | 776 | break; |
751 | case 5: | 777 | case 5: |
752 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | 778 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) |
@@ -788,7 +814,10 @@ static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) | |||
788 | return 1; | 814 | return 1; |
789 | /* fall through */ | 815 | /* fall through */ |
790 | case 6: | 816 | case 6: |
791 | *val = vcpu->arch.dr6; | 817 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) |
818 | *val = vcpu->arch.dr6; | ||
819 | else | ||
820 | *val = kvm_x86_ops->get_dr6(vcpu); | ||
792 | break; | 821 | break; |
793 | case 5: | 822 | case 5: |
794 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | 823 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) |
@@ -836,11 +865,12 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); | |||
836 | * kvm-specific. Those are put in the beginning of the list. | 865 | * kvm-specific. Those are put in the beginning of the list. |
837 | */ | 866 | */ |
838 | 867 | ||
839 | #define KVM_SAVE_MSRS_BEGIN 10 | 868 | #define KVM_SAVE_MSRS_BEGIN 12 |
840 | static u32 msrs_to_save[] = { | 869 | static u32 msrs_to_save[] = { |
841 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, | 870 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, |
842 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, | 871 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, |
843 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, | 872 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, |
873 | HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC, | ||
844 | HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, | 874 | HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, |
845 | MSR_KVM_PV_EOI_EN, | 875 | MSR_KVM_PV_EOI_EN, |
846 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, | 876 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, |
@@ -1275,8 +1305,6 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) | |||
1275 | kvm->arch.last_tsc_write = data; | 1305 | kvm->arch.last_tsc_write = data; |
1276 | kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz; | 1306 | kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz; |
1277 | 1307 | ||
1278 | /* Reset of TSC must disable overshoot protection below */ | ||
1279 | vcpu->arch.hv_clock.tsc_timestamp = 0; | ||
1280 | vcpu->arch.last_guest_tsc = data; | 1308 | vcpu->arch.last_guest_tsc = data; |
1281 | 1309 | ||
1282 | /* Keep track of which generation this VCPU has synchronized to */ | 1310 | /* Keep track of which generation this VCPU has synchronized to */ |
@@ -1484,7 +1512,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
1484 | unsigned long flags, this_tsc_khz; | 1512 | unsigned long flags, this_tsc_khz; |
1485 | struct kvm_vcpu_arch *vcpu = &v->arch; | 1513 | struct kvm_vcpu_arch *vcpu = &v->arch; |
1486 | struct kvm_arch *ka = &v->kvm->arch; | 1514 | struct kvm_arch *ka = &v->kvm->arch; |
1487 | s64 kernel_ns, max_kernel_ns; | 1515 | s64 kernel_ns; |
1488 | u64 tsc_timestamp, host_tsc; | 1516 | u64 tsc_timestamp, host_tsc; |
1489 | struct pvclock_vcpu_time_info guest_hv_clock; | 1517 | struct pvclock_vcpu_time_info guest_hv_clock; |
1490 | u8 pvclock_flags; | 1518 | u8 pvclock_flags; |
@@ -1543,37 +1571,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
1543 | if (!vcpu->pv_time_enabled) | 1571 | if (!vcpu->pv_time_enabled) |
1544 | return 0; | 1572 | return 0; |
1545 | 1573 | ||
1546 | /* | ||
1547 | * Time as measured by the TSC may go backwards when resetting the base | ||
1548 | * tsc_timestamp. The reason for this is that the TSC resolution is | ||
1549 | * higher than the resolution of the other clock scales. Thus, many | ||
1550 | * possible measurments of the TSC correspond to one measurement of any | ||
1551 | * other clock, and so a spread of values is possible. This is not a | ||
1552 | * problem for the computation of the nanosecond clock; with TSC rates | ||
1553 | * around 1GHZ, there can only be a few cycles which correspond to one | ||
1554 | * nanosecond value, and any path through this code will inevitably | ||
1555 | * take longer than that. However, with the kernel_ns value itself, | ||
1556 | * the precision may be much lower, down to HZ granularity. If the | ||
1557 | * first sampling of TSC against kernel_ns ends in the low part of the | ||
1558 | * range, and the second in the high end of the range, we can get: | ||
1559 | * | ||
1560 | * (TSC - offset_low) * S + kns_old > (TSC - offset_high) * S + kns_new | ||
1561 | * | ||
1562 | * As the sampling errors potentially range in the thousands of cycles, | ||
1563 | * it is possible such a time value has already been observed by the | ||
1564 | * guest. To protect against this, we must compute the system time as | ||
1565 | * observed by the guest and ensure the new system time is greater. | ||
1566 | */ | ||
1567 | max_kernel_ns = 0; | ||
1568 | if (vcpu->hv_clock.tsc_timestamp) { | ||
1569 | max_kernel_ns = vcpu->last_guest_tsc - | ||
1570 | vcpu->hv_clock.tsc_timestamp; | ||
1571 | max_kernel_ns = pvclock_scale_delta(max_kernel_ns, | ||
1572 | vcpu->hv_clock.tsc_to_system_mul, | ||
1573 | vcpu->hv_clock.tsc_shift); | ||
1574 | max_kernel_ns += vcpu->last_kernel_ns; | ||
1575 | } | ||
1576 | |||
1577 | if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) { | 1574 | if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) { |
1578 | kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz, | 1575 | kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz, |
1579 | &vcpu->hv_clock.tsc_shift, | 1576 | &vcpu->hv_clock.tsc_shift, |
@@ -1581,14 +1578,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
1581 | vcpu->hw_tsc_khz = this_tsc_khz; | 1578 | vcpu->hw_tsc_khz = this_tsc_khz; |
1582 | } | 1579 | } |
1583 | 1580 | ||
1584 | /* with a master <monotonic time, tsc value> tuple, | ||
1585 | * pvclock clock reads always increase at the (scaled) rate | ||
1586 | * of guest TSC - no need to deal with sampling errors. | ||
1587 | */ | ||
1588 | if (!use_master_clock) { | ||
1589 | if (max_kernel_ns > kernel_ns) | ||
1590 | kernel_ns = max_kernel_ns; | ||
1591 | } | ||
1592 | /* With all the info we got, fill in the values */ | 1581 | /* With all the info we got, fill in the values */ |
1593 | vcpu->hv_clock.tsc_timestamp = tsc_timestamp; | 1582 | vcpu->hv_clock.tsc_timestamp = tsc_timestamp; |
1594 | vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; | 1583 | vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; |
@@ -1826,6 +1815,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) | |||
1826 | switch (msr) { | 1815 | switch (msr) { |
1827 | case HV_X64_MSR_GUEST_OS_ID: | 1816 | case HV_X64_MSR_GUEST_OS_ID: |
1828 | case HV_X64_MSR_HYPERCALL: | 1817 | case HV_X64_MSR_HYPERCALL: |
1818 | case HV_X64_MSR_REFERENCE_TSC: | ||
1819 | case HV_X64_MSR_TIME_REF_COUNT: | ||
1829 | r = true; | 1820 | r = true; |
1830 | break; | 1821 | break; |
1831 | } | 1822 | } |
@@ -1865,6 +1856,21 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1865 | if (__copy_to_user((void __user *)addr, instructions, 4)) | 1856 | if (__copy_to_user((void __user *)addr, instructions, 4)) |
1866 | return 1; | 1857 | return 1; |
1867 | kvm->arch.hv_hypercall = data; | 1858 | kvm->arch.hv_hypercall = data; |
1859 | mark_page_dirty(kvm, gfn); | ||
1860 | break; | ||
1861 | } | ||
1862 | case HV_X64_MSR_REFERENCE_TSC: { | ||
1863 | u64 gfn; | ||
1864 | HV_REFERENCE_TSC_PAGE tsc_ref; | ||
1865 | memset(&tsc_ref, 0, sizeof(tsc_ref)); | ||
1866 | kvm->arch.hv_tsc_page = data; | ||
1867 | if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) | ||
1868 | break; | ||
1869 | gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; | ||
1870 | if (kvm_write_guest(kvm, data, | ||
1871 | &tsc_ref, sizeof(tsc_ref))) | ||
1872 | return 1; | ||
1873 | mark_page_dirty(kvm, gfn); | ||
1868 | break; | 1874 | break; |
1869 | } | 1875 | } |
1870 | default: | 1876 | default: |
@@ -1879,19 +1885,21 @@ static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1879 | { | 1885 | { |
1880 | switch (msr) { | 1886 | switch (msr) { |
1881 | case HV_X64_MSR_APIC_ASSIST_PAGE: { | 1887 | case HV_X64_MSR_APIC_ASSIST_PAGE: { |
1888 | u64 gfn; | ||
1882 | unsigned long addr; | 1889 | unsigned long addr; |
1883 | 1890 | ||
1884 | if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) { | 1891 | if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) { |
1885 | vcpu->arch.hv_vapic = data; | 1892 | vcpu->arch.hv_vapic = data; |
1886 | break; | 1893 | break; |
1887 | } | 1894 | } |
1888 | addr = gfn_to_hva(vcpu->kvm, data >> | 1895 | gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT; |
1889 | HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT); | 1896 | addr = gfn_to_hva(vcpu->kvm, gfn); |
1890 | if (kvm_is_error_hva(addr)) | 1897 | if (kvm_is_error_hva(addr)) |
1891 | return 1; | 1898 | return 1; |
1892 | if (__clear_user((void __user *)addr, PAGE_SIZE)) | 1899 | if (__clear_user((void __user *)addr, PAGE_SIZE)) |
1893 | return 1; | 1900 | return 1; |
1894 | vcpu->arch.hv_vapic = data; | 1901 | vcpu->arch.hv_vapic = data; |
1902 | mark_page_dirty(vcpu->kvm, gfn); | ||
1895 | break; | 1903 | break; |
1896 | } | 1904 | } |
1897 | case HV_X64_MSR_EOI: | 1905 | case HV_X64_MSR_EOI: |
@@ -2017,8 +2025,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2017 | case 0x200 ... 0x2ff: | 2025 | case 0x200 ... 0x2ff: |
2018 | return set_msr_mtrr(vcpu, msr, data); | 2026 | return set_msr_mtrr(vcpu, msr, data); |
2019 | case MSR_IA32_APICBASE: | 2027 | case MSR_IA32_APICBASE: |
2020 | kvm_set_apic_base(vcpu, data); | 2028 | return kvm_set_apic_base(vcpu, msr_info); |
2021 | break; | ||
2022 | case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: | 2029 | case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: |
2023 | return kvm_x2apic_msr_write(vcpu, msr, data); | 2030 | return kvm_x2apic_msr_write(vcpu, msr, data); |
2024 | case MSR_IA32_TSCDEADLINE: | 2031 | case MSR_IA32_TSCDEADLINE: |
@@ -2291,6 +2298,14 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
2291 | case HV_X64_MSR_HYPERCALL: | 2298 | case HV_X64_MSR_HYPERCALL: |
2292 | data = kvm->arch.hv_hypercall; | 2299 | data = kvm->arch.hv_hypercall; |
2293 | break; | 2300 | break; |
2301 | case HV_X64_MSR_TIME_REF_COUNT: { | ||
2302 | data = | ||
2303 | div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); | ||
2304 | break; | ||
2305 | } | ||
2306 | case HV_X64_MSR_REFERENCE_TSC: | ||
2307 | data = kvm->arch.hv_tsc_page; | ||
2308 | break; | ||
2294 | default: | 2309 | default: |
2295 | vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); | 2310 | vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); |
2296 | return 1; | 2311 | return 1; |
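Note: the Hyper-V reference time counter is specified in 100 ns units, so the new HV_X64_MSR_TIME_REF_COUNT read path just takes the kernel clock plus the per-VM kvmclock offset and divides by 100. A one-line conversion, shown here with an ordinary monotonic clock standing in for get_kernel_ns() and a zero offset for illustration.

    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>

    /* Stand-in for get_kernel_ns(): a monotonic timestamp in nanoseconds. */
    static uint64_t kernel_ns(void)
    {
        struct timespec ts;
        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
    }

    int main(void)
    {
        int64_t kvmclock_offset = 0;        /* per-VM value in the hunk */
        uint64_t ref_count = (kernel_ns() + kvmclock_offset) / 100;
        printf("HV_X64_MSR_TIME_REF_COUNT = %llu (100ns units)\n",
               (unsigned long long)ref_count);
        return 0;
    }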
@@ -2601,6 +2616,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
2601 | case KVM_CAP_GET_TSC_KHZ: | 2616 | case KVM_CAP_GET_TSC_KHZ: |
2602 | case KVM_CAP_KVMCLOCK_CTRL: | 2617 | case KVM_CAP_KVMCLOCK_CTRL: |
2603 | case KVM_CAP_READONLY_MEM: | 2618 | case KVM_CAP_READONLY_MEM: |
2619 | case KVM_CAP_HYPERV_TIME: | ||
2604 | #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT | 2620 | #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT |
2605 | case KVM_CAP_ASSIGN_DEV_IRQ: | 2621 | case KVM_CAP_ASSIGN_DEV_IRQ: |
2606 | case KVM_CAP_PCI_2_3: | 2622 | case KVM_CAP_PCI_2_3: |
@@ -2972,8 +2988,11 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
2972 | static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, | 2988 | static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, |
2973 | struct kvm_debugregs *dbgregs) | 2989 | struct kvm_debugregs *dbgregs) |
2974 | { | 2990 | { |
2991 | unsigned long val; | ||
2992 | |||
2975 | memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); | 2993 | memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); |
2976 | dbgregs->dr6 = vcpu->arch.dr6; | 2994 | _kvm_get_dr(vcpu, 6, &val); |
2995 | dbgregs->dr6 = val; | ||
2977 | dbgregs->dr7 = vcpu->arch.dr7; | 2996 | dbgregs->dr7 = vcpu->arch.dr7; |
2978 | dbgregs->flags = 0; | 2997 | dbgregs->flags = 0; |
2979 | memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved)); | 2998 | memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved)); |
@@ -2987,7 +3006,9 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, | |||
2987 | 3006 | ||
2988 | memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); | 3007 | memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); |
2989 | vcpu->arch.dr6 = dbgregs->dr6; | 3008 | vcpu->arch.dr6 = dbgregs->dr6; |
3009 | kvm_update_dr6(vcpu); | ||
2990 | vcpu->arch.dr7 = dbgregs->dr7; | 3010 | vcpu->arch.dr7 = dbgregs->dr7; |
3011 | kvm_update_dr7(vcpu); | ||
2991 | 3012 | ||
2992 | return 0; | 3013 | return 0; |
2993 | } | 3014 | } |
@@ -5834,6 +5855,11 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) | |||
5834 | kvm_apic_update_tmr(vcpu, tmr); | 5855 | kvm_apic_update_tmr(vcpu, tmr); |
5835 | } | 5856 | } |
5836 | 5857 | ||
5858 | /* | ||
5859 | * Returns 1 to let __vcpu_run() continue the guest execution loop without | ||
5860 | * exiting to userspace. Otherwise, the value will be returned to | ||
5861 | * userspace. | ||
5862 | */ | ||
5837 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | 5863 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) |
5838 | { | 5864 | { |
5839 | int r; | 5865 | int r; |
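Editorial aside on the return convention documented in the new comment above: a simplified sketch of the caller it serves, __vcpu_run(). This is not the patch's code — the real loop also handles halted vCPUs, pending signals and rescheduling — but it shows how the "return 1 to keep going" contract is consumed:

static int __vcpu_run_sketch(struct kvm_vcpu *vcpu)
{
	int r = 1;

	while (r == 1)                  /* 1: stay in the in-kernel entry loop   */
		r = vcpu_enter_guest(vcpu);

	return r;                       /* anything else is handed to userspace  */
}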
@@ -6089,7 +6115,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
6089 | } | 6115 | } |
6090 | if (need_resched()) { | 6116 | if (need_resched()) { |
6091 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); | 6117 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); |
6092 | kvm_resched(vcpu); | 6118 | cond_resched(); |
6093 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); | 6119 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); |
6094 | } | 6120 | } |
6095 | } | 6121 | } |
@@ -6401,6 +6427,7 @@ EXPORT_SYMBOL_GPL(kvm_task_switch); | |||
6401 | int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | 6427 | int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, |
6402 | struct kvm_sregs *sregs) | 6428 | struct kvm_sregs *sregs) |
6403 | { | 6429 | { |
6430 | struct msr_data apic_base_msr; | ||
6404 | int mmu_reset_needed = 0; | 6431 | int mmu_reset_needed = 0; |
6405 | int pending_vec, max_bits, idx; | 6432 | int pending_vec, max_bits, idx; |
6406 | struct desc_ptr dt; | 6433 | struct desc_ptr dt; |
@@ -6424,7 +6451,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
6424 | 6451 | ||
6425 | mmu_reset_needed |= vcpu->arch.efer != sregs->efer; | 6452 | mmu_reset_needed |= vcpu->arch.efer != sregs->efer; |
6426 | kvm_x86_ops->set_efer(vcpu, sregs->efer); | 6453 | kvm_x86_ops->set_efer(vcpu, sregs->efer); |
6427 | kvm_set_apic_base(vcpu, sregs->apic_base); | 6454 | apic_base_msr.data = sregs->apic_base; |
6455 | apic_base_msr.host_initiated = true; | ||
6456 | kvm_set_apic_base(vcpu, &apic_base_msr); | ||
6428 | 6457 | ||
6429 | mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0; | 6458 | mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0; |
6430 | kvm_x86_ops->set_cr0(vcpu, sregs->cr0); | 6459 | kvm_x86_ops->set_cr0(vcpu, sregs->cr0); |
@@ -6717,6 +6746,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu) | |||
6717 | 6746 | ||
6718 | memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); | 6747 | memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); |
6719 | vcpu->arch.dr6 = DR6_FIXED_1; | 6748 | vcpu->arch.dr6 = DR6_FIXED_1; |
6749 | kvm_update_dr6(vcpu); | ||
6720 | vcpu->arch.dr7 = DR7_FIXED_1; | 6750 | vcpu->arch.dr7 = DR7_FIXED_1; |
6721 | kvm_update_dr7(vcpu); | 6751 | kvm_update_dr7(vcpu); |
6722 | 6752 | ||
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 587fb9ede436..8da5823bcde6 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
@@ -125,5 +125,7 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, | |||
125 | #define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) | 125 | #define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) |
126 | extern u64 host_xcr0; | 126 | extern u64 host_xcr0; |
127 | 127 | ||
128 | extern unsigned int min_timer_period_us; | ||
129 | |||
128 | extern struct static_key kvm_no_apic_vcpu; | 130 | extern struct static_key kvm_no_apic_vcpu; |
129 | #endif | 131 | #endif |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index bdf8532494fe..ad1fb5f53925 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -233,13 +233,13 @@ static void lguest_end_context_switch(struct task_struct *next) | |||
233 | * flags word contains all kind of stuff, but in practice Linux only cares | 233 | * flags word contains all kind of stuff, but in practice Linux only cares |
234 | * about the interrupt flag. Our "save_flags()" just returns that. | 234 | * about the interrupt flag. Our "save_flags()" just returns that. |
235 | */ | 235 | */ |
236 | static unsigned long save_fl(void) | 236 | asmlinkage unsigned long lguest_save_fl(void) |
237 | { | 237 | { |
238 | return lguest_data.irq_enabled; | 238 | return lguest_data.irq_enabled; |
239 | } | 239 | } |
240 | 240 | ||
241 | /* Interrupts go off... */ | 241 | /* Interrupts go off... */ |
242 | static void irq_disable(void) | 242 | asmlinkage void lguest_irq_disable(void) |
243 | { | 243 | { |
244 | lguest_data.irq_enabled = 0; | 244 | lguest_data.irq_enabled = 0; |
245 | } | 245 | } |
@@ -253,8 +253,8 @@ static void irq_disable(void) | |||
253 | * PV_CALLEE_SAVE_REGS_THUNK(), which pushes %eax onto the stack, calls the | 253 | * PV_CALLEE_SAVE_REGS_THUNK(), which pushes %eax onto the stack, calls the |
254 | * C function, then restores it. | 254 | * C function, then restores it. |
255 | */ | 255 | */ |
256 | PV_CALLEE_SAVE_REGS_THUNK(save_fl); | 256 | PV_CALLEE_SAVE_REGS_THUNK(lguest_save_fl); |
257 | PV_CALLEE_SAVE_REGS_THUNK(irq_disable); | 257 | PV_CALLEE_SAVE_REGS_THUNK(lguest_irq_disable); |
258 | /*:*/ | 258 | /*:*/ |
259 | 259 | ||
260 | /* These are in i386_head.S */ | 260 | /* These are in i386_head.S */ |
@@ -1291,9 +1291,9 @@ __init void lguest_init(void) | |||
1291 | */ | 1291 | */ |
1292 | 1292 | ||
1293 | /* Interrupt-related operations */ | 1293 | /* Interrupt-related operations */ |
1294 | pv_irq_ops.save_fl = PV_CALLEE_SAVE(save_fl); | 1294 | pv_irq_ops.save_fl = PV_CALLEE_SAVE(lguest_save_fl); |
1295 | pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(lg_restore_fl); | 1295 | pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(lg_restore_fl); |
1296 | pv_irq_ops.irq_disable = PV_CALLEE_SAVE(irq_disable); | 1296 | pv_irq_ops.irq_disable = PV_CALLEE_SAVE(lguest_irq_disable); |
1297 | pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(lg_irq_enable); | 1297 | pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(lg_irq_enable); |
1298 | pv_irq_ops.safe_halt = lguest_safe_halt; | 1298 | pv_irq_ops.safe_halt = lguest_safe_halt; |
1299 | 1299 | ||
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 992d63bb154f..eabcb6e6a900 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile | |||
@@ -24,7 +24,7 @@ lib-$(CONFIG_SMP) += rwlock.o | |||
24 | lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o | 24 | lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o |
25 | lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o | 25 | lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o |
26 | 26 | ||
27 | obj-y += msr.o msr-reg.o msr-reg-export.o | 27 | obj-y += msr.o msr-reg.o msr-reg-export.o hash.o |
28 | 28 | ||
29 | ifeq ($(CONFIG_X86_32),y) | 29 | ifeq ($(CONFIG_X86_32),y) |
30 | obj-y += atomic64_32.o | 30 | obj-y += atomic64_32.o |
diff --git a/arch/x86/lib/hash.c b/arch/x86/lib/hash.c new file mode 100644 index 000000000000..3056702e81fb --- /dev/null +++ b/arch/x86/lib/hash.c | |||
@@ -0,0 +1,88 @@ | |||
1 | /* | ||
2 | * Some portions derived from code covered by the following notice: | ||
3 | * | ||
4 | * Copyright (c) 2010-2013 Intel Corporation. All rights reserved. | ||
5 | * All rights reserved. | ||
6 | * | ||
7 | * Redistribution and use in source and binary forms, with or without | ||
8 | * modification, are permitted provided that the following conditions | ||
9 | * are met: | ||
10 | * | ||
11 | * * Redistributions of source code must retain the above copyright | ||
12 | * notice, this list of conditions and the following disclaimer. | ||
13 | * * Redistributions in binary form must reproduce the above copyright | ||
14 | * notice, this list of conditions and the following disclaimer in | ||
15 | * the documentation and/or other materials provided with the | ||
16 | * distribution. | ||
17 | * * Neither the name of Intel Corporation nor the names of its | ||
18 | * contributors may be used to endorse or promote products derived | ||
19 | * from this software without specific prior written permission. | ||
20 | * | ||
21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
22 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
24 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
25 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
32 | */ | ||
33 | |||
34 | #include <linux/hash.h> | ||
35 | |||
36 | #include <asm/processor.h> | ||
37 | #include <asm/cpufeature.h> | ||
38 | #include <asm/hash.h> | ||
39 | |||
40 | static inline u32 crc32_u32(u32 crc, u32 val) | ||
41 | { | ||
42 | asm ("crc32l %1,%0\n" : "+r" (crc) : "rm" (val)); | ||
43 | return crc; | ||
44 | } | ||
45 | |||
46 | static u32 intel_crc4_2_hash(const void *data, u32 len, u32 seed) | ||
47 | { | ||
48 | const u32 *p32 = (const u32 *) data; | ||
49 | u32 i, tmp = 0; | ||
50 | |||
51 | for (i = 0; i < len / 4; i++) | ||
52 | seed = crc32_u32(*p32++, seed); | ||
53 | |||
54 | switch (3 - (len & 0x03)) { | ||
55 | case 0: | ||
56 | tmp |= *((const u8 *) p32 + 2) << 16; | ||
57 | /* fallthrough */ | ||
58 | case 1: | ||
59 | tmp |= *((const u8 *) p32 + 1) << 8; | ||
60 | /* fallthrough */ | ||
61 | case 2: | ||
62 | tmp |= *((const u8 *) p32); | ||
63 | seed = crc32_u32(tmp, seed); | ||
64 | default: | ||
65 | break; | ||
66 | } | ||
67 | |||
68 | return seed; | ||
69 | } | ||
70 | |||
71 | static u32 intel_crc4_2_hash2(const u32 *data, u32 len, u32 seed) | ||
72 | { | ||
73 | const u32 *p32 = (const u32 *) data; | ||
74 | u32 i; | ||
75 | |||
76 | for (i = 0; i < len; i++) | ||
77 | seed = crc32_u32(*p32++, seed); | ||
78 | |||
79 | return seed; | ||
80 | } | ||
81 | |||
82 | void setup_arch_fast_hash(struct fast_hash_ops *ops) | ||
83 | { | ||
84 | if (cpu_has_xmm4_2) { | ||
85 | ops->hash = intel_crc4_2_hash; | ||
86 | ops->hash2 = intel_crc4_2_hash2; | ||
87 | } | ||
88 | } | ||
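Editorial aside: setup_arch_fast_hash() above only overrides the ops when SSE4.2 is available. A minimal sketch of the consumer side, assuming the generic arch-fast-hash plumbing this file plugs into exposes struct fast_hash_ops via <linux/hash.h>; the jhash defaults, the initcall and the wrapper name below are illustrative assumptions, not part of this patch:

#include <linux/hash.h>
#include <linux/jhash.h>

/* Defaults used until (or unless) the arch hook installs the CRC32C versions. */
static struct fast_hash_ops hash_ops = {
	.hash  = jhash,
	.hash2 = jhash2,
};

static int __init fast_hash_init(void)
{
	setup_arch_fast_hash(&hash_ops);   /* may replace .hash/.hash2 */
	return 0;
}
early_initcall(fast_hash_init);

static inline u32 my_fast_hash(const void *data, u32 len, u32 seed)
{
	return hash_ops.hash(data, len, seed);
}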
diff --git a/arch/x86/math-emu/errors.c b/arch/x86/math-emu/errors.c index 59d353d2c599..a5449089cd9f 100644 --- a/arch/x86/math-emu/errors.c +++ b/arch/x86/math-emu/errors.c | |||
@@ -330,11 +330,6 @@ asmlinkage void FPU_exception(int n) | |||
330 | 330 | ||
331 | RE_ENTRANT_CHECK_OFF; | 331 | RE_ENTRANT_CHECK_OFF; |
332 | if ((~control_word & n & CW_Exceptions) || (n == EX_INTERNAL)) { | 332 | if ((~control_word & n & CW_Exceptions) || (n == EX_INTERNAL)) { |
333 | #ifdef PRINT_MESSAGES | ||
334 | /* My message from the sponsor */ | ||
335 | printk(FPU_VERSION " " __DATE__ " (C) W. Metzenthen.\n"); | ||
336 | #endif /* PRINT_MESSAGES */ | ||
337 | |||
338 | /* Get a name string for error reporting */ | 333 | /* Get a name string for error reporting */ |
339 | for (i = 0; exception_names[i].type; i++) | 334 | for (i = 0; exception_names[i].type; i++) |
340 | if ((exception_names[i].type & n) == | 335 | if ((exception_names[i].type & n) == |
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 0596e8e0cc19..207d9aef662d 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c | |||
@@ -108,8 +108,8 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, | |||
108 | 108 | ||
109 | static inline void get_head_page_multiple(struct page *page, int nr) | 109 | static inline void get_head_page_multiple(struct page *page, int nr) |
110 | { | 110 | { |
111 | VM_BUG_ON(page != compound_head(page)); | 111 | VM_BUG_ON_PAGE(page != compound_head(page), page); |
112 | VM_BUG_ON(page_count(page) == 0); | 112 | VM_BUG_ON_PAGE(page_count(page) == 0, page); |
113 | atomic_add(nr, &page->_count); | 113 | atomic_add(nr, &page->_count); |
114 | SetPageReferenced(page); | 114 | SetPageReferenced(page); |
115 | } | 115 | } |
@@ -135,7 +135,7 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, | |||
135 | head = pte_page(pte); | 135 | head = pte_page(pte); |
136 | page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); | 136 | page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); |
137 | do { | 137 | do { |
138 | VM_BUG_ON(compound_head(page) != head); | 138 | VM_BUG_ON_PAGE(compound_head(page) != head, page); |
139 | pages[*nr] = page; | 139 | pages[*nr] = page; |
140 | if (PageTail(page)) | 140 | if (PageTail(page)) |
141 | get_huge_page_tail(page); | 141 | get_huge_page_tail(page); |
@@ -212,7 +212,7 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr, | |||
212 | head = pte_page(pte); | 212 | head = pte_page(pte); |
213 | page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); | 213 | page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); |
214 | do { | 214 | do { |
215 | VM_BUG_ON(compound_head(page) != head); | 215 | VM_BUG_ON_PAGE(compound_head(page) != head, page); |
216 | pages[*nr] = page; | 216 | pages[*nr] = page; |
217 | if (PageTail(page)) | 217 | if (PageTail(page)) |
218 | get_huge_page_tail(page); | 218 | get_huge_page_tail(page); |
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 5bdc5430597c..e39504878aec 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -665,7 +665,7 @@ void __init initmem_init(void) | |||
665 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; | 665 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; |
666 | #endif | 666 | #endif |
667 | 667 | ||
668 | memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0); | 668 | memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0); |
669 | sparse_memory_present_with_active_regions(0); | 669 | sparse_memory_present_with_active_regions(0); |
670 | 670 | ||
671 | #ifdef CONFIG_FLATMEM | 671 | #ifdef CONFIG_FLATMEM |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 104d56a9245f..f35c66c5959a 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -643,7 +643,7 @@ kernel_physical_mapping_init(unsigned long start, | |||
643 | #ifndef CONFIG_NUMA | 643 | #ifndef CONFIG_NUMA |
644 | void __init initmem_init(void) | 644 | void __init initmem_init(void) |
645 | { | 645 | { |
646 | memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0); | 646 | memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0); |
647 | } | 647 | } |
648 | #endif | 648 | #endif |
649 | 649 | ||
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c index 8dabbed409ee..1e9da795767a 100644 --- a/arch/x86/mm/memtest.c +++ b/arch/x86/mm/memtest.c | |||
@@ -74,7 +74,7 @@ static void __init do_one_pass(u64 pattern, u64 start, u64 end) | |||
74 | u64 i; | 74 | u64 i; |
75 | phys_addr_t this_start, this_end; | 75 | phys_addr_t this_start, this_end; |
76 | 76 | ||
77 | for_each_free_mem_range(i, MAX_NUMNODES, &this_start, &this_end, NULL) { | 77 | for_each_free_mem_range(i, NUMA_NO_NODE, &this_start, &this_end, NULL) { |
78 | this_start = clamp_t(phys_addr_t, this_start, start, end); | 78 | this_start = clamp_t(phys_addr_t, this_start, start, end); |
79 | this_end = clamp_t(phys_addr_t, this_end, start, end); | 79 | this_end = clamp_t(phys_addr_t, this_end, start, end); |
80 | if (this_start < this_end) { | 80 | if (this_start < this_end) { |
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index c85da7bb6b60..81b2750f3666 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c | |||
@@ -491,7 +491,16 @@ static int __init numa_register_memblks(struct numa_meminfo *mi) | |||
491 | 491 | ||
492 | for (i = 0; i < mi->nr_blks; i++) { | 492 | for (i = 0; i < mi->nr_blks; i++) { |
493 | struct numa_memblk *mb = &mi->blk[i]; | 493 | struct numa_memblk *mb = &mi->blk[i]; |
494 | memblock_set_node(mb->start, mb->end - mb->start, mb->nid); | 494 | memblock_set_node(mb->start, mb->end - mb->start, |
495 | &memblock.memory, mb->nid); | ||
496 | |||
497 | /* | ||
498 | * At this time, all memory regions reserved by memblock are | ||
499 | * used by the kernel. Setting the nid in memblock.reserved will | ||
500 | * mark all the nodes the kernel resides in. | ||
501 | */ | ||
502 | memblock_set_node(mb->start, mb->end - mb->start, | ||
503 | &memblock.reserved, mb->nid); | ||
495 | } | 504 | } |
496 | 505 | ||
497 | /* | 506 | /* |
@@ -553,6 +562,30 @@ static void __init numa_init_array(void) | |||
553 | } | 562 | } |
554 | } | 563 | } |
555 | 564 | ||
565 | static void __init numa_clear_kernel_node_hotplug(void) | ||
566 | { | ||
567 | int i, nid; | ||
568 | nodemask_t numa_kernel_nodes; | ||
569 | unsigned long start, end; | ||
570 | struct memblock_type *type = &memblock.reserved; | ||
571 | |||
572 | /* Mark all kernel nodes. */ | ||
573 | for (i = 0; i < type->cnt; i++) | ||
574 | node_set(type->regions[i].nid, numa_kernel_nodes); | ||
575 | |||
576 | /* Clear MEMBLOCK_HOTPLUG flag for memory in kernel nodes. */ | ||
577 | for (i = 0; i < numa_meminfo.nr_blks; i++) { | ||
578 | nid = numa_meminfo.blk[i].nid; | ||
579 | if (!node_isset(nid, numa_kernel_nodes)) | ||
580 | continue; | ||
581 | |||
582 | start = numa_meminfo.blk[i].start; | ||
583 | end = numa_meminfo.blk[i].end; | ||
584 | |||
585 | memblock_clear_hotplug(start, end - start); | ||
586 | } | ||
587 | } | ||
588 | |||
556 | static int __init numa_init(int (*init_func)(void)) | 589 | static int __init numa_init(int (*init_func)(void)) |
557 | { | 590 | { |
558 | int i; | 591 | int i; |
@@ -565,7 +598,12 @@ static int __init numa_init(int (*init_func)(void)) | |||
565 | nodes_clear(node_possible_map); | 598 | nodes_clear(node_possible_map); |
566 | nodes_clear(node_online_map); | 599 | nodes_clear(node_online_map); |
567 | memset(&numa_meminfo, 0, sizeof(numa_meminfo)); | 600 | memset(&numa_meminfo, 0, sizeof(numa_meminfo)); |
568 | WARN_ON(memblock_set_node(0, ULLONG_MAX, MAX_NUMNODES)); | 601 | WARN_ON(memblock_set_node(0, ULLONG_MAX, &memblock.memory, |
602 | MAX_NUMNODES)); | ||
603 | WARN_ON(memblock_set_node(0, ULLONG_MAX, &memblock.reserved, | ||
604 | MAX_NUMNODES)); | ||
605 | /* In case that parsing SRAT failed. */ | ||
606 | WARN_ON(memblock_clear_hotplug(0, ULLONG_MAX)); | ||
569 | numa_reset_distance(); | 607 | numa_reset_distance(); |
570 | 608 | ||
571 | ret = init_func(); | 609 | ret = init_func(); |
@@ -601,6 +639,16 @@ static int __init numa_init(int (*init_func)(void)) | |||
601 | numa_clear_node(i); | 639 | numa_clear_node(i); |
602 | } | 640 | } |
603 | numa_init_array(); | 641 | numa_init_array(); |
642 | |||
643 | /* | ||
644 | * Very early on, the kernel has to use some memory, e.g. to load | ||
645 | * the kernel image. We cannot prevent this anyway, so any node the | ||
646 | * kernel resides in should be un-hotpluggable. | ||
647 | * | ||
648 | * Also, by the time we get here, numa_init() won't fail. | ||
649 | */ | ||
650 | numa_clear_kernel_node_hotplug(); | ||
651 | |||
604 | return 0; | 652 | return 0; |
605 | } | 653 | } |
606 | 654 | ||
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index 5ecf65117e6f..1953e9c9391a 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c | |||
@@ -191,6 +191,11 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) | |||
191 | (unsigned long long) start, (unsigned long long) end - 1, | 191 | (unsigned long long) start, (unsigned long long) end - 1, |
192 | hotpluggable ? " hotplug" : ""); | 192 | hotpluggable ? " hotplug" : ""); |
193 | 193 | ||
194 | /* Mark hotplug range in memblock. */ | ||
195 | if (hotpluggable && memblock_mark_hotplug(start, ma->length)) | ||
196 | pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n", | ||
197 | (unsigned long long)start, (unsigned long long)end - 1); | ||
198 | |||
194 | return 0; | 199 | return 0; |
195 | out_err_bad_srat: | 200 | out_err_bad_srat: |
196 | bad_srat(); | 201 | bad_srat(); |
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 082e88129712..248642f4bab7 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c | |||
@@ -12,7 +12,6 @@ | |||
12 | 12 | ||
13 | #include <linux/pci.h> | 13 | #include <linux/pci.h> |
14 | #include <linux/init.h> | 14 | #include <linux/init.h> |
15 | #include <linux/acpi.h> | ||
16 | #include <linux/sfi_acpi.h> | 15 | #include <linux/sfi_acpi.h> |
17 | #include <linux/bitmap.h> | 16 | #include <linux/bitmap.h> |
18 | #include <linux/dmi.h> | 17 | #include <linux/dmi.h> |
diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c index 5c90975cdf0f..43984bc1665a 100644 --- a/arch/x86/pci/mmconfig_32.c +++ b/arch/x86/pci/mmconfig_32.c | |||
@@ -14,7 +14,6 @@ | |||
14 | #include <linux/rcupdate.h> | 14 | #include <linux/rcupdate.h> |
15 | #include <asm/e820.h> | 15 | #include <asm/e820.h> |
16 | #include <asm/pci_x86.h> | 16 | #include <asm/pci_x86.h> |
17 | #include <acpi/acpi.h> | ||
18 | 17 | ||
19 | /* Assume systems with more busses have correct MCFG */ | 18 | /* Assume systems with more busses have correct MCFG */ |
20 | #define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG)) | 19 | #define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG)) |
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 5eee4959785d..103e702ec5a7 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c | |||
@@ -337,7 +337,7 @@ out: | |||
337 | return ret; | 337 | return ret; |
338 | } | 338 | } |
339 | 339 | ||
340 | static void xen_initdom_restore_msi_irqs(struct pci_dev *dev, int irq) | 340 | static void xen_initdom_restore_msi_irqs(struct pci_dev *dev) |
341 | { | 341 | { |
342 | int ret = 0; | 342 | int ret = 0; |
343 | 343 | ||
diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c index 7145ec63c520..4df9591eadad 100644 --- a/arch/x86/platform/efi/efi-bgrt.c +++ b/arch/x86/platform/efi/efi-bgrt.c | |||
@@ -49,7 +49,8 @@ void __init efi_bgrt_init(void) | |||
49 | 49 | ||
50 | image = efi_lookup_mapped_addr(bgrt_tab->image_address); | 50 | image = efi_lookup_mapped_addr(bgrt_tab->image_address); |
51 | if (!image) { | 51 | if (!image) { |
52 | image = ioremap(bgrt_tab->image_address, sizeof(bmp_header)); | 52 | image = early_memremap(bgrt_tab->image_address, |
53 | sizeof(bmp_header)); | ||
53 | ioremapped = true; | 54 | ioremapped = true; |
54 | if (!image) | 55 | if (!image) |
55 | return; | 56 | return; |
@@ -57,7 +58,7 @@ void __init efi_bgrt_init(void) | |||
57 | 58 | ||
58 | memcpy_fromio(&bmp_header, image, sizeof(bmp_header)); | 59 | memcpy_fromio(&bmp_header, image, sizeof(bmp_header)); |
59 | if (ioremapped) | 60 | if (ioremapped) |
60 | iounmap(image); | 61 | early_iounmap(image, sizeof(bmp_header)); |
61 | bgrt_image_size = bmp_header.size; | 62 | bgrt_image_size = bmp_header.size; |
62 | 63 | ||
63 | bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL); | 64 | bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL); |
@@ -65,7 +66,8 @@ void __init efi_bgrt_init(void) | |||
65 | return; | 66 | return; |
66 | 67 | ||
67 | if (ioremapped) { | 68 | if (ioremapped) { |
68 | image = ioremap(bgrt_tab->image_address, bmp_header.size); | 69 | image = early_memremap(bgrt_tab->image_address, |
70 | bmp_header.size); | ||
69 | if (!image) { | 71 | if (!image) { |
70 | kfree(bgrt_image); | 72 | kfree(bgrt_image); |
71 | bgrt_image = NULL; | 73 | bgrt_image = NULL; |
@@ -75,5 +77,5 @@ void __init efi_bgrt_init(void) | |||
75 | 77 | ||
76 | memcpy_fromio(bgrt_image, image, bgrt_image_size); | 78 | memcpy_fromio(bgrt_image, image, bgrt_image_size); |
77 | if (ioremapped) | 79 | if (ioremapped) |
78 | iounmap(image); | 80 | early_iounmap(image, bmp_header.size); |
79 | } | 81 | } |
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_ipc.h b/arch/x86/platform/intel-mid/device_libs/platform_ipc.h index 8f568dd79605..79bb09d4f718 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_ipc.h +++ b/arch/x86/platform/intel-mid/device_libs/platform_ipc.h | |||
@@ -12,6 +12,7 @@ | |||
12 | #ifndef _PLATFORM_IPC_H_ | 12 | #ifndef _PLATFORM_IPC_H_ |
13 | #define _PLATFORM_IPC_H_ | 13 | #define _PLATFORM_IPC_H_ |
14 | 14 | ||
15 | extern void __init ipc_device_handler(struct sfi_device_table_entry *pentry, | 15 | void __init |
16 | struct devs_id *dev) __attribute__((weak)); | 16 | ipc_device_handler(struct sfi_device_table_entry *pentry, struct devs_id *dev); |
17 | |||
17 | #endif | 18 | #endif |
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic.h b/arch/x86/platform/intel-mid/device_libs/platform_msic.h index 917eb56d77da..b7be1d041da2 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_msic.h +++ b/arch/x86/platform/intel-mid/device_libs/platform_msic.h | |||
@@ -14,6 +14,6 @@ | |||
14 | 14 | ||
15 | extern struct intel_msic_platform_data msic_pdata; | 15 | extern struct intel_msic_platform_data msic_pdata; |
16 | 16 | ||
17 | extern void *msic_generic_platform_data(void *info, | 17 | void *msic_generic_platform_data(void *info, enum intel_msic_block block); |
18 | enum intel_msic_block block) __attribute__((weak)); | 18 | |
19 | #endif | 19 | #endif |
diff --git a/arch/x86/platform/intel-mid/intel_mid_weak_decls.h b/arch/x86/platform/intel-mid/intel_mid_weak_decls.h index a537ffc16299..46aa25c8ce06 100644 --- a/arch/x86/platform/intel-mid/intel_mid_weak_decls.h +++ b/arch/x86/platform/intel-mid/intel_mid_weak_decls.h | |||
@@ -14,6 +14,6 @@ | |||
14 | /* For every CPU addition a new get_<cpuname>_ops interface needs | 14 | /* For every CPU addition a new get_<cpuname>_ops interface needs |
15 | * to be added. | 15 | * to be added. |
16 | */ | 16 | */ |
17 | extern void * __cpuinit get_penwell_ops(void) __attribute__((weak)); | 17 | extern void *get_penwell_ops(void) __attribute__((weak)); |
18 | extern void * __cpuinit get_cloverview_ops(void) __attribute__((weak)); | 18 | extern void *get_cloverview_ops(void) __attribute__((weak)); |
19 | extern void * __init get_tangier_ops(void) __attribute__((weak)); | 19 | extern void *get_tangier_ops(void) __attribute__((weak)); |
diff --git a/arch/x86/platform/intel-mid/mfld.c b/arch/x86/platform/intel-mid/mfld.c index 4f7884eebc14..23381d2174ae 100644 --- a/arch/x86/platform/intel-mid/mfld.c +++ b/arch/x86/platform/intel-mid/mfld.c | |||
@@ -58,18 +58,18 @@ static unsigned long __init mfld_calibrate_tsc(void) | |||
58 | return 0; | 58 | return 0; |
59 | } | 59 | } |
60 | 60 | ||
61 | static void __init penwell_arch_setup() | 61 | static void __init penwell_arch_setup(void) |
62 | { | 62 | { |
63 | x86_platform.calibrate_tsc = mfld_calibrate_tsc; | 63 | x86_platform.calibrate_tsc = mfld_calibrate_tsc; |
64 | pm_power_off = mfld_power_off; | 64 | pm_power_off = mfld_power_off; |
65 | } | 65 | } |
66 | 66 | ||
67 | void * __cpuinit get_penwell_ops() | 67 | void *get_penwell_ops(void) |
68 | { | 68 | { |
69 | return &penwell_ops; | 69 | return &penwell_ops; |
70 | } | 70 | } |
71 | 71 | ||
72 | void * __cpuinit get_cloverview_ops() | 72 | void *get_cloverview_ops(void) |
73 | { | 73 | { |
74 | return &penwell_ops; | 74 | return &penwell_ops; |
75 | } | 75 | } |
diff --git a/arch/x86/platform/intel-mid/mrfl.c b/arch/x86/platform/intel-mid/mrfl.c index 09d10159e7b7..aaca91753d32 100644 --- a/arch/x86/platform/intel-mid/mrfl.c +++ b/arch/x86/platform/intel-mid/mrfl.c | |||
@@ -97,7 +97,7 @@ static struct intel_mid_ops tangier_ops = { | |||
97 | .arch_setup = tangier_arch_setup, | 97 | .arch_setup = tangier_arch_setup, |
98 | }; | 98 | }; |
99 | 99 | ||
100 | void * __cpuinit get_tangier_ops() | 100 | void *get_tangier_ops(void) |
101 | { | 101 | { |
102 | return &tangier_ops; | 102 | return &tangier_ops; |
103 | } | 103 | } |
diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c index 649a12befba9..08e350e757dc 100644 --- a/arch/x86/platform/olpc/olpc-xo15-sci.c +++ b/arch/x86/platform/olpc/olpc-xo15-sci.c | |||
@@ -15,8 +15,7 @@ | |||
15 | #include <linux/power_supply.h> | 15 | #include <linux/power_supply.h> |
16 | #include <linux/olpc-ec.h> | 16 | #include <linux/olpc-ec.h> |
17 | 17 | ||
18 | #include <acpi/acpi_bus.h> | 18 | #include <linux/acpi.h> |
19 | #include <acpi/acpi_drivers.h> | ||
20 | #include <asm/olpc.h> | 19 | #include <asm/olpc.h> |
21 | 20 | ||
22 | #define DRV_NAME "olpc-xo15-sci" | 21 | #define DRV_NAME "olpc-xo15-sci" |
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c index 8eeccba73130..be27da60dc8f 100644 --- a/arch/x86/platform/uv/uv_nmi.c +++ b/arch/x86/platform/uv/uv_nmi.c | |||
@@ -74,7 +74,6 @@ static atomic_t uv_in_nmi; | |||
74 | static atomic_t uv_nmi_cpu = ATOMIC_INIT(-1); | 74 | static atomic_t uv_nmi_cpu = ATOMIC_INIT(-1); |
75 | static atomic_t uv_nmi_cpus_in_nmi = ATOMIC_INIT(-1); | 75 | static atomic_t uv_nmi_cpus_in_nmi = ATOMIC_INIT(-1); |
76 | static atomic_t uv_nmi_slave_continue; | 76 | static atomic_t uv_nmi_slave_continue; |
77 | static atomic_t uv_nmi_kexec_failed; | ||
78 | static cpumask_var_t uv_nmi_cpu_mask; | 77 | static cpumask_var_t uv_nmi_cpu_mask; |
79 | 78 | ||
80 | /* Values for uv_nmi_slave_continue */ | 79 | /* Values for uv_nmi_slave_continue */ |
@@ -149,7 +148,8 @@ module_param_named(retry_count, uv_nmi_retry_count, int, 0644); | |||
149 | * "dump" - dump process stack for each cpu | 148 | * "dump" - dump process stack for each cpu |
150 | * "ips" - dump IP info for each cpu | 149 | * "ips" - dump IP info for each cpu |
151 | * "kdump" - do crash dump | 150 | * "kdump" - do crash dump |
152 | * "kdb" - enter KDB/KGDB (default) | 151 | * "kdb" - enter KDB (default) |
152 | * "kgdb" - enter KGDB | ||
153 | */ | 153 | */ |
154 | static char uv_nmi_action[8] = "kdb"; | 154 | static char uv_nmi_action[8] = "kdb"; |
155 | module_param_string(action, uv_nmi_action, sizeof(uv_nmi_action), 0644); | 155 | module_param_string(action, uv_nmi_action, sizeof(uv_nmi_action), 0644); |
@@ -504,6 +504,7 @@ static void uv_nmi_touch_watchdogs(void) | |||
504 | } | 504 | } |
505 | 505 | ||
506 | #if defined(CONFIG_KEXEC) | 506 | #if defined(CONFIG_KEXEC) |
507 | static atomic_t uv_nmi_kexec_failed; | ||
507 | static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs) | 508 | static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs) |
508 | { | 509 | { |
509 | /* Call crash to dump system state */ | 510 | /* Call crash to dump system state */ |
@@ -537,18 +538,45 @@ static inline void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs) | |||
537 | } | 538 | } |
538 | #endif /* !CONFIG_KEXEC */ | 539 | #endif /* !CONFIG_KEXEC */ |
539 | 540 | ||
541 | #ifdef CONFIG_KGDB | ||
540 | #ifdef CONFIG_KGDB_KDB | 542 | #ifdef CONFIG_KGDB_KDB |
541 | /* Call KDB from NMI handler */ | 543 | static inline int uv_nmi_kdb_reason(void) |
542 | static void uv_call_kdb(int cpu, struct pt_regs *regs, int master) | ||
543 | { | 544 | { |
544 | int ret; | 545 | return KDB_REASON_SYSTEM_NMI; |
546 | } | ||
547 | #else /* !CONFIG_KGDB_KDB */ | ||
548 | static inline int uv_nmi_kdb_reason(void) | ||
549 | { | ||
550 | /* Ensure the user is expecting to attach a gdb remote */ | ||
551 | if (uv_nmi_action_is("kgdb")) | ||
552 | return 0; | ||
553 | |||
554 | pr_err("UV: NMI error: KDB is not enabled in this kernel\n"); | ||
555 | return -1; | ||
556 | } | ||
557 | #endif /* CONFIG_KGDB_KDB */ | ||
545 | 558 | ||
559 | /* | ||
560 | * Call KGDB/KDB from NMI handler | ||
561 | * | ||
562 | * Note that if both KGDB and KDB are configured, then the action of 'kgdb' or | ||
563 | * 'kdb' has no effect on which is used. See the KGDB documentation for further | ||
564 | * information. | ||
565 | */ | ||
566 | static void uv_call_kgdb_kdb(int cpu, struct pt_regs *regs, int master) | ||
567 | { | ||
546 | if (master) { | 568 | if (master) { |
569 | int reason = uv_nmi_kdb_reason(); | ||
570 | int ret; | ||
571 | |||
572 | if (reason < 0) | ||
573 | return; | ||
574 | |||
547 | /* call KGDB NMI handler as MASTER */ | 575 | /* call KGDB NMI handler as MASTER */ |
548 | ret = kgdb_nmicallin(cpu, X86_TRAP_NMI, regs, | 576 | ret = kgdb_nmicallin(cpu, X86_TRAP_NMI, regs, reason, |
549 | &uv_nmi_slave_continue); | 577 | &uv_nmi_slave_continue); |
550 | if (ret) { | 578 | if (ret) { |
551 | pr_alert("KDB returned error, is kgdboc set?\n"); | 579 | pr_alert("KGDB returned error, is kgdboc set?\n"); |
552 | atomic_set(&uv_nmi_slave_continue, SLAVE_EXIT); | 580 | atomic_set(&uv_nmi_slave_continue, SLAVE_EXIT); |
553 | } | 581 | } |
554 | } else { | 582 | } else { |
@@ -567,12 +595,12 @@ static void uv_call_kdb(int cpu, struct pt_regs *regs, int master) | |||
567 | uv_nmi_sync_exit(master); | 595 | uv_nmi_sync_exit(master); |
568 | } | 596 | } |
569 | 597 | ||
570 | #else /* !CONFIG_KGDB_KDB */ | 598 | #else /* !CONFIG_KGDB */ |
571 | static inline void uv_call_kdb(int cpu, struct pt_regs *regs, int master) | 599 | static inline void uv_call_kgdb_kdb(int cpu, struct pt_regs *regs, int master) |
572 | { | 600 | { |
573 | pr_err("UV: NMI error: KGDB/KDB is not enabled in this kernel\n"); | 601 | pr_err("UV: NMI error: KGDB is not enabled in this kernel\n"); |
574 | } | 602 | } |
575 | #endif /* !CONFIG_KGDB_KDB */ | 603 | #endif /* !CONFIG_KGDB */ |
576 | 604 | ||
577 | /* | 605 | /* |
578 | * UV NMI handler | 606 | * UV NMI handler |
@@ -606,9 +634,9 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) | |||
606 | if (uv_nmi_action_is("ips") || uv_nmi_action_is("dump")) | 634 | if (uv_nmi_action_is("ips") || uv_nmi_action_is("dump")) |
607 | uv_nmi_dump_state(cpu, regs, master); | 635 | uv_nmi_dump_state(cpu, regs, master); |
608 | 636 | ||
609 | /* Call KDB if enabled */ | 637 | /* Call KGDB/KDB if enabled */ |
610 | else if (uv_nmi_action_is("kdb")) | 638 | else if (uv_nmi_action_is("kdb") || uv_nmi_action_is("kgdb")) |
611 | uv_call_kdb(cpu, regs, master); | 639 | uv_call_kgdb_kdb(cpu, regs, master); |
612 | 640 | ||
613 | /* Clear per_cpu "in nmi" flag */ | 641 | /* Clear per_cpu "in nmi" flag */ |
614 | atomic_set(&uv_cpu_nmi.state, UV_NMI_STATE_OUT); | 642 | atomic_set(&uv_cpu_nmi.state, UV_NMI_STATE_OUT); |
@@ -634,7 +662,7 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) | |||
634 | /* | 662 | /* |
635 | * NMI handler for pulling in CPUs when perf events are grabbing our NMI | 663 | * NMI handler for pulling in CPUs when perf events are grabbing our NMI |
636 | */ | 664 | */ |
637 | int uv_handle_nmi_ping(unsigned int reason, struct pt_regs *regs) | 665 | static int uv_handle_nmi_ping(unsigned int reason, struct pt_regs *regs) |
638 | { | 666 | { |
639 | int ret; | 667 | int ret; |
640 | 668 | ||
@@ -651,7 +679,7 @@ int uv_handle_nmi_ping(unsigned int reason, struct pt_regs *regs) | |||
651 | return ret; | 679 | return ret; |
652 | } | 680 | } |
653 | 681 | ||
654 | void uv_register_nmi_notifier(void) | 682 | static void uv_register_nmi_notifier(void) |
655 | { | 683 | { |
656 | if (register_nmi_handler(NMI_UNKNOWN, uv_handle_nmi, 0, "uv")) | 684 | if (register_nmi_handler(NMI_UNKNOWN, uv_handle_nmi, 0, "uv")) |
657 | pr_warn("UV: NMI handler failed to register\n"); | 685 | pr_warn("UV: NMI handler failed to register\n"); |
@@ -695,6 +723,5 @@ void uv_nmi_setup(void) | |||
695 | uv_hub_nmi_per(cpu) = uv_hub_nmi_list[nid]; | 723 | uv_hub_nmi_per(cpu) = uv_hub_nmi_list[nid]; |
696 | } | 724 | } |
697 | BUG_ON(!alloc_cpumask_var(&uv_nmi_cpu_mask, GFP_KERNEL)); | 725 | BUG_ON(!alloc_cpumask_var(&uv_nmi_cpu_mask, GFP_KERNEL)); |
726 | uv_register_nmi_notifier(); | ||
698 | } | 727 | } |
699 | |||
700 | |||
diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile index 9cac82588cbc..3497f14e4dea 100644 --- a/arch/x86/realmode/rm/Makefile +++ b/arch/x86/realmode/rm/Makefile | |||
@@ -64,20 +64,7 @@ $(obj)/realmode.relocs: $(obj)/realmode.elf FORCE | |||
64 | 64 | ||
65 | # --------------------------------------------------------------------------- | 65 | # --------------------------------------------------------------------------- |
66 | 66 | ||
67 | # How to compile the 16-bit code. Note we always compile for -march=i386, | 67 | KBUILD_CFLAGS := $(LINUXINCLUDE) $(REALMODE_CFLAGS) -D_SETUP -D_WAKEUP \ |
68 | # that way we can complain to the user if the CPU is insufficient. | 68 | -I$(srctree)/arch/x86/boot |
69 | KBUILD_CFLAGS := $(LINUXINCLUDE) -m32 -g -Os -D_SETUP -D__KERNEL__ -D_WAKEUP \ | ||
70 | -I$(srctree)/arch/x86/boot \ | ||
71 | -DDISABLE_BRANCH_PROFILING \ | ||
72 | -Wall -Wstrict-prototypes \ | ||
73 | -march=i386 -mregparm=3 \ | ||
74 | -include $(srctree)/$(src)/../../boot/code16gcc.h \ | ||
75 | -fno-strict-aliasing -fomit-frame-pointer -fno-pic \ | ||
76 | -mno-mmx -mno-sse \ | ||
77 | $(call cc-option, -ffreestanding) \ | ||
78 | $(call cc-option, -fno-toplevel-reorder,\ | ||
79 | $(call cc-option, -fno-unit-at-a-time)) \ | ||
80 | $(call cc-option, -fno-stack-protector) \ | ||
81 | $(call cc-option, -mpreferred-stack-boundary=2) | ||
82 | KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ | 69 | KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ |
83 | GCOV_PROFILE := n | 70 | GCOV_PROFILE := n |
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index 11f9285a2ff6..cfbdbdb4e173 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c | |||
@@ -1025,6 +1025,29 @@ static void emit_relocs(int as_text, int use_real_mode) | |||
1025 | } | 1025 | } |
1026 | } | 1026 | } |
1027 | 1027 | ||
1028 | /* | ||
1029 | * As an aid to debugging problems with different linkers | ||
1030 | * print summary information about the relocs. | ||
1031 | * Since different linkers tend to emit the sections in | ||
1032 | * different orders, we use the section names in the output. | ||
1033 | */ | ||
1034 | static int do_reloc_info(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym, | ||
1035 | const char *symname) | ||
1036 | { | ||
1037 | printf("%s\t%s\t%s\t%s\n", | ||
1038 | sec_name(sec->shdr.sh_info), | ||
1039 | rel_type(ELF_R_TYPE(rel->r_info)), | ||
1040 | symname, | ||
1041 | sec_name(sym->st_shndx)); | ||
1042 | return 0; | ||
1043 | } | ||
1044 | |||
1045 | static void print_reloc_info(void) | ||
1046 | { | ||
1047 | printf("reloc section\treloc type\tsymbol\tsymbol section\n"); | ||
1048 | walk_relocs(do_reloc_info); | ||
1049 | } | ||
1050 | |||
1028 | #if ELF_BITS == 64 | 1051 | #if ELF_BITS == 64 |
1029 | # define process process_64 | 1052 | # define process process_64 |
1030 | #else | 1053 | #else |
@@ -1032,7 +1055,8 @@ static void emit_relocs(int as_text, int use_real_mode) | |||
1032 | #endif | 1055 | #endif |
1033 | 1056 | ||
1034 | void process(FILE *fp, int use_real_mode, int as_text, | 1057 | void process(FILE *fp, int use_real_mode, int as_text, |
1035 | int show_absolute_syms, int show_absolute_relocs) | 1058 | int show_absolute_syms, int show_absolute_relocs, |
1059 | int show_reloc_info) | ||
1036 | { | 1060 | { |
1037 | regex_init(use_real_mode); | 1061 | regex_init(use_real_mode); |
1038 | read_ehdr(fp); | 1062 | read_ehdr(fp); |
@@ -1050,5 +1074,9 @@ void process(FILE *fp, int use_real_mode, int as_text, | |||
1050 | print_absolute_relocs(); | 1074 | print_absolute_relocs(); |
1051 | return; | 1075 | return; |
1052 | } | 1076 | } |
1077 | if (show_reloc_info) { | ||
1078 | print_reloc_info(); | ||
1079 | return; | ||
1080 | } | ||
1053 | emit_relocs(as_text, use_real_mode); | 1081 | emit_relocs(as_text, use_real_mode); |
1054 | } | 1082 | } |
diff --git a/arch/x86/tools/relocs.h b/arch/x86/tools/relocs.h index 07cdb1eca4fa..f59590645b68 100644 --- a/arch/x86/tools/relocs.h +++ b/arch/x86/tools/relocs.h | |||
@@ -29,8 +29,9 @@ enum symtype { | |||
29 | }; | 29 | }; |
30 | 30 | ||
31 | void process_32(FILE *fp, int use_real_mode, int as_text, | 31 | void process_32(FILE *fp, int use_real_mode, int as_text, |
32 | int show_absolute_syms, int show_absolute_relocs); | 32 | int show_absolute_syms, int show_absolute_relocs, |
33 | int show_reloc_info); | ||
33 | void process_64(FILE *fp, int use_real_mode, int as_text, | 34 | void process_64(FILE *fp, int use_real_mode, int as_text, |
34 | int show_absolute_syms, int show_absolute_relocs); | 35 | int show_absolute_syms, int show_absolute_relocs, |
35 | 36 | int show_reloc_info); | |
36 | #endif /* RELOCS_H */ | 37 | #endif /* RELOCS_H */ |
diff --git a/arch/x86/tools/relocs_common.c b/arch/x86/tools/relocs_common.c index 44d396823a53..acab636bcb34 100644 --- a/arch/x86/tools/relocs_common.c +++ b/arch/x86/tools/relocs_common.c | |||
@@ -11,12 +11,13 @@ void die(char *fmt, ...) | |||
11 | 11 | ||
12 | static void usage(void) | 12 | static void usage(void) |
13 | { | 13 | { |
14 | die("relocs [--abs-syms|--abs-relocs|--text|--realmode] vmlinux\n"); | 14 | die("relocs [--abs-syms|--abs-relocs|--reloc-info|--text|--realmode]" \ |
15 | " vmlinux\n"); | ||
15 | } | 16 | } |
16 | 17 | ||
17 | int main(int argc, char **argv) | 18 | int main(int argc, char **argv) |
18 | { | 19 | { |
19 | int show_absolute_syms, show_absolute_relocs; | 20 | int show_absolute_syms, show_absolute_relocs, show_reloc_info; |
20 | int as_text, use_real_mode; | 21 | int as_text, use_real_mode; |
21 | const char *fname; | 22 | const char *fname; |
22 | FILE *fp; | 23 | FILE *fp; |
@@ -25,6 +26,7 @@ int main(int argc, char **argv) | |||
25 | 26 | ||
26 | show_absolute_syms = 0; | 27 | show_absolute_syms = 0; |
27 | show_absolute_relocs = 0; | 28 | show_absolute_relocs = 0; |
29 | show_reloc_info = 0; | ||
28 | as_text = 0; | 30 | as_text = 0; |
29 | use_real_mode = 0; | 31 | use_real_mode = 0; |
30 | fname = NULL; | 32 | fname = NULL; |
@@ -39,6 +41,10 @@ int main(int argc, char **argv) | |||
39 | show_absolute_relocs = 1; | 41 | show_absolute_relocs = 1; |
40 | continue; | 42 | continue; |
41 | } | 43 | } |
44 | if (strcmp(arg, "--reloc-info") == 0) { | ||
45 | show_reloc_info = 1; | ||
46 | continue; | ||
47 | } | ||
42 | if (strcmp(arg, "--text") == 0) { | 48 | if (strcmp(arg, "--text") == 0) { |
43 | as_text = 1; | 49 | as_text = 1; |
44 | continue; | 50 | continue; |
@@ -67,10 +73,12 @@ int main(int argc, char **argv) | |||
67 | rewind(fp); | 73 | rewind(fp); |
68 | if (e_ident[EI_CLASS] == ELFCLASS64) | 74 | if (e_ident[EI_CLASS] == ELFCLASS64) |
69 | process_64(fp, use_real_mode, as_text, | 75 | process_64(fp, use_real_mode, as_text, |
70 | show_absolute_syms, show_absolute_relocs); | 76 | show_absolute_syms, show_absolute_relocs, |
77 | show_reloc_info); | ||
71 | else | 78 | else |
72 | process_32(fp, use_real_mode, as_text, | 79 | process_32(fp, use_real_mode, as_text, |
73 | show_absolute_syms, show_absolute_relocs); | 80 | show_absolute_syms, show_absolute_relocs, |
81 | show_reloc_info); | ||
74 | fclose(fp); | 82 | fclose(fp); |
75 | return 0; | 83 | return 0; |
76 | } | 84 | } |
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 1a3c76505649..01b90261fa38 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig | |||
@@ -51,3 +51,7 @@ config XEN_DEBUG_FS | |||
51 | Enable statistics output and various tuning options in debugfs. | 51 | Enable statistics output and various tuning options in debugfs. |
52 | Enabling this option may incur a significant performance overhead. | 52 | Enabling this option may incur a significant performance overhead. |
53 | 53 | ||
54 | config XEN_PVH | ||
55 | bool "Support for running as a PVH guest" | ||
56 | depends on X86_64 && XEN && XEN_PVHVM | ||
57 | def_bool n | ||
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index fa6ade76ef3f..a4d7b647867f 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -262,8 +262,9 @@ static void __init xen_banner(void) | |||
262 | struct xen_extraversion extra; | 262 | struct xen_extraversion extra; |
263 | HYPERVISOR_xen_version(XENVER_extraversion, &extra); | 263 | HYPERVISOR_xen_version(XENVER_extraversion, &extra); |
264 | 264 | ||
265 | printk(KERN_INFO "Booting paravirtualized kernel on %s\n", | 265 | pr_info("Booting paravirtualized kernel %son %s\n", |
266 | pv_info.name); | 266 | xen_feature(XENFEAT_auto_translated_physmap) ? |
267 | "with PVH extensions " : "", pv_info.name); | ||
267 | printk(KERN_INFO "Xen version: %d.%d%s%s\n", | 268 | printk(KERN_INFO "Xen version: %d.%d%s%s\n", |
268 | version >> 16, version & 0xffff, extra.extraversion, | 269 | version >> 16, version & 0xffff, extra.extraversion, |
269 | xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); | 270 | xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); |
@@ -433,7 +434,7 @@ static void __init xen_init_cpuid_mask(void) | |||
433 | 434 | ||
434 | ax = 1; | 435 | ax = 1; |
435 | cx = 0; | 436 | cx = 0; |
436 | xen_cpuid(&ax, &bx, &cx, &dx); | 437 | cpuid(1, &ax, &bx, &cx, &dx); |
437 | 438 | ||
438 | xsave_mask = | 439 | xsave_mask = |
439 | (1 << (X86_FEATURE_XSAVE % 32)) | | 440 | (1 << (X86_FEATURE_XSAVE % 32)) | |
@@ -1142,8 +1143,9 @@ void xen_setup_vcpu_info_placement(void) | |||
1142 | xen_vcpu_setup(cpu); | 1143 | xen_vcpu_setup(cpu); |
1143 | 1144 | ||
1144 | /* xen_vcpu_setup managed to place the vcpu_info within the | 1145 | /* xen_vcpu_setup managed to place the vcpu_info within the |
1145 | percpu area for all cpus, so make use of it */ | 1146 | * percpu area for all cpus, so make use of it. Note that for |
1146 | if (have_vcpu_info_placement) { | 1147 | * PVH we want to use native IRQ mechanism. */ |
1148 | if (have_vcpu_info_placement && !xen_pvh_domain()) { | ||
1147 | pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct); | 1149 | pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct); |
1148 | pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct); | 1150 | pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct); |
1149 | pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct); | 1151 | pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct); |
@@ -1407,9 +1409,49 @@ static void __init xen_boot_params_init_edd(void) | |||
1407 | * Set up the GDT and segment registers for -fstack-protector. Until | 1409 | * Set up the GDT and segment registers for -fstack-protector. Until |
1408 | * we do this, we have to be careful not to call any stack-protected | 1410 | * we do this, we have to be careful not to call any stack-protected |
1409 | * function, which is most of the kernel. | 1411 | * function, which is most of the kernel. |
1412 | * | ||
1413 | * Note that it is __ref because the only caller of this after init | ||
1414 | * is PVH which is not going to use xen_load_gdt_boot or other | ||
1415 | * __init functions. | ||
1410 | */ | 1416 | */ |
1411 | static void __init xen_setup_stackprotector(void) | 1417 | static void __ref xen_setup_gdt(int cpu) |
1412 | { | 1418 | { |
1419 | if (xen_feature(XENFEAT_auto_translated_physmap)) { | ||
1420 | #ifdef CONFIG_X86_64 | ||
1421 | unsigned long dummy; | ||
1422 | |||
1423 | load_percpu_segment(cpu); /* We need to access per-cpu area */ | ||
1424 | switch_to_new_gdt(cpu); /* GDT and GS set */ | ||
1425 | |||
1426 | /* We are switching from the Xen-provided GDT to our HVM mode | ||
1427 | * GDT. The new GDT has __KERNEL_CS with CS.L = 1 | ||
1428 | * and we are jumping to reload it. | ||
1429 | */ | ||
1430 | asm volatile ("pushq %0\n" | ||
1431 | "leaq 1f(%%rip),%0\n" | ||
1432 | "pushq %0\n" | ||
1433 | "lretq\n" | ||
1434 | "1:\n" | ||
1435 | : "=&r" (dummy) : "0" (__KERNEL_CS)); | ||
1436 | |||
1437 | /* | ||
1438 | * While not needed, we also set the %es, %ds, and %fs | ||
1439 | * to zero. We don't care about %ss as it is NULL. | ||
1440 | * Strictly speaking this is not needed as Xen zeros those | ||
1441 | * out (and also MSR_FS_BASE, MSR_GS_BASE, MSR_KERNEL_GS_BASE) | ||
1442 | * | ||
1443 | * Linux zeros them in cpu_init() and in secondary_startup_64 | ||
1444 | * (for BSP). | ||
1445 | */ | ||
1446 | loadsegment(es, 0); | ||
1447 | loadsegment(ds, 0); | ||
1448 | loadsegment(fs, 0); | ||
1449 | #else | ||
1450 | /* PVH: TODO Implement. */ | ||
1451 | BUG(); | ||
1452 | #endif | ||
1453 | return; /* PVH does not need any PV GDT ops. */ | ||
1454 | } | ||
1413 | pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot; | 1455 | pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot; |
1414 | pv_cpu_ops.load_gdt = xen_load_gdt_boot; | 1456 | pv_cpu_ops.load_gdt = xen_load_gdt_boot; |
1415 | 1457 | ||
@@ -1420,6 +1462,46 @@ static void __init xen_setup_stackprotector(void) | |||
1420 | pv_cpu_ops.load_gdt = xen_load_gdt; | 1462 | pv_cpu_ops.load_gdt = xen_load_gdt; |
1421 | } | 1463 | } |
1422 | 1464 | ||
1465 | /* | ||
1466 | * A PV guest starts with default flags that are not set for PVH, set them | ||
1467 | * here asap. | ||
1468 | */ | ||
1469 | static void xen_pvh_set_cr_flags(int cpu) | ||
1470 | { | ||
1471 | |||
1472 | /* Some of these are set up in 'secondary_startup_64'. The others: | ||
1473 | * X86_CR0_TS, X86_CR0_PE, X86_CR0_ET are set by Xen for HVM guests | ||
1474 | * (whose code paths PVH shares), while X86_CR0_PG is for PVH. */ | ||
1475 | write_cr0(read_cr0() | X86_CR0_MP | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM); | ||
1476 | } | ||
1477 | |||
1478 | /* | ||
1479 | * Note that it is __ref because the only caller of this after init | ||
1480 | * is PVH which is not going to use xen_load_gdt_boot or other | ||
1481 | * __init functions. | ||
1482 | */ | ||
1483 | void __ref xen_pvh_secondary_vcpu_init(int cpu) | ||
1484 | { | ||
1485 | xen_setup_gdt(cpu); | ||
1486 | xen_pvh_set_cr_flags(cpu); | ||
1487 | } | ||
1488 | |||
1489 | static void __init xen_pvh_early_guest_init(void) | ||
1490 | { | ||
1491 | if (!xen_feature(XENFEAT_auto_translated_physmap)) | ||
1492 | return; | ||
1493 | |||
1494 | if (!xen_feature(XENFEAT_hvm_callback_vector)) | ||
1495 | return; | ||
1496 | |||
1497 | xen_have_vector_callback = 1; | ||
1498 | xen_pvh_set_cr_flags(0); | ||
1499 | |||
1500 | #ifdef CONFIG_X86_32 | ||
1501 | BUG(); /* PVH: Implement proper support. */ | ||
1502 | #endif | ||
1503 | } | ||
1504 | |||
1423 | /* First C function to be called on Xen boot */ | 1505 | /* First C function to be called on Xen boot */ |
1424 | asmlinkage void __init xen_start_kernel(void) | 1506 | asmlinkage void __init xen_start_kernel(void) |
1425 | { | 1507 | { |
@@ -1431,13 +1513,16 @@ asmlinkage void __init xen_start_kernel(void) | |||
1431 | 1513 | ||
1432 | xen_domain_type = XEN_PV_DOMAIN; | 1514 | xen_domain_type = XEN_PV_DOMAIN; |
1433 | 1515 | ||
1516 | xen_setup_features(); | ||
1517 | xen_pvh_early_guest_init(); | ||
1434 | xen_setup_machphys_mapping(); | 1518 | xen_setup_machphys_mapping(); |
1435 | 1519 | ||
1436 | /* Install Xen paravirt ops */ | 1520 | /* Install Xen paravirt ops */ |
1437 | pv_info = xen_info; | 1521 | pv_info = xen_info; |
1438 | pv_init_ops = xen_init_ops; | 1522 | pv_init_ops = xen_init_ops; |
1439 | pv_cpu_ops = xen_cpu_ops; | ||
1440 | pv_apic_ops = xen_apic_ops; | 1523 | pv_apic_ops = xen_apic_ops; |
1524 | if (!xen_pvh_domain()) | ||
1525 | pv_cpu_ops = xen_cpu_ops; | ||
1441 | 1526 | ||
1442 | x86_init.resources.memory_setup = xen_memory_setup; | 1527 | x86_init.resources.memory_setup = xen_memory_setup; |
1443 | x86_init.oem.arch_setup = xen_arch_setup; | 1528 | x86_init.oem.arch_setup = xen_arch_setup; |
@@ -1469,17 +1554,14 @@ asmlinkage void __init xen_start_kernel(void) | |||
1469 | /* Work out if we support NX */ | 1554 | /* Work out if we support NX */ |
1470 | x86_configure_nx(); | 1555 | x86_configure_nx(); |
1471 | 1556 | ||
1472 | xen_setup_features(); | ||
1473 | |||
1474 | /* Get mfn list */ | 1557 | /* Get mfn list */ |
1475 | if (!xen_feature(XENFEAT_auto_translated_physmap)) | 1558 | xen_build_dynamic_phys_to_machine(); |
1476 | xen_build_dynamic_phys_to_machine(); | ||
1477 | 1559 | ||
1478 | /* | 1560 | /* |
1479 | * Set up kernel GDT and segment registers, mainly so that | 1561 | * Set up kernel GDT and segment registers, mainly so that |
1480 | * -fstack-protector code can be executed. | 1562 | * -fstack-protector code can be executed. |
1481 | */ | 1563 | */ |
1482 | xen_setup_stackprotector(); | 1564 | xen_setup_gdt(0); |
1483 | 1565 | ||
1484 | xen_init_irq_ops(); | 1566 | xen_init_irq_ops(); |
1485 | xen_init_cpuid_mask(); | 1567 | xen_init_cpuid_mask(); |
@@ -1548,14 +1630,18 @@ asmlinkage void __init xen_start_kernel(void) | |||
1548 | /* set the limit of our address space */ | 1630 | /* set the limit of our address space */ |
1549 | xen_reserve_top(); | 1631 | xen_reserve_top(); |
1550 | 1632 | ||
1551 | /* We used to do this in xen_arch_setup, but that is too late on AMD | 1633 | /* PVH: runs at default kernel iopl of 0 */ |
1552 | * were early_cpu_init (run before ->arch_setup()) calls early_amd_init | 1634 | if (!xen_pvh_domain()) { |
1553 | * which pokes 0xcf8 port. | 1635 | /* |
1554 | */ | 1636 | * We used to do this in xen_arch_setup, but that is too late |
1555 | set_iopl.iopl = 1; | 1637 | * on AMD where early_cpu_init (run before ->arch_setup()) calls |
1556 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); | 1638 | * early_amd_init which pokes 0xcf8 port. |
1557 | if (rc != 0) | 1639 | */ |
1558 | xen_raw_printk("physdev_op failed %d\n", rc); | 1640 | set_iopl.iopl = 1; |
1641 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); | ||
1642 | if (rc != 0) | ||
1643 | xen_raw_printk("physdev_op failed %d\n", rc); | ||
1644 | } | ||
1559 | 1645 | ||
1560 | #ifdef CONFIG_X86_32 | 1646 | #ifdef CONFIG_X86_32 |
1561 | /* set up basic CPUID stuff */ | 1647 | /* set up basic CPUID stuff */ |
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c index 3a5f55d51907..c98583588580 100644 --- a/arch/x86/xen/grant-table.c +++ b/arch/x86/xen/grant-table.c | |||
@@ -125,3 +125,67 @@ void arch_gnttab_unmap(void *shared, unsigned long nr_gframes) | |||
125 | apply_to_page_range(&init_mm, (unsigned long)shared, | 125 | apply_to_page_range(&init_mm, (unsigned long)shared, |
126 | PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL); | 126 | PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL); |
127 | } | 127 | } |
128 | #ifdef CONFIG_XEN_PVH | ||
129 | #include <xen/balloon.h> | ||
130 | #include <xen/events.h> | ||
131 | #include <xen/xen.h> | ||
132 | #include <linux/slab.h> | ||
133 | static int __init xlated_setup_gnttab_pages(void) | ||
134 | { | ||
135 | struct page **pages; | ||
136 | xen_pfn_t *pfns; | ||
137 | int rc; | ||
138 | unsigned int i; | ||
139 | unsigned long nr_grant_frames = gnttab_max_grant_frames(); | ||
140 | |||
141 | BUG_ON(nr_grant_frames == 0); | ||
142 | pages = kcalloc(nr_grant_frames, sizeof(pages[0]), GFP_KERNEL); | ||
143 | if (!pages) | ||
144 | return -ENOMEM; | ||
145 | |||
146 | pfns = kcalloc(nr_grant_frames, sizeof(pfns[0]), GFP_KERNEL); | ||
147 | if (!pfns) { | ||
148 | kfree(pages); | ||
149 | return -ENOMEM; | ||
150 | } | ||
151 | rc = alloc_xenballooned_pages(nr_grant_frames, pages, 0 /* lowmem */); | ||
152 | if (rc) { | ||
153 | pr_warn("%s Couldn't balloon alloc %ld pfns rc:%d\n", __func__, | ||
154 | nr_grant_frames, rc); | ||
155 | kfree(pages); | ||
156 | kfree(pfns); | ||
157 | return rc; | ||
158 | } | ||
159 | for (i = 0; i < nr_grant_frames; i++) | ||
160 | pfns[i] = page_to_pfn(pages[i]); | ||
161 | |||
162 | rc = arch_gnttab_map_shared(pfns, nr_grant_frames, nr_grant_frames, | ||
163 | &xen_auto_xlat_grant_frames.vaddr); | ||
164 | |||
165 | if (rc) { | ||
166 | pr_warn("%s Couldn't map %ld pfns rc:%d\n", __func__, | ||
167 | nr_grant_frames, rc); | ||
168 | free_xenballooned_pages(nr_grant_frames, pages); | ||
169 | kfree(pages); | ||
170 | kfree(pfns); | ||
171 | return rc; | ||
172 | } | ||
173 | kfree(pages); | ||
174 | |||
175 | xen_auto_xlat_grant_frames.pfn = pfns; | ||
176 | xen_auto_xlat_grant_frames.count = nr_grant_frames; | ||
177 | |||
178 | return 0; | ||
179 | } | ||
180 | |||
181 | static int __init xen_pvh_gnttab_setup(void) | ||
182 | { | ||
183 | if (!xen_pvh_domain()) | ||
184 | return -ENODEV; | ||
185 | |||
186 | return xlated_setup_gnttab_pages(); | ||
187 | } | ||
188 | /* Call it _before_ __gnttab_init as we need to initialize the | ||
189 | * xen_auto_xlat_grant_frames first. */ | ||
190 | core_initcall(xen_pvh_gnttab_setup); | ||
191 | #endif | ||
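The three error paths in xlated_setup_gnttab_pages() each repeat the kfree() calls. A behaviour-preserving sketch of the same logic using the usual goto-unwind idiom (illustrative rewrite, not part of the patch; assumes the same helpers shown above):

	static int __init xlated_setup_gnttab_pages_sketch(void)
	{
		struct page **pages;
		xen_pfn_t *pfns;
		unsigned long nr = gnttab_max_grant_frames();
		unsigned int i;
		int rc = -ENOMEM;

		BUG_ON(nr == 0);
		pages = kcalloc(nr, sizeof(pages[0]), GFP_KERNEL);
		pfns = kcalloc(nr, sizeof(pfns[0]), GFP_KERNEL);
		if (!pages || !pfns)
			goto err_free;

		rc = alloc_xenballooned_pages(nr, pages, 0 /* lowmem */);
		if (rc)
			goto err_free;

		for (i = 0; i < nr; i++)
			pfns[i] = page_to_pfn(pages[i]);

		rc = arch_gnttab_map_shared(pfns, nr, nr,
					    &xen_auto_xlat_grant_frames.vaddr);
		if (rc)
			goto err_unballoon;

		kfree(pages);	/* the pfn array is kept, the page array is not */
		xen_auto_xlat_grant_frames.pfn = pfns;
		xen_auto_xlat_grant_frames.count = nr;
		return 0;

	err_unballoon:
		free_xenballooned_pages(nr, pages);
	err_free:
		kfree(pages);
		kfree(pfns);
		return rc;
	}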
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 0da7f863056f..08f763de26fe 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c | |||
@@ -5,6 +5,7 @@ | |||
5 | #include <xen/interface/xen.h> | 5 | #include <xen/interface/xen.h> |
6 | #include <xen/interface/sched.h> | 6 | #include <xen/interface/sched.h> |
7 | #include <xen/interface/vcpu.h> | 7 | #include <xen/interface/vcpu.h> |
8 | #include <xen/features.h> | ||
8 | #include <xen/events.h> | 9 | #include <xen/events.h> |
9 | 10 | ||
10 | #include <asm/xen/hypercall.h> | 11 | #include <asm/xen/hypercall.h> |
@@ -22,7 +23,7 @@ void xen_force_evtchn_callback(void) | |||
22 | (void)HYPERVISOR_xen_version(0, NULL); | 23 | (void)HYPERVISOR_xen_version(0, NULL); |
23 | } | 24 | } |
24 | 25 | ||
25 | static unsigned long xen_save_fl(void) | 26 | asmlinkage unsigned long xen_save_fl(void) |
26 | { | 27 | { |
27 | struct vcpu_info *vcpu; | 28 | struct vcpu_info *vcpu; |
28 | unsigned long flags; | 29 | unsigned long flags; |
@@ -40,7 +41,7 @@ static unsigned long xen_save_fl(void) | |||
40 | } | 41 | } |
41 | PV_CALLEE_SAVE_REGS_THUNK(xen_save_fl); | 42 | PV_CALLEE_SAVE_REGS_THUNK(xen_save_fl); |
42 | 43 | ||
43 | static void xen_restore_fl(unsigned long flags) | 44 | __visible void xen_restore_fl(unsigned long flags) |
44 | { | 45 | { |
45 | struct vcpu_info *vcpu; | 46 | struct vcpu_info *vcpu; |
46 | 47 | ||
@@ -62,7 +63,7 @@ static void xen_restore_fl(unsigned long flags) | |||
62 | } | 63 | } |
63 | PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl); | 64 | PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl); |
64 | 65 | ||
65 | static void xen_irq_disable(void) | 66 | asmlinkage void xen_irq_disable(void) |
66 | { | 67 | { |
67 | /* There's a one instruction preempt window here. We need to | 68 | /* There's a one instruction preempt window here. We need to |
68 | make sure we don't switch CPUs between getting the vcpu | 69 | make sure we don't switch CPUs between getting the vcpu |
@@ -73,7 +74,7 @@ static void xen_irq_disable(void) | |||
73 | } | 74 | } |
74 | PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable); | 75 | PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable); |
75 | 76 | ||
76 | static void xen_irq_enable(void) | 77 | asmlinkage void xen_irq_enable(void) |
77 | { | 78 | { |
78 | struct vcpu_info *vcpu; | 79 | struct vcpu_info *vcpu; |
79 | 80 | ||
@@ -128,6 +129,8 @@ static const struct pv_irq_ops xen_irq_ops __initconst = { | |||
128 | 129 | ||
129 | void __init xen_init_irq_ops(void) | 130 | void __init xen_init_irq_ops(void) |
130 | { | 131 | { |
131 | pv_irq_ops = xen_irq_ops; | 132 | /* For PVH we use default pv_irq_ops settings. */ |
133 | if (!xen_feature(XENFEAT_hvm_callback_vector)) | ||
134 | pv_irq_ops = xen_irq_ops; | ||
132 | x86_init.irqs.intr_init = xen_init_IRQ; | 135 | x86_init.irqs.intr_init = xen_init_IRQ; |
133 | } | 136 | } |
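The static-to-asmlinkage/__visible changes in this file matter because PV_CALLEE_SAVE_REGS_THUNK emits an assembly thunk that calls the C function by symbol name, so the symbol must survive compiler optimisation. A rough illustration of that dependency with a simplified, made-up thunk (this is not the real macro expansion):

	/* Because the asm below names xen_save_fl directly, the C definition
	 * must keep an externally visible symbol; a static function the
	 * compiler inlines or renames would leave this reference unresolved. */
	asm(".pushsection .text\n"
	    ".globl my_callee_save_xen_save_fl\n"
	    "my_callee_save_xen_save_fl:\n"
	    "\tcall xen_save_fl\n"
	    "\tret\n"
	    ".popsection");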
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index ce563be09cc1..2423ef04ffea 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -431,7 +431,7 @@ static pteval_t iomap_pte(pteval_t val) | |||
431 | return val; | 431 | return val; |
432 | } | 432 | } |
433 | 433 | ||
434 | static pteval_t xen_pte_val(pte_t pte) | 434 | __visible pteval_t xen_pte_val(pte_t pte) |
435 | { | 435 | { |
436 | pteval_t pteval = pte.pte; | 436 | pteval_t pteval = pte.pte; |
437 | #if 0 | 437 | #if 0 |
@@ -448,7 +448,7 @@ static pteval_t xen_pte_val(pte_t pte) | |||
448 | } | 448 | } |
449 | PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val); | 449 | PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val); |
450 | 450 | ||
451 | static pgdval_t xen_pgd_val(pgd_t pgd) | 451 | __visible pgdval_t xen_pgd_val(pgd_t pgd) |
452 | { | 452 | { |
453 | return pte_mfn_to_pfn(pgd.pgd); | 453 | return pte_mfn_to_pfn(pgd.pgd); |
454 | } | 454 | } |
@@ -479,7 +479,7 @@ void xen_set_pat(u64 pat) | |||
479 | WARN_ON(pat != 0x0007010600070106ull); | 479 | WARN_ON(pat != 0x0007010600070106ull); |
480 | } | 480 | } |
481 | 481 | ||
482 | static pte_t xen_make_pte(pteval_t pte) | 482 | __visible pte_t xen_make_pte(pteval_t pte) |
483 | { | 483 | { |
484 | phys_addr_t addr = (pte & PTE_PFN_MASK); | 484 | phys_addr_t addr = (pte & PTE_PFN_MASK); |
485 | #if 0 | 485 | #if 0 |
@@ -514,14 +514,14 @@ static pte_t xen_make_pte(pteval_t pte) | |||
514 | } | 514 | } |
515 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte); | 515 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte); |
516 | 516 | ||
517 | static pgd_t xen_make_pgd(pgdval_t pgd) | 517 | __visible pgd_t xen_make_pgd(pgdval_t pgd) |
518 | { | 518 | { |
519 | pgd = pte_pfn_to_mfn(pgd); | 519 | pgd = pte_pfn_to_mfn(pgd); |
520 | return native_make_pgd(pgd); | 520 | return native_make_pgd(pgd); |
521 | } | 521 | } |
522 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pgd); | 522 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pgd); |
523 | 523 | ||
524 | static pmdval_t xen_pmd_val(pmd_t pmd) | 524 | __visible pmdval_t xen_pmd_val(pmd_t pmd) |
525 | { | 525 | { |
526 | return pte_mfn_to_pfn(pmd.pmd); | 526 | return pte_mfn_to_pfn(pmd.pmd); |
527 | } | 527 | } |
@@ -580,7 +580,7 @@ static void xen_pmd_clear(pmd_t *pmdp) | |||
580 | } | 580 | } |
581 | #endif /* CONFIG_X86_PAE */ | 581 | #endif /* CONFIG_X86_PAE */ |
582 | 582 | ||
583 | static pmd_t xen_make_pmd(pmdval_t pmd) | 583 | __visible pmd_t xen_make_pmd(pmdval_t pmd) |
584 | { | 584 | { |
585 | pmd = pte_pfn_to_mfn(pmd); | 585 | pmd = pte_pfn_to_mfn(pmd); |
586 | return native_make_pmd(pmd); | 586 | return native_make_pmd(pmd); |
@@ -588,13 +588,13 @@ static pmd_t xen_make_pmd(pmdval_t pmd) | |||
588 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd); | 588 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd); |
589 | 589 | ||
590 | #if PAGETABLE_LEVELS == 4 | 590 | #if PAGETABLE_LEVELS == 4 |
591 | static pudval_t xen_pud_val(pud_t pud) | 591 | __visible pudval_t xen_pud_val(pud_t pud) |
592 | { | 592 | { |
593 | return pte_mfn_to_pfn(pud.pud); | 593 | return pte_mfn_to_pfn(pud.pud); |
594 | } | 594 | } |
595 | PV_CALLEE_SAVE_REGS_THUNK(xen_pud_val); | 595 | PV_CALLEE_SAVE_REGS_THUNK(xen_pud_val); |
596 | 596 | ||
597 | static pud_t xen_make_pud(pudval_t pud) | 597 | __visible pud_t xen_make_pud(pudval_t pud) |
598 | { | 598 | { |
599 | pud = pte_pfn_to_mfn(pud); | 599 | pud = pte_pfn_to_mfn(pud); |
600 | 600 | ||
@@ -1198,44 +1198,40 @@ static void __init xen_cleanhighmap(unsigned long vaddr, | |||
1198 | * instead of somewhere later and be confusing. */ | 1198 | * instead of somewhere later and be confusing. */ |
1199 | xen_mc_flush(); | 1199 | xen_mc_flush(); |
1200 | } | 1200 | } |
1201 | #endif | 1201 | static void __init xen_pagetable_p2m_copy(void) |
1202 | static void __init xen_pagetable_init(void) | ||
1203 | { | 1202 | { |
1204 | #ifdef CONFIG_X86_64 | ||
1205 | unsigned long size; | 1203 | unsigned long size; |
1206 | unsigned long addr; | 1204 | unsigned long addr; |
1207 | #endif | 1205 | unsigned long new_mfn_list; |
1208 | paging_init(); | 1206 | |
1209 | xen_setup_shared_info(); | 1207 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
1210 | #ifdef CONFIG_X86_64 | 1208 | return; |
1211 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { | 1209 | |
1212 | unsigned long new_mfn_list; | 1210 | size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); |
1213 | 1211 | ||
1214 | size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); | 1212 | new_mfn_list = xen_revector_p2m_tree(); |
1215 | 1213 | /* No memory or already called. */ | |
1216 | /* On 32-bit, we get zero so this never gets executed. */ | 1214 | if (!new_mfn_list || new_mfn_list == xen_start_info->mfn_list) |
1217 | new_mfn_list = xen_revector_p2m_tree(); | 1215 | return; |
1218 | if (new_mfn_list && new_mfn_list != xen_start_info->mfn_list) { | 1216 | |
1219 | /* using __ka address and sticking INVALID_P2M_ENTRY! */ | 1217 | /* using __ka address and sticking INVALID_P2M_ENTRY! */ |
1220 | memset((void *)xen_start_info->mfn_list, 0xff, size); | 1218 | memset((void *)xen_start_info->mfn_list, 0xff, size); |
1221 | 1219 | ||
1222 | /* We should be in __ka space. */ | 1220 | /* We should be in __ka space. */ |
1223 | BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map); | 1221 | BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map); |
1224 | addr = xen_start_info->mfn_list; | 1222 | addr = xen_start_info->mfn_list; |
1225 | /* We roundup to the PMD, which means that if anybody at this stage is | 1223 | /* We roundup to the PMD, which means that if anybody at this stage is |
1226 | * using the __ka address of xen_start_info or xen_start_info->shared_info | 1224 | * using the __ka address of xen_start_info or xen_start_info->shared_info |
1227 | * they are going to crash. Fortunately we have already revectored | 1225 | * they are going to crash. Fortunately we have already revectored |
1228 | * in xen_setup_kernel_pagetable and in xen_setup_shared_info. */ | 1226 | * in xen_setup_kernel_pagetable and in xen_setup_shared_info. */ |
1229 | size = roundup(size, PMD_SIZE); | 1227 | size = roundup(size, PMD_SIZE); |
1230 | xen_cleanhighmap(addr, addr + size); | 1228 | xen_cleanhighmap(addr, addr + size); |
1231 | 1229 | ||
1232 | size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); | 1230 | size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); |
1233 | memblock_free(__pa(xen_start_info->mfn_list), size); | 1231 | memblock_free(__pa(xen_start_info->mfn_list), size); |
1234 | /* And revector! Bye bye old array */ | 1232 | /* And revector! Bye bye old array */ |
1235 | xen_start_info->mfn_list = new_mfn_list; | 1233 | xen_start_info->mfn_list = new_mfn_list; |
1236 | } else | 1234 | |
1237 | goto skip; | ||
1238 | } | ||
1239 | /* At this stage, cleanup_highmap has already cleaned __ka space | 1235 | /* At this stage, cleanup_highmap has already cleaned __ka space |
1240 | * from _brk_limit way up to the max_pfn_mapped (which is the end of | 1236 | * from _brk_limit way up to the max_pfn_mapped (which is the end of |
1241 | * the ramdisk). We continue on, erasing PMD entries that point to page | 1237 | * the ramdisk). We continue on, erasing PMD entries that point to page |
@@ -1255,7 +1251,15 @@ static void __init xen_pagetable_init(void) | |||
1255 | * anything at this stage. */ | 1251 | * anything at this stage. */ |
1256 | xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1); | 1252 | xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1); |
1257 | #endif | 1253 | #endif |
1258 | skip: | 1254 | } |
1255 | #endif | ||
1256 | |||
1257 | static void __init xen_pagetable_init(void) | ||
1258 | { | ||
1259 | paging_init(); | ||
1260 | xen_setup_shared_info(); | ||
1261 | #ifdef CONFIG_X86_64 | ||
1262 | xen_pagetable_p2m_copy(); | ||
1259 | #endif | 1263 | #endif |
1260 | xen_post_allocator_init(); | 1264 | xen_post_allocator_init(); |
1261 | } | 1265 | } |
@@ -1753,6 +1757,10 @@ static void set_page_prot_flags(void *addr, pgprot_t prot, unsigned long flags) | |||
1753 | unsigned long pfn = __pa(addr) >> PAGE_SHIFT; | 1757 | unsigned long pfn = __pa(addr) >> PAGE_SHIFT; |
1754 | pte_t pte = pfn_pte(pfn, prot); | 1758 | pte_t pte = pfn_pte(pfn, prot); |
1755 | 1759 | ||
1760 | /* For PVH no need to set R/O or R/W to pin them or unpin them. */ | ||
1761 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
1762 | return; | ||
1763 | |||
1756 | if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags)) | 1764 | if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags)) |
1757 | BUG(); | 1765 | BUG(); |
1758 | } | 1766 | } |
@@ -1863,6 +1871,7 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end, | |||
1863 | * but that's enough to get __va working. We need to fill in the rest | 1871 | * but that's enough to get __va working. We need to fill in the rest |
1864 | * of the physical mapping once some sort of allocator has been set | 1872 | * of the physical mapping once some sort of allocator has been set |
1865 | * up. | 1873 | * up. |
1874 | * NOTE: for PVH, the page tables are native. | ||
1866 | */ | 1875 | */ |
1867 | void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) | 1876 | void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) |
1868 | { | 1877 | { |
@@ -1884,17 +1893,18 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) | |||
1884 | /* Zap identity mapping */ | 1893 | /* Zap identity mapping */ |
1885 | init_level4_pgt[0] = __pgd(0); | 1894 | init_level4_pgt[0] = __pgd(0); |
1886 | 1895 | ||
1887 | /* Pre-constructed entries are in pfn, so convert to mfn */ | 1896 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { |
1888 | /* L4[272] -> level3_ident_pgt | 1897 | /* Pre-constructed entries are in pfn, so convert to mfn */ |
1889 | * L4[511] -> level3_kernel_pgt */ | 1898 | /* L4[272] -> level3_ident_pgt |
1890 | convert_pfn_mfn(init_level4_pgt); | 1899 | * L4[511] -> level3_kernel_pgt */ |
1891 | 1900 | convert_pfn_mfn(init_level4_pgt); | |
1892 | /* L3_i[0] -> level2_ident_pgt */ | 1901 | |
1893 | convert_pfn_mfn(level3_ident_pgt); | 1902 | /* L3_i[0] -> level2_ident_pgt */ |
1894 | /* L3_k[510] -> level2_kernel_pgt | 1903 | convert_pfn_mfn(level3_ident_pgt); |
1895 | * L3_i[511] -> level2_fixmap_pgt */ | 1904 | /* L3_k[510] -> level2_kernel_pgt |
1896 | convert_pfn_mfn(level3_kernel_pgt); | 1905 | * L3_i[511] -> level2_fixmap_pgt */ |
1897 | 1906 | convert_pfn_mfn(level3_kernel_pgt); | |
1907 | } | ||
1898 | /* We get [511][511] and have Xen's version of level2_kernel_pgt */ | 1908 | /* We get [511][511] and have Xen's version of level2_kernel_pgt */ |
1899 | l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); | 1909 | l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); |
1900 | l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); | 1910 | l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); |
@@ -1918,31 +1928,33 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) | |||
1918 | copy_page(level2_fixmap_pgt, l2); | 1928 | copy_page(level2_fixmap_pgt, l2); |
1919 | /* Note that we don't do anything with level1_fixmap_pgt which | 1929 | /* Note that we don't do anything with level1_fixmap_pgt which |
1920 | * we don't need. */ | 1930 | * we don't need. */ |
1931 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { | ||
1932 | /* Make pagetable pieces RO */ | ||
1933 | set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); | ||
1934 | set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); | ||
1935 | set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); | ||
1936 | set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); | ||
1937 | set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO); | ||
1938 | set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); | ||
1939 | set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); | ||
1940 | |||
1941 | /* Pin down new L4 */ | ||
1942 | pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, | ||
1943 | PFN_DOWN(__pa_symbol(init_level4_pgt))); | ||
1944 | |||
1945 | /* Unpin Xen-provided one */ | ||
1946 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); | ||
1921 | 1947 | ||
1922 | /* Make pagetable pieces RO */ | 1948 | /* |
1923 | set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); | 1949 | * At this stage there can be no user pgd, and no page |
1924 | set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); | 1950 | * structure to attach it to, so make sure we just set kernel |
1925 | set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); | 1951 | * pgd. |
1926 | set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); | 1952 | */ |
1927 | set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO); | 1953 | xen_mc_batch(); |
1928 | set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); | 1954 | __xen_write_cr3(true, __pa(init_level4_pgt)); |
1929 | set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); | 1955 | xen_mc_issue(PARAVIRT_LAZY_CPU); |
1930 | 1956 | } else | |
1931 | /* Pin down new L4 */ | 1957 | native_write_cr3(__pa(init_level4_pgt)); |
1932 | pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, | ||
1933 | PFN_DOWN(__pa_symbol(init_level4_pgt))); | ||
1934 | |||
1935 | /* Unpin Xen-provided one */ | ||
1936 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); | ||
1937 | |||
1938 | /* | ||
1939 | * At this stage there can be no user pgd, and no page | ||
1940 | * structure to attach it to, so make sure we just set kernel | ||
1941 | * pgd. | ||
1942 | */ | ||
1943 | xen_mc_batch(); | ||
1944 | __xen_write_cr3(true, __pa(init_level4_pgt)); | ||
1945 | xen_mc_issue(PARAVIRT_LAZY_CPU); | ||
1946 | 1958 | ||
1947 | /* We can't that easily rip out L3 and L2, as the Xen pagetables are | 1959 | /* We can't that easily rip out L3 and L2, as the Xen pagetables are |
1948 | * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for | 1960 | * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for |
@@ -2103,6 +2115,9 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) | |||
2103 | 2115 | ||
2104 | static void __init xen_post_allocator_init(void) | 2116 | static void __init xen_post_allocator_init(void) |
2105 | { | 2117 | { |
2118 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
2119 | return; | ||
2120 | |||
2106 | pv_mmu_ops.set_pte = xen_set_pte; | 2121 | pv_mmu_ops.set_pte = xen_set_pte; |
2107 | pv_mmu_ops.set_pmd = xen_set_pmd; | 2122 | pv_mmu_ops.set_pmd = xen_set_pmd; |
2108 | pv_mmu_ops.set_pud = xen_set_pud; | 2123 | pv_mmu_ops.set_pud = xen_set_pud; |
@@ -2207,6 +2222,15 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { | |||
2207 | void __init xen_init_mmu_ops(void) | 2222 | void __init xen_init_mmu_ops(void) |
2208 | { | 2223 | { |
2209 | x86_init.paging.pagetable_init = xen_pagetable_init; | 2224 | x86_init.paging.pagetable_init = xen_pagetable_init; |
2225 | |||
2226 | /* Optimization - we can use the HVM one but it has no idea which | ||
2227 | * VCPUs are descheduled - which means that it will needlessly IPI | ||
2228 | * them. Xen knows so let it do the job. | ||
2229 | */ | ||
2230 | if (xen_feature(XENFEAT_auto_translated_physmap)) { | ||
2231 | pv_mmu_ops.flush_tlb_others = xen_flush_tlb_others; | ||
2232 | return; | ||
2233 | } | ||
2210 | pv_mmu_ops = xen_mmu_ops; | 2234 | pv_mmu_ops = xen_mmu_ops; |
2211 | 2235 | ||
2212 | memset(dummy_mapping, 0xff, PAGE_SIZE); | 2236 | memset(dummy_mapping, 0xff, PAGE_SIZE); |
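In the PVH branch of xen_init_mmu_ops() above only flush_tlb_others is overridden. A rough sketch of what handing the flush to Xen buys, assuming the standard mmuext_op interface (the helper name is hypothetical, and the real xen_flush_tlb_others batches this through multicalls):

	/* One hypercall tells Xen which vCPUs need a TLB flush; Xen skips the
	 * vCPUs that are currently descheduled instead of the kernel sending
	 * an IPI to every CPU in the mask. */
	static void flush_tlb_on_sketch(const struct cpumask *mask)
	{
		struct mmuext_op op = {
			.cmd = MMUEXT_TLB_FLUSH_MULTI,
		};

		op.arg2.vcpumask = (void *)cpumask_bits(mask);
		if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
			BUG();
	}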
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 2ae8699e8767..8009acbe41e4 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c | |||
@@ -280,6 +280,9 @@ void __ref xen_build_mfn_list_list(void) | |||
280 | { | 280 | { |
281 | unsigned long pfn; | 281 | unsigned long pfn; |
282 | 282 | ||
283 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
284 | return; | ||
285 | |||
283 | /* Pre-initialize p2m_top_mfn to be completely missing */ | 286 | /* Pre-initialize p2m_top_mfn to be completely missing */ |
284 | if (p2m_top_mfn == NULL) { | 287 | if (p2m_top_mfn == NULL) { |
285 | p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); | 288 | p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); |
@@ -336,6 +339,9 @@ void __ref xen_build_mfn_list_list(void) | |||
336 | 339 | ||
337 | void xen_setup_mfn_list_list(void) | 340 | void xen_setup_mfn_list_list(void) |
338 | { | 341 | { |
342 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
343 | return; | ||
344 | |||
339 | BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); | 345 | BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); |
340 | 346 | ||
341 | HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = | 347 | HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = |
@@ -346,10 +352,15 @@ void xen_setup_mfn_list_list(void) | |||
346 | /* Set up p2m_top to point to the domain-builder provided p2m pages */ | 352 | /* Set up p2m_top to point to the domain-builder provided p2m pages */ |
347 | void __init xen_build_dynamic_phys_to_machine(void) | 353 | void __init xen_build_dynamic_phys_to_machine(void) |
348 | { | 354 | { |
349 | unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list; | 355 | unsigned long *mfn_list; |
350 | unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); | 356 | unsigned long max_pfn; |
351 | unsigned long pfn; | 357 | unsigned long pfn; |
352 | 358 | ||
359 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
360 | return; | ||
361 | |||
362 | mfn_list = (unsigned long *)xen_start_info->mfn_list; | ||
363 | max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); | ||
353 | xen_max_p2m_pfn = max_pfn; | 364 | xen_max_p2m_pfn = max_pfn; |
354 | 365 | ||
355 | p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); | 366 | p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); |
@@ -888,13 +899,6 @@ int m2p_add_override(unsigned long mfn, struct page *page, | |||
888 | "m2p_add_override: pfn %lx not mapped", pfn)) | 899 | "m2p_add_override: pfn %lx not mapped", pfn)) |
889 | return -EINVAL; | 900 | return -EINVAL; |
890 | } | 901 | } |
891 | WARN_ON(PagePrivate(page)); | ||
892 | SetPagePrivate(page); | ||
893 | set_page_private(page, mfn); | ||
894 | page->index = pfn_to_mfn(pfn); | ||
895 | |||
896 | if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) | ||
897 | return -ENOMEM; | ||
898 | 902 | ||
899 | if (kmap_op != NULL) { | 903 | if (kmap_op != NULL) { |
900 | if (!PageHighMem(page)) { | 904 | if (!PageHighMem(page)) { |
@@ -933,19 +937,16 @@ int m2p_add_override(unsigned long mfn, struct page *page, | |||
933 | } | 937 | } |
934 | EXPORT_SYMBOL_GPL(m2p_add_override); | 938 | EXPORT_SYMBOL_GPL(m2p_add_override); |
935 | int m2p_remove_override(struct page *page, | 939 | int m2p_remove_override(struct page *page, |
936 | struct gnttab_map_grant_ref *kmap_op) | 940 | struct gnttab_map_grant_ref *kmap_op, |
941 | unsigned long mfn) | ||
937 | { | 942 | { |
938 | unsigned long flags; | 943 | unsigned long flags; |
939 | unsigned long mfn; | ||
940 | unsigned long pfn; | 944 | unsigned long pfn; |
941 | unsigned long uninitialized_var(address); | 945 | unsigned long uninitialized_var(address); |
942 | unsigned level; | 946 | unsigned level; |
943 | pte_t *ptep = NULL; | 947 | pte_t *ptep = NULL; |
944 | 948 | ||
945 | pfn = page_to_pfn(page); | 949 | pfn = page_to_pfn(page); |
946 | mfn = get_phys_to_machine(pfn); | ||
947 | if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) | ||
948 | return -EINVAL; | ||
949 | 950 | ||
950 | if (!PageHighMem(page)) { | 951 | if (!PageHighMem(page)) { |
951 | address = (unsigned long)__va(pfn << PAGE_SHIFT); | 952 | address = (unsigned long)__va(pfn << PAGE_SHIFT); |
@@ -959,10 +960,7 @@ int m2p_remove_override(struct page *page, | |||
959 | spin_lock_irqsave(&m2p_override_lock, flags); | 960 | spin_lock_irqsave(&m2p_override_lock, flags); |
960 | list_del(&page->lru); | 961 | list_del(&page->lru); |
961 | spin_unlock_irqrestore(&m2p_override_lock, flags); | 962 | spin_unlock_irqrestore(&m2p_override_lock, flags); |
962 | WARN_ON(!PagePrivate(page)); | ||
963 | ClearPagePrivate(page); | ||
964 | 963 | ||
965 | set_phys_to_machine(pfn, page->index); | ||
966 | if (kmap_op != NULL) { | 964 | if (kmap_op != NULL) { |
967 | if (!PageHighMem(page)) { | 965 | if (!PageHighMem(page)) { |
968 | struct multicall_space mcs; | 966 | struct multicall_space mcs; |
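Because m2p_remove_override() above no longer stashes the foreign mfn in page->private/page->index, callers must remember it themselves and hand it back at teardown. A hypothetical caller-side sketch (the struct and helper names are illustrative, not from this patch):

	struct foreign_map_sketch {
		struct page *page;
		unsigned long mfn;			/* recorded at map time */
		struct gnttab_map_grant_ref *kmap_op;
	};

	static int teardown_foreign_map_sketch(struct foreign_map_sketch *fm)
	{
		/* the third argument replaces the old get_phys_to_machine() lookup */
		return m2p_remove_override(fm->page, fm->kmap_op, fm->mfn);
	}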
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c index 0a7852483ffe..a8261716d58d 100644 --- a/arch/x86/xen/platform-pci-unplug.c +++ b/arch/x86/xen/platform-pci-unplug.c | |||
@@ -30,10 +30,9 @@ | |||
30 | #define XEN_PLATFORM_ERR_PROTOCOL -2 | 30 | #define XEN_PLATFORM_ERR_PROTOCOL -2 |
31 | #define XEN_PLATFORM_ERR_BLACKLIST -3 | 31 | #define XEN_PLATFORM_ERR_BLACKLIST -3 |
32 | 32 | ||
33 | /* store the value of xen_emul_unplug after the unplug is done */ | ||
34 | int xen_platform_pci_unplug; | ||
35 | EXPORT_SYMBOL_GPL(xen_platform_pci_unplug); | ||
36 | #ifdef CONFIG_XEN_PVHVM | 33 | #ifdef CONFIG_XEN_PVHVM |
34 | /* store the value of xen_emul_unplug after the unplug is done */ | ||
35 | static int xen_platform_pci_unplug; | ||
37 | static int xen_emul_unplug; | 36 | static int xen_emul_unplug; |
38 | 37 | ||
39 | static int check_platform_magic(void) | 38 | static int check_platform_magic(void) |
@@ -69,6 +68,80 @@ static int check_platform_magic(void) | |||
69 | return 0; | 68 | return 0; |
70 | } | 69 | } |
71 | 70 | ||
71 | bool xen_has_pv_devices() | ||
72 | { | ||
73 | if (!xen_domain()) | ||
74 | return false; | ||
75 | |||
76 | /* PV domains always have them. */ | ||
77 | if (xen_pv_domain()) | ||
78 | return true; | ||
79 | |||
80 | /* And the user has xen_platform_pci=0 set in the guest config, as | ||
81 | * the driver did not modify the value. */ | ||
82 | if (xen_platform_pci_unplug == 0) | ||
83 | return false; | ||
84 | |||
85 | if (xen_platform_pci_unplug & XEN_UNPLUG_NEVER) | ||
86 | return false; | ||
87 | |||
88 | if (xen_platform_pci_unplug & XEN_UNPLUG_ALL) | ||
89 | return true; | ||
90 | |||
91 | /* This is an odd one - we are going to run legacy | ||
92 | * and PV drivers at the same time. */ | ||
93 | if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY) | ||
94 | return true; | ||
95 | |||
96 | /* And the caller has to follow with xen_has_pv_{disk,nic}_devices | ||
97 | * to be certain which driver can load. */ | ||
98 | return false; | ||
99 | } | ||
100 | EXPORT_SYMBOL_GPL(xen_has_pv_devices); | ||
101 | |||
102 | static bool __xen_has_pv_device(int state) | ||
103 | { | ||
104 | /* HVM domains might or might not */ | ||
105 | if (xen_hvm_domain() && (xen_platform_pci_unplug & state)) | ||
106 | return true; | ||
107 | |||
108 | return xen_has_pv_devices(); | ||
109 | } | ||
110 | |||
111 | bool xen_has_pv_nic_devices(void) | ||
112 | { | ||
113 | return __xen_has_pv_device(XEN_UNPLUG_ALL_NICS | XEN_UNPLUG_ALL); | ||
114 | } | ||
115 | EXPORT_SYMBOL_GPL(xen_has_pv_nic_devices); | ||
116 | |||
117 | bool xen_has_pv_disk_devices(void) | ||
118 | { | ||
119 | return __xen_has_pv_device(XEN_UNPLUG_ALL_IDE_DISKS | | ||
120 | XEN_UNPLUG_AUX_IDE_DISKS | XEN_UNPLUG_ALL); | ||
121 | } | ||
122 | EXPORT_SYMBOL_GPL(xen_has_pv_disk_devices); | ||
123 | |||
124 | /* | ||
125 | * This one is odd - it determines whether you want to run PV _and_ | ||
126 | * legacy (IDE) drivers together. This combination is only possible | ||
127 | * under HVM. | ||
128 | */ | ||
129 | bool xen_has_pv_and_legacy_disk_devices(void) | ||
130 | { | ||
131 | if (!xen_domain()) | ||
132 | return false; | ||
133 | |||
134 | /* N.B. This is only ever used in HVM mode */ | ||
135 | if (xen_pv_domain()) | ||
136 | return false; | ||
137 | |||
138 | if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY) | ||
139 | return true; | ||
140 | |||
141 | return false; | ||
142 | } | ||
143 | EXPORT_SYMBOL_GPL(xen_has_pv_and_legacy_disk_devices); | ||
144 | |||
72 | void xen_unplug_emulated_devices(void) | 145 | void xen_unplug_emulated_devices(void) |
73 | { | 146 | { |
74 | int r; | 147 | int r; |
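The new xen_has_pv_*_devices() helpers give frontend drivers a single question to ask instead of reading xen_platform_pci_unplug directly. A hypothetical probe sketch (driver and function names are illustrative; assumes <xen/xen.h> and <xen/xenbus.h>):

	/* A PV block frontend bails out of probe while the emulated IDE
	 * devices are still in charge, and binds only when the unplug
	 * protocol (or a pure PV domain) says PV disks are available. */
	static int example_blkfront_probe(struct xenbus_device *dev)
	{
		if (!xen_has_pv_disk_devices())
			return -ENODEV;

		/* normal frontend initialisation would continue here */
		return 0;
	}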
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 68c054f59de6..0982233b9b84 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <xen/interface/memory.h> | 27 | #include <xen/interface/memory.h> |
28 | #include <xen/interface/physdev.h> | 28 | #include <xen/interface/physdev.h> |
29 | #include <xen/features.h> | 29 | #include <xen/features.h> |
30 | #include "mmu.h" | ||
30 | #include "xen-ops.h" | 31 | #include "xen-ops.h" |
31 | #include "vdso.h" | 32 | #include "vdso.h" |
32 | 33 | ||
@@ -34,7 +35,7 @@ | |||
34 | extern const char xen_hypervisor_callback[]; | 35 | extern const char xen_hypervisor_callback[]; |
35 | extern const char xen_failsafe_callback[]; | 36 | extern const char xen_failsafe_callback[]; |
36 | #ifdef CONFIG_X86_64 | 37 | #ifdef CONFIG_X86_64 |
37 | extern const char nmi[]; | 38 | extern asmlinkage void nmi(void); |
38 | #endif | 39 | #endif |
39 | extern void xen_sysenter_target(void); | 40 | extern void xen_sysenter_target(void); |
40 | extern void xen_syscall_target(void); | 41 | extern void xen_syscall_target(void); |
@@ -81,6 +82,9 @@ static void __init xen_add_extra_mem(u64 start, u64 size) | |||
81 | 82 | ||
82 | memblock_reserve(start, size); | 83 | memblock_reserve(start, size); |
83 | 84 | ||
85 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
86 | return; | ||
87 | |||
84 | xen_max_p2m_pfn = PFN_DOWN(start + size); | 88 | xen_max_p2m_pfn = PFN_DOWN(start + size); |
85 | for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) { | 89 | for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) { |
86 | unsigned long mfn = pfn_to_mfn(pfn); | 90 | unsigned long mfn = pfn_to_mfn(pfn); |
@@ -103,6 +107,7 @@ static unsigned long __init xen_do_chunk(unsigned long start, | |||
103 | .domid = DOMID_SELF | 107 | .domid = DOMID_SELF |
104 | }; | 108 | }; |
105 | unsigned long len = 0; | 109 | unsigned long len = 0; |
110 | int xlated_phys = xen_feature(XENFEAT_auto_translated_physmap); | ||
106 | unsigned long pfn; | 111 | unsigned long pfn; |
107 | int ret; | 112 | int ret; |
108 | 113 | ||
@@ -116,7 +121,7 @@ static unsigned long __init xen_do_chunk(unsigned long start, | |||
116 | continue; | 121 | continue; |
117 | frame = mfn; | 122 | frame = mfn; |
118 | } else { | 123 | } else { |
119 | if (mfn != INVALID_P2M_ENTRY) | 124 | if (!xlated_phys && mfn != INVALID_P2M_ENTRY) |
120 | continue; | 125 | continue; |
121 | frame = pfn; | 126 | frame = pfn; |
122 | } | 127 | } |
@@ -154,6 +159,13 @@ static unsigned long __init xen_do_chunk(unsigned long start, | |||
154 | static unsigned long __init xen_release_chunk(unsigned long start, | 159 | static unsigned long __init xen_release_chunk(unsigned long start, |
155 | unsigned long end) | 160 | unsigned long end) |
156 | { | 161 | { |
162 | /* | ||
163 | * Xen already ballooned out the E820 non RAM regions for us | ||
164 | * and set them up properly in EPT. | ||
165 | */ | ||
166 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
167 | return end - start; | ||
168 | |||
157 | return xen_do_chunk(start, end, true); | 169 | return xen_do_chunk(start, end, true); |
158 | } | 170 | } |
159 | 171 | ||
@@ -222,7 +234,13 @@ static void __init xen_set_identity_and_release_chunk( | |||
222 | * (except for the ISA region which must be 1:1 mapped) to | 234 | * (except for the ISA region which must be 1:1 mapped) to |
223 | * release the refcounts (in Xen) on the original frames. | 235 | * release the refcounts (in Xen) on the original frames. |
224 | */ | 236 | */ |
225 | for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) { | 237 | |
238 | /* | ||
239 | * PVH E820 matches the hypervisor's P2M which means we need to | ||
240 | * account for the proper values of *release and *identity. | ||
241 | */ | ||
242 | for (pfn = start_pfn; !xen_feature(XENFEAT_auto_translated_physmap) && | ||
243 | pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) { | ||
226 | pte_t pte = __pte_ma(0); | 244 | pte_t pte = __pte_ma(0); |
227 | 245 | ||
228 | if (pfn < PFN_UP(ISA_END_ADDRESS)) | 246 | if (pfn < PFN_UP(ISA_END_ADDRESS)) |
@@ -559,20 +577,17 @@ void xen_enable_syscall(void) | |||
559 | void xen_enable_nmi(void) | 577 | void xen_enable_nmi(void) |
560 | { | 578 | { |
561 | #ifdef CONFIG_X86_64 | 579 | #ifdef CONFIG_X86_64 |
562 | if (register_callback(CALLBACKTYPE_nmi, nmi)) | 580 | if (register_callback(CALLBACKTYPE_nmi, (char *)nmi)) |
563 | BUG(); | 581 | BUG(); |
564 | #endif | 582 | #endif |
565 | } | 583 | } |
566 | void __init xen_arch_setup(void) | 584 | void __init xen_pvmmu_arch_setup(void) |
567 | { | 585 | { |
568 | xen_panic_handler_init(); | ||
569 | |||
570 | HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); | 586 | HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); |
571 | HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); | 587 | HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); |
572 | 588 | ||
573 | if (!xen_feature(XENFEAT_auto_translated_physmap)) | 589 | HYPERVISOR_vm_assist(VMASST_CMD_enable, |
574 | HYPERVISOR_vm_assist(VMASST_CMD_enable, | 590 | VMASST_TYPE_pae_extended_cr3); |
575 | VMASST_TYPE_pae_extended_cr3); | ||
576 | 591 | ||
577 | if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) || | 592 | if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) || |
578 | register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback)) | 593 | register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback)) |
@@ -581,6 +596,15 @@ void __init xen_arch_setup(void) | |||
581 | xen_enable_sysenter(); | 596 | xen_enable_sysenter(); |
582 | xen_enable_syscall(); | 597 | xen_enable_syscall(); |
583 | xen_enable_nmi(); | 598 | xen_enable_nmi(); |
599 | } | ||
600 | |||
601 | /* This function is not called for HVM domains */ | ||
602 | void __init xen_arch_setup(void) | ||
603 | { | ||
604 | xen_panic_handler_init(); | ||
605 | if (!xen_feature(XENFEAT_auto_translated_physmap)) | ||
606 | xen_pvmmu_arch_setup(); | ||
607 | |||
584 | #ifdef CONFIG_ACPI | 608 | #ifdef CONFIG_ACPI |
585 | if (!(xen_start_info->flags & SIF_INITDOMAIN)) { | 609 | if (!(xen_start_info->flags & SIF_INITDOMAIN)) { |
586 | printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); | 610 | printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); |
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index c36b325abd83..a18eadd8bb40 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -73,9 +73,11 @@ static void cpu_bringup(void) | |||
73 | touch_softlockup_watchdog(); | 73 | touch_softlockup_watchdog(); |
74 | preempt_disable(); | 74 | preempt_disable(); |
75 | 75 | ||
76 | xen_enable_sysenter(); | 76 | /* PVH runs in ring 0 and allows us to do native syscalls. Yay! */ |
77 | xen_enable_syscall(); | 77 | if (!xen_feature(XENFEAT_supervisor_mode_kernel)) { |
78 | 78 | xen_enable_sysenter(); | |
79 | xen_enable_syscall(); | ||
80 | } | ||
79 | cpu = smp_processor_id(); | 81 | cpu = smp_processor_id(); |
80 | smp_store_cpu_info(cpu); | 82 | smp_store_cpu_info(cpu); |
81 | cpu_data(cpu).x86_max_cores = 1; | 83 | cpu_data(cpu).x86_max_cores = 1; |
@@ -97,8 +99,14 @@ static void cpu_bringup(void) | |||
97 | wmb(); /* make sure everything is out */ | 99 | wmb(); /* make sure everything is out */ |
98 | } | 100 | } |
99 | 101 | ||
100 | static void cpu_bringup_and_idle(void) | 102 | /* Note: cpu parameter is only relevant for PVH */ |
103 | static void cpu_bringup_and_idle(int cpu) | ||
101 | { | 104 | { |
105 | #ifdef CONFIG_X86_64 | ||
106 | if (xen_feature(XENFEAT_auto_translated_physmap) && | ||
107 | xen_feature(XENFEAT_supervisor_mode_kernel)) | ||
108 | xen_pvh_secondary_vcpu_init(cpu); | ||
109 | #endif | ||
102 | cpu_bringup(); | 110 | cpu_bringup(); |
103 | cpu_startup_entry(CPUHP_ONLINE); | 111 | cpu_startup_entry(CPUHP_ONLINE); |
104 | } | 112 | } |
@@ -274,9 +282,10 @@ static void __init xen_smp_prepare_boot_cpu(void) | |||
274 | native_smp_prepare_boot_cpu(); | 282 | native_smp_prepare_boot_cpu(); |
275 | 283 | ||
276 | if (xen_pv_domain()) { | 284 | if (xen_pv_domain()) { |
277 | /* We've switched to the "real" per-cpu gdt, so make sure the | 285 | if (!xen_feature(XENFEAT_writable_page_tables)) |
278 | old memory can be recycled */ | 286 | /* We've switched to the "real" per-cpu gdt, so make |
279 | make_lowmem_page_readwrite(xen_initial_gdt); | 287 | * sure the old memory can be recycled. */ |
288 | make_lowmem_page_readwrite(xen_initial_gdt); | ||
280 | 289 | ||
281 | #ifdef CONFIG_X86_32 | 290 | #ifdef CONFIG_X86_32 |
282 | /* | 291 | /* |
@@ -360,22 +369,21 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) | |||
360 | 369 | ||
361 | gdt = get_cpu_gdt_table(cpu); | 370 | gdt = get_cpu_gdt_table(cpu); |
362 | 371 | ||
363 | ctxt->flags = VGCF_IN_KERNEL; | ||
364 | ctxt->user_regs.ss = __KERNEL_DS; | ||
365 | #ifdef CONFIG_X86_32 | 372 | #ifdef CONFIG_X86_32 |
373 | /* Note: PVH is not yet supported on x86_32. */ | ||
366 | ctxt->user_regs.fs = __KERNEL_PERCPU; | 374 | ctxt->user_regs.fs = __KERNEL_PERCPU; |
367 | ctxt->user_regs.gs = __KERNEL_STACK_CANARY; | 375 | ctxt->user_regs.gs = __KERNEL_STACK_CANARY; |
368 | #else | ||
369 | ctxt->gs_base_kernel = per_cpu_offset(cpu); | ||
370 | #endif | 376 | #endif |
371 | ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; | 377 | ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; |
372 | 378 | ||
373 | memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); | 379 | memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); |
374 | 380 | ||
375 | { | 381 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { |
382 | ctxt->flags = VGCF_IN_KERNEL; | ||
376 | ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ | 383 | ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ |
377 | ctxt->user_regs.ds = __USER_DS; | 384 | ctxt->user_regs.ds = __USER_DS; |
378 | ctxt->user_regs.es = __USER_DS; | 385 | ctxt->user_regs.es = __USER_DS; |
386 | ctxt->user_regs.ss = __KERNEL_DS; | ||
379 | 387 | ||
380 | xen_copy_trap_info(ctxt->trap_ctxt); | 388 | xen_copy_trap_info(ctxt->trap_ctxt); |
381 | 389 | ||
@@ -396,18 +404,27 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) | |||
396 | #ifdef CONFIG_X86_32 | 404 | #ifdef CONFIG_X86_32 |
397 | ctxt->event_callback_cs = __KERNEL_CS; | 405 | ctxt->event_callback_cs = __KERNEL_CS; |
398 | ctxt->failsafe_callback_cs = __KERNEL_CS; | 406 | ctxt->failsafe_callback_cs = __KERNEL_CS; |
407 | #else | ||
408 | ctxt->gs_base_kernel = per_cpu_offset(cpu); | ||
399 | #endif | 409 | #endif |
400 | ctxt->event_callback_eip = | 410 | ctxt->event_callback_eip = |
401 | (unsigned long)xen_hypervisor_callback; | 411 | (unsigned long)xen_hypervisor_callback; |
402 | ctxt->failsafe_callback_eip = | 412 | ctxt->failsafe_callback_eip = |
403 | (unsigned long)xen_failsafe_callback; | 413 | (unsigned long)xen_failsafe_callback; |
414 | ctxt->user_regs.cs = __KERNEL_CS; | ||
415 | per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); | ||
416 | #ifdef CONFIG_X86_32 | ||
404 | } | 417 | } |
405 | ctxt->user_regs.cs = __KERNEL_CS; | 418 | #else |
419 | } else | ||
420 | /* N.B. The user_regs.eip (cpu_bringup_and_idle) is called with | ||
421 | * %rdi having the cpu number - which means are passing in | ||
422 | * as the first parameter the cpu. Subtle! | ||
423 | */ | ||
424 | ctxt->user_regs.rdi = cpu; | ||
425 | #endif | ||
406 | ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); | 426 | ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); |
407 | |||
408 | per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); | ||
409 | ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); | 427 | ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); |
410 | |||
411 | if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt)) | 428 | if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt)) |
412 | BUG(); | 429 | BUG(); |
413 | 430 | ||
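One subtlety in the smp.c hunk above: on x86-64 the first integer argument of a C function is passed in %rdi, which is why loading the CPU number into ctxt->user_regs.rdi is all it takes for the now-parameterised entry point to receive it. A minimal illustration (assumes the SysV AMD64 calling convention; the wrapper is a stand-in):

	/* A vCPU started with user_regs.rdi == cpu and user_regs.eip == entry
	 * behaves as if the hypervisor had made the call below, because %rdi
	 * is the first-argument register. */
	static void start_vcpu_as_if_called(void (*entry)(int), int cpu)
	{
		entry(cpu);		/* cpu arrives in %rdi on x86-64 */
	}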
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 0e36cde12f7e..581521c843a5 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c | |||
@@ -106,7 +106,7 @@ static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting); | |||
106 | static cpumask_t waiting_cpus; | 106 | static cpumask_t waiting_cpus; |
107 | 107 | ||
108 | static bool xen_pvspin = true; | 108 | static bool xen_pvspin = true; |
109 | static void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want) | 109 | __visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want) |
110 | { | 110 | { |
111 | int irq = __this_cpu_read(lock_kicker_irq); | 111 | int irq = __this_cpu_read(lock_kicker_irq); |
112 | struct xen_lock_waiting *w = &__get_cpu_var(lock_waiting); | 112 | struct xen_lock_waiting *w = &__get_cpu_var(lock_waiting); |
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 12a1ca707b94..7b78f88c1707 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c | |||
@@ -446,6 +446,7 @@ void xen_setup_timer(int cpu) | |||
446 | IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER| | 446 | IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER| |
447 | IRQF_FORCE_RESUME, | 447 | IRQF_FORCE_RESUME, |
448 | name, NULL); | 448 | name, NULL); |
449 | (void)xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX); | ||
449 | 450 | ||
450 | memcpy(evt, xen_clockevent, sizeof(*evt)); | 451 | memcpy(evt, xen_clockevent, sizeof(*evt)); |
451 | 452 | ||
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 7faed5869e5b..485b69585540 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S | |||
@@ -11,8 +11,28 @@ | |||
11 | #include <asm/page_types.h> | 11 | #include <asm/page_types.h> |
12 | 12 | ||
13 | #include <xen/interface/elfnote.h> | 13 | #include <xen/interface/elfnote.h> |
14 | #include <xen/interface/features.h> | ||
14 | #include <asm/xen/interface.h> | 15 | #include <asm/xen/interface.h> |
15 | 16 | ||
17 | #ifdef CONFIG_XEN_PVH | ||
18 | #define PVH_FEATURES_STR "|writable_descriptor_tables|auto_translated_physmap|supervisor_mode_kernel" | ||
19 | /* Note the lack of 'hvm_callback_vector'. Older hypervisors will | ||
20 | * balk at this being part of XEN_ELFNOTE_FEATURES, so we put it in | ||
21 | * XEN_ELFNOTE_SUPPORTED_FEATURES which older hypervisors will ignore. | ||
22 | */ | ||
23 | #define PVH_FEATURES ((1 << XENFEAT_writable_page_tables) | \ | ||
24 | (1 << XENFEAT_auto_translated_physmap) | \ | ||
25 | (1 << XENFEAT_supervisor_mode_kernel) | \ | ||
26 | (1 << XENFEAT_hvm_callback_vector)) | ||
27 | /* The XENFEAT_writable_page_tables is not strictly necessary as we set that | ||
28 | * up regardless of whether this CONFIG option is enabled or not, but it | ||
29 | * clarifies what the right flags need to be. | ||
30 | */ | ||
31 | #else | ||
32 | #define PVH_FEATURES_STR "" | ||
33 | #define PVH_FEATURES (0) | ||
34 | #endif | ||
35 | |||
16 | __INIT | 36 | __INIT |
17 | ENTRY(startup_xen) | 37 | ENTRY(startup_xen) |
18 | cld | 38 | cld |
@@ -95,7 +115,10 @@ NEXT_HYPERCALL(arch_6) | |||
95 | #endif | 115 | #endif |
96 | ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen) | 116 | ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen) |
97 | ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page) | 117 | ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page) |
98 | ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") | 118 | ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .ascii "!writable_page_tables|pae_pgdir_above_4gb"; .asciz PVH_FEATURES_STR) |
119 | ELFNOTE(Xen, XEN_ELFNOTE_SUPPORTED_FEATURES, .long (PVH_FEATURES) | | ||
120 | (1 << XENFEAT_writable_page_tables) | | ||
121 | (1 << XENFEAT_dom0)) | ||
99 | ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") | 122 | ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") |
100 | ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") | 123 | ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") |
101 | ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, | 124 | ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, |
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 95f8c6142328..1cb6f4c37300 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -123,4 +123,5 @@ __visible void xen_adjust_exception_frame(void); | |||
123 | 123 | ||
124 | extern int xen_panic_handler_init(void); | 124 | extern int xen_panic_handler_init(void); |
125 | 125 | ||
126 | void xen_pvh_secondary_vcpu_init(int cpu); | ||
126 | #endif /* XEN_OPS_H */ | 127 | #endif /* XEN_OPS_H */ |