Diffstat (limited to 'arch/x86')
101 files changed, 3512 insertions, 1215 deletions
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 3e0baf726eef..137dfa96aa14 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -113,7 +113,6 @@ config DEBUG_RODATA_TEST
 config DEBUG_WX
 	bool "Warn on W+X mappings at boot"
 	depends on DEBUG_RODATA
-	default y
 	select X86_PTDUMP_CORE
 	---help---
 	  Generate a warning if any W+X mappings are found at boot.
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 2dfaa72260b4..4086abca0b32 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -171,9 +171,11 @@ asinstr += $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1)
 asinstr += $(call as-instr,crc32l %eax$(comma)%eax,-DCONFIG_AS_CRC32=1)
 avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1)
 avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)
+sha1_ni_instr :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA1_NI=1)
+sha256_ni_instr :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA256_NI=1)
 
-KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr)
-KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr)
+KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(sha1_ni_instr) $(sha256_ni_instr)
+KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(sha1_ni_instr) $(sha256_ni_instr)
 
 LDFLAGS := -m elf_$(UTS_MACHINE)
 
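[Editor's note, not part of the patch: the two new as-instr probes above only test whether the build assembler accepts a SHA-NI instruction; on success, -DCONFIG_AS_SHA1_NI=1 and -DCONFIG_AS_SHA256_NI=1 land on every C and assembler command line. A minimal sketch of how such a define is consumed — the glue changes later in this patch follow exactly this pattern; the #else comment here is illustrative:]

    #ifdef CONFIG_AS_SHA1_NI
    /* Assembler can emit SHA-NI: the optimized sha1_ni_asm.S routine is
     * built, and its prototype is visible to the glue code. */
    asmlinkage void sha1_ni_transform(u32 *digest, const char *data,
                                      unsigned int rounds);
    #else
    /* Assembler too old for SHA-NI: the glue compiles stub
     * register/unregister helpers instead, so only the SSSE3/AVX/AVX2
     * variants are offered at runtime. */
    #endif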
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 0d553e54171b..2ee62dba0373 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -9,13 +9,13 @@
 # Changed by many, many contributors over the years.
 #
 
+KASAN_SANITIZE := n
+
 # If you want to preset the SVGA mode, uncomment the next line and
 # set SVGA_MODE to whatever number you want.
 # Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode.
 # The number is the same as you would ordinarily press at bootup.
 
-KASAN_SANITIZE := n
-
 SVGA_MODE := -DSVGA_MODE=NORMAL_VGA
 
 targets := vmlinux.bin setup.bin setup.elf bzImage
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index 0033e96c3f09..9011a88353de 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -23,7 +23,6 @@
 #include <stdarg.h>
 #include <linux/types.h>
 #include <linux/edd.h>
-#include <asm/boot.h>
 #include <asm/setup.h>
 #include "bitops.h"
 #include "ctype.h"
diff --git a/arch/x86/boot/video-mode.c b/arch/x86/boot/video-mode.c
index aa8a96b052e3..95c7a818c0ed 100644
--- a/arch/x86/boot/video-mode.c
+++ b/arch/x86/boot/video-mode.c
@@ -19,6 +19,8 @@
 #include "video.h"
 #include "vesa.h"
 
+#include <uapi/asm/boot.h>
+
 /*
  * Common variables
  */
diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c
index 05111bb8d018..77780e386e9b 100644
--- a/arch/x86/boot/video.c
+++ b/arch/x86/boot/video.c
@@ -13,6 +13,8 @@
  * Select video mode
  */
 
+#include <uapi/asm/boot.h>
+
 #include "boot.h"
 #include "video.h"
 #include "vesa.h"
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 9a2838cf0591..b9b912a44d61 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -5,6 +5,8 @@
 avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no)
 avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
 			$(comma)4)$(comma)%ymm2,yes,no)
+sha1_ni_supported :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,yes,no)
+sha256_ni_supported :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,yes,no)
 
 obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
 
@@ -91,9 +93,15 @@ ifeq ($(avx2_supported),yes)
 	sha1-ssse3-y += sha1_avx2_x86_64_asm.o
 	poly1305-x86_64-y += poly1305-avx2-x86_64.o
 endif
+ifeq ($(sha1_ni_supported),yes)
+	sha1-ssse3-y += sha1_ni_asm.o
+endif
 crc32c-intel-y := crc32c-intel_glue.o
 crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o
 crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o
 sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o
+ifeq ($(sha256_ni_supported),yes)
+	sha256-ssse3-y += sha256_ni_asm.o
+endif
 sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o
 crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index 225be06edc80..4fe27e074194 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -330,7 +330,7 @@ ENDPROC(crc_pcl)
 ## PCLMULQDQ tables
 ## Table is 128 entries x 2 words (8 bytes) each
 ################################################################
-.section	.rotata, "a", %progbits
+.section	.rodata, "a", %progbits
 .align 8
 K_table:
 	.long 0x493c7d27, 0x00000001
diff --git a/arch/x86/crypto/sha1_ni_asm.S b/arch/x86/crypto/sha1_ni_asm.S
new file mode 100644
index 000000000000..874a651b9e7d
--- /dev/null
+++ b/arch/x86/crypto/sha1_ni_asm.S
@@ -0,0 +1,302 @@
+/*
+ * Intel SHA Extensions optimized implementation of a SHA-1 update function
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * 	Sean Gulley <sean.m.gulley@intel.com>
+ * 	Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 	* Redistributions of source code must retain the above copyright
+ * 	  notice, this list of conditions and the following disclaimer.
+ * 	* Redistributions in binary form must reproduce the above copyright
+ * 	  notice, this list of conditions and the following disclaimer in
+ * 	  the documentation and/or other materials provided with the
+ * 	  distribution.
+ * 	* Neither the name of Intel Corporation nor the names of its
+ * 	  contributors may be used to endorse or promote products derived
+ * 	  from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/linkage.h>
+
+#define DIGEST_PTR	%rdi	/* 1st arg */
+#define DATA_PTR	%rsi	/* 2nd arg */
+#define NUM_BLKS	%rdx	/* 3rd arg */
+
+#define RSPSAVE		%rax
+
+/* gcc conversion */
+#define FRAME_SIZE	32	/* space for 2x16 bytes */
+
+#define ABCD		%xmm0
+#define E0		%xmm1	/* Need two E's b/c they ping pong */
+#define E1		%xmm2
+#define MSG0		%xmm3
+#define MSG1		%xmm4
+#define MSG2		%xmm5
+#define MSG3		%xmm6
+#define SHUF_MASK	%xmm7
+
+
+/*
+ * Intel SHA Extensions optimized implementation of a SHA-1 update function
+ *
+ * The function takes a pointer to the current hash values, a pointer to the
+ * input data, and a number of 64 byte blocks to process.  Once all blocks have
+ * been processed, the digest pointer is updated with the resulting hash value.
+ * The function only processes complete blocks, there is no functionality to
+ * store partial blocks.  All message padding and hash value initialization must
+ * be done outside the update function.
+ *
+ * The indented lines in the loop are instructions related to rounds processing.
+ * The non-indented lines are instructions related to the message schedule.
+ *
+ * void sha1_ni_transform(uint32_t *digest, const void *data,
+		uint32_t numBlocks)
+ * digest : pointer to digest
+ * data: pointer to input data
+ * numBlocks: Number of blocks to process
+ */
+.text
+.align 32
+ENTRY(sha1_ni_transform)
+	mov		%rsp, RSPSAVE
+	sub		$FRAME_SIZE, %rsp
+	and		$~0xF, %rsp
+
+	shl		$6, NUM_BLKS		/* convert to bytes */
+	jz		.Ldone_hash
+	add		DATA_PTR, NUM_BLKS	/* pointer to end of data */
+
+	/* load initial hash values */
+	pinsrd		$3, 1*16(DIGEST_PTR), E0
+	movdqu		0*16(DIGEST_PTR), ABCD
+	pand		UPPER_WORD_MASK(%rip), E0
+	pshufd		$0x1B, ABCD, ABCD
+
+	movdqa		PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
+
+.Lloop0:
+	/* Save hash values for addition after rounds */
+	movdqa		E0, (0*16)(%rsp)
+	movdqa		ABCD, (1*16)(%rsp)
+
+	/* Rounds 0-3 */
+	movdqu		0*16(DATA_PTR), MSG0
+	pshufb		SHUF_MASK, MSG0
+		paddd		MSG0, E0
+		movdqa		ABCD, E1
+		sha1rnds4	$0, E0, ABCD
+
+	/* Rounds 4-7 */
+	movdqu		1*16(DATA_PTR), MSG1
+	pshufb		SHUF_MASK, MSG1
+		sha1nexte	MSG1, E1
+		movdqa		ABCD, E0
+		sha1rnds4	$0, E1, ABCD
+	sha1msg1	MSG1, MSG0
+
+	/* Rounds 8-11 */
+	movdqu		2*16(DATA_PTR), MSG2
+	pshufb		SHUF_MASK, MSG2
+		sha1nexte	MSG2, E0
+		movdqa		ABCD, E1
+		sha1rnds4	$0, E0, ABCD
+	sha1msg1	MSG2, MSG1
+	pxor		MSG2, MSG0
+
+	/* Rounds 12-15 */
+	movdqu		3*16(DATA_PTR), MSG3
+	pshufb		SHUF_MASK, MSG3
+		sha1nexte	MSG3, E1
+		movdqa		ABCD, E0
+	sha1msg2	MSG3, MSG0
+		sha1rnds4	$0, E1, ABCD
+	sha1msg1	MSG3, MSG2
+	pxor		MSG3, MSG1
+
+	/* Rounds 16-19 */
+		sha1nexte	MSG0, E0
+		movdqa		ABCD, E1
+	sha1msg2	MSG0, MSG1
+		sha1rnds4	$0, E0, ABCD
+	sha1msg1	MSG0, MSG3
+	pxor		MSG0, MSG2
+
+	/* Rounds 20-23 */
+		sha1nexte	MSG1, E1
+		movdqa		ABCD, E0
+	sha1msg2	MSG1, MSG2
+		sha1rnds4	$1, E1, ABCD
+	sha1msg1	MSG1, MSG0
+	pxor		MSG1, MSG3
+
+	/* Rounds 24-27 */
+		sha1nexte	MSG2, E0
+		movdqa		ABCD, E1
+	sha1msg2	MSG2, MSG3
+		sha1rnds4	$1, E0, ABCD
+	sha1msg1	MSG2, MSG1
+	pxor		MSG2, MSG0
+
+	/* Rounds 28-31 */
+		sha1nexte	MSG3, E1
+		movdqa		ABCD, E0
+	sha1msg2	MSG3, MSG0
+		sha1rnds4	$1, E1, ABCD
+	sha1msg1	MSG3, MSG2
+	pxor		MSG3, MSG1
+
+	/* Rounds 32-35 */
+		sha1nexte	MSG0, E0
+		movdqa		ABCD, E1
+	sha1msg2	MSG0, MSG1
+		sha1rnds4	$1, E0, ABCD
+	sha1msg1	MSG0, MSG3
+	pxor		MSG0, MSG2
+
+	/* Rounds 36-39 */
+		sha1nexte	MSG1, E1
+		movdqa		ABCD, E0
+	sha1msg2	MSG1, MSG2
+		sha1rnds4	$1, E1, ABCD
+	sha1msg1	MSG1, MSG0
+	pxor		MSG1, MSG3
+
+	/* Rounds 40-43 */
+		sha1nexte	MSG2, E0
+		movdqa		ABCD, E1
+	sha1msg2	MSG2, MSG3
+		sha1rnds4	$2, E0, ABCD
+	sha1msg1	MSG2, MSG1
+	pxor		MSG2, MSG0
+
+	/* Rounds 44-47 */
+		sha1nexte	MSG3, E1
+		movdqa		ABCD, E0
+	sha1msg2	MSG3, MSG0
+		sha1rnds4	$2, E1, ABCD
+	sha1msg1	MSG3, MSG2
+	pxor		MSG3, MSG1
+
+	/* Rounds 48-51 */
+		sha1nexte	MSG0, E0
+		movdqa		ABCD, E1
+	sha1msg2	MSG0, MSG1
+		sha1rnds4	$2, E0, ABCD
+	sha1msg1	MSG0, MSG3
+	pxor		MSG0, MSG2
+
+	/* Rounds 52-55 */
+		sha1nexte	MSG1, E1
+		movdqa		ABCD, E0
+	sha1msg2	MSG1, MSG2
+		sha1rnds4	$2, E1, ABCD
+	sha1msg1	MSG1, MSG0
+	pxor		MSG1, MSG3
+
+	/* Rounds 56-59 */
+		sha1nexte	MSG2, E0
+		movdqa		ABCD, E1
+	sha1msg2	MSG2, MSG3
+		sha1rnds4	$2, E0, ABCD
+	sha1msg1	MSG2, MSG1
+	pxor		MSG2, MSG0
+
+	/* Rounds 60-63 */
+		sha1nexte	MSG3, E1
+		movdqa		ABCD, E0
+	sha1msg2	MSG3, MSG0
+		sha1rnds4	$3, E1, ABCD
+	sha1msg1	MSG3, MSG2
+	pxor		MSG3, MSG1
+
+	/* Rounds 64-67 */
+		sha1nexte	MSG0, E0
+		movdqa		ABCD, E1
+	sha1msg2	MSG0, MSG1
+		sha1rnds4	$3, E0, ABCD
+	sha1msg1	MSG0, MSG3
+	pxor		MSG0, MSG2
+
+	/* Rounds 68-71 */
+		sha1nexte	MSG1, E1
+		movdqa		ABCD, E0
+	sha1msg2	MSG1, MSG2
+		sha1rnds4	$3, E1, ABCD
+	pxor		MSG1, MSG3
+
+	/* Rounds 72-75 */
+		sha1nexte	MSG2, E0
+		movdqa		ABCD, E1
+	sha1msg2	MSG2, MSG3
+		sha1rnds4	$3, E0, ABCD
+
+	/* Rounds 76-79 */
+		sha1nexte	MSG3, E1
+		movdqa		ABCD, E0
+		sha1rnds4	$3, E1, ABCD
+
+	/* Add current hash values with previously saved */
+	sha1nexte	(0*16)(%rsp), E0
+	paddd		(1*16)(%rsp), ABCD
+
+	/* Increment data pointer and loop if more to process */
+	add		$64, DATA_PTR
+	cmp		NUM_BLKS, DATA_PTR
+	jne		.Lloop0
+
+	/* Write hash values back in the correct order */
+	pshufd		$0x1B, ABCD, ABCD
+	movdqu		ABCD, 0*16(DIGEST_PTR)
+	pextrd		$3, E0, 1*16(DIGEST_PTR)
+
+.Ldone_hash:
+	mov		RSPSAVE, %rsp
+
+	ret
+ENDPROC(sha1_ni_transform)
+
+.data
+
+.align 64
+PSHUFFLE_BYTE_FLIP_MASK:
+	.octa 0x000102030405060708090a0b0c0d0e0f
+UPPER_WORD_MASK:
+	.octa 0xFFFFFFFF000000000000000000000000
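[Editor's sketch, not part of the patch: how a caller drives this routine. The transform only consumes whole 64-byte blocks and clobbers XMM state, so the kernel glue added later in this patch brackets it with kernel_fpu_begin()/kernel_fpu_end() and leaves all padding to the sha1_base helpers. The prototype matches the patch; the wrapper name is hypothetical:]

    #include <linux/types.h>
    #include <asm/fpu/api.h>

    asmlinkage void sha1_ni_transform(u32 *digest, const char *data,
                                      unsigned int rounds);

    /* Hypothetical helper: hash 'blocks' complete 64-byte blocks. */
    static void sha1_ni_do_blocks(u32 digest[5], const u8 *data,
                                  unsigned int blocks)
    {
            kernel_fpu_begin();     /* claim FPU/XMM state for kernel use */
            sha1_ni_transform(digest, (const char *)data, blocks);
            kernel_fpu_end();
    }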
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index 00212c32d4db..dd14616b7739 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -31,24 +31,11 @@
 #include <crypto/sha1_base.h>
 #include <asm/fpu/api.h>
 
+typedef void (sha1_transform_fn)(u32 *digest, const char *data,
+			unsigned int rounds);
 
-asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data,
-				     unsigned int rounds);
-#ifdef CONFIG_AS_AVX
-asmlinkage void sha1_transform_avx(u32 *digest, const char *data,
-				   unsigned int rounds);
-#endif
-#ifdef CONFIG_AS_AVX2
-#define SHA1_AVX2_BLOCK_OPTSIZE	4	/* optimal 4*64 bytes of SHA1 blocks */
-
-asmlinkage void sha1_transform_avx2(u32 *digest, const char *data,
-				    unsigned int rounds);
-#endif
-
-static void (*sha1_transform_asm)(u32 *, const char *, unsigned int);
-
-static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data,
-			     unsigned int len)
+static int sha1_update(struct shash_desc *desc, const u8 *data,
+			     unsigned int len, sha1_transform_fn *sha1_xform)
 {
 	struct sha1_state *sctx = shash_desc_ctx(desc);
 
@@ -61,14 +48,14 @@ static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data,
 
 	kernel_fpu_begin();
 	sha1_base_do_update(desc, data, len,
-			    (sha1_block_fn *)sha1_transform_asm);
+			    (sha1_block_fn *)sha1_xform);
 	kernel_fpu_end();
 
 	return 0;
 }
 
-static int sha1_ssse3_finup(struct shash_desc *desc, const u8 *data,
-			    unsigned int len, u8 *out)
+static int sha1_finup(struct shash_desc *desc, const u8 *data,
+	  unsigned int len, u8 *out, sha1_transform_fn *sha1_xform)
 {
 	if (!irq_fpu_usable())
 		return crypto_sha1_finup(desc, data, len, out);
@@ -76,32 +63,37 @@ static int sha1_ssse3_finup(struct shash_desc *desc, const u8 *data,
 	kernel_fpu_begin();
 	if (len)
 		sha1_base_do_update(desc, data, len,
-				    (sha1_block_fn *)sha1_transform_asm);
-	sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_transform_asm);
+				    (sha1_block_fn *)sha1_xform);
+	sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_xform);
 	kernel_fpu_end();
 
 	return sha1_base_finish(desc, out);
 }
 
-/* Add padding and return the message digest. */
-static int sha1_ssse3_final(struct shash_desc *desc, u8 *out)
+asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data,
+				     unsigned int rounds);
+
+static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data,
+			     unsigned int len)
 {
-	return sha1_ssse3_finup(desc, NULL, 0, out);
+	return sha1_update(desc, data, len,
+			(sha1_transform_fn *) sha1_transform_ssse3);
 }
 
-#ifdef CONFIG_AS_AVX2
-static void sha1_apply_transform_avx2(u32 *digest, const char *data,
-				      unsigned int rounds)
+static int sha1_ssse3_finup(struct shash_desc *desc, const u8 *data,
+			      unsigned int len, u8 *out)
 {
-	/* Select the optimal transform based on data block size */
-	if (rounds >= SHA1_AVX2_BLOCK_OPTSIZE)
-		sha1_transform_avx2(digest, data, rounds);
-	else
-		sha1_transform_avx(digest, data, rounds);
+	return sha1_finup(desc, data, len, out,
+			(sha1_transform_fn *) sha1_transform_ssse3);
+}
+
+/* Add padding and return the message digest. */
+static int sha1_ssse3_final(struct shash_desc *desc, u8 *out)
+{
+	return sha1_ssse3_finup(desc, NULL, 0, out);
 }
-#endif
 
-static struct shash_alg alg = {
+static struct shash_alg sha1_ssse3_alg = {
 	.digestsize	=	SHA1_DIGEST_SIZE,
 	.init		=	sha1_base_init,
 	.update		=	sha1_ssse3_update,
@@ -110,7 +102,7 @@ static struct shash_alg alg = {
 	.descsize	=	sizeof(struct sha1_state),
 	.base		=	{
 		.cra_name	=	"sha1",
-		.cra_driver_name=	"sha1-ssse3",
+		.cra_driver_name =	"sha1-ssse3",
 		.cra_priority	=	150,
 		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	=	SHA1_BLOCK_SIZE,
@@ -118,8 +110,60 @@ static struct shash_alg alg = {
 	}
 };
 
+static int register_sha1_ssse3(void)
+{
+	if (boot_cpu_has(X86_FEATURE_SSSE3))
+		return crypto_register_shash(&sha1_ssse3_alg);
+	return 0;
+}
+
+static void unregister_sha1_ssse3(void)
+{
+	if (boot_cpu_has(X86_FEATURE_SSSE3))
+		crypto_unregister_shash(&sha1_ssse3_alg);
+}
+
 #ifdef CONFIG_AS_AVX
-static bool __init avx_usable(void)
+asmlinkage void sha1_transform_avx(u32 *digest, const char *data,
+				   unsigned int rounds);
+
+static int sha1_avx_update(struct shash_desc *desc, const u8 *data,
+			     unsigned int len)
+{
+	return sha1_update(desc, data, len,
+			(sha1_transform_fn *) sha1_transform_avx);
+}
+
+static int sha1_avx_finup(struct shash_desc *desc, const u8 *data,
+			      unsigned int len, u8 *out)
+{
+	return sha1_finup(desc, data, len, out,
+			(sha1_transform_fn *) sha1_transform_avx);
+}
+
+static int sha1_avx_final(struct shash_desc *desc, u8 *out)
+{
+	return sha1_avx_finup(desc, NULL, 0, out);
+}
+
+static struct shash_alg sha1_avx_alg = {
+	.digestsize	=	SHA1_DIGEST_SIZE,
+	.init		=	sha1_base_init,
+	.update		=	sha1_avx_update,
+	.final		=	sha1_avx_final,
+	.finup		=	sha1_avx_finup,
+	.descsize	=	sizeof(struct sha1_state),
+	.base		=	{
+		.cra_name	=	"sha1",
+		.cra_driver_name =	"sha1-avx",
+		.cra_priority	=	160,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA1_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+};
+
+static bool avx_usable(void)
 {
 	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
 		if (cpu_has_avx)
@@ -130,55 +174,197 @@ static bool __init avx_usable(void)
 	return true;
 }
 
-#ifdef CONFIG_AS_AVX2
-static bool __init avx2_usable(void)
+static int register_sha1_avx(void)
+{
+	if (avx_usable())
+		return crypto_register_shash(&sha1_avx_alg);
+	return 0;
+}
+
+static void unregister_sha1_avx(void)
 {
-	if (avx_usable() && cpu_has_avx2 && boot_cpu_has(X86_FEATURE_BMI1) &&
-	    boot_cpu_has(X86_FEATURE_BMI2))
+	if (avx_usable())
+		crypto_unregister_shash(&sha1_avx_alg);
+}
+
+#else  /* CONFIG_AS_AVX */
+static inline int register_sha1_avx(void) { return 0; }
+static inline void unregister_sha1_avx(void) { }
+#endif /* CONFIG_AS_AVX */
+
+
+#if defined(CONFIG_AS_AVX2) && (CONFIG_AS_AVX)
+#define SHA1_AVX2_BLOCK_OPTSIZE	4	/* optimal 4*64 bytes of SHA1 blocks */
+
+asmlinkage void sha1_transform_avx2(u32 *digest, const char *data,
+				    unsigned int rounds);
+
+static bool avx2_usable(void)
+{
+	if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2)
+		&& boot_cpu_has(X86_FEATURE_BMI1)
+		&& boot_cpu_has(X86_FEATURE_BMI2))
 		return true;
 
 	return false;
 }
+
+static void sha1_apply_transform_avx2(u32 *digest, const char *data,
+				unsigned int rounds)
+{
+	/* Select the optimal transform based on data block size */
+	if (rounds >= SHA1_AVX2_BLOCK_OPTSIZE)
+		sha1_transform_avx2(digest, data, rounds);
+	else
+		sha1_transform_avx(digest, data, rounds);
+}
+
+static int sha1_avx2_update(struct shash_desc *desc, const u8 *data,
+			     unsigned int len)
+{
+	return sha1_update(desc, data, len,
+		(sha1_transform_fn *) sha1_apply_transform_avx2);
+}
+
+static int sha1_avx2_finup(struct shash_desc *desc, const u8 *data,
+			      unsigned int len, u8 *out)
+{
+	return sha1_finup(desc, data, len, out,
+		(sha1_transform_fn *) sha1_apply_transform_avx2);
+}
+
+static int sha1_avx2_final(struct shash_desc *desc, u8 *out)
+{
+	return sha1_avx2_finup(desc, NULL, 0, out);
+}
+
+static struct shash_alg sha1_avx2_alg = {
+	.digestsize	=	SHA1_DIGEST_SIZE,
+	.init		=	sha1_base_init,
+	.update		=	sha1_avx2_update,
+	.final		=	sha1_avx2_final,
+	.finup		=	sha1_avx2_finup,
+	.descsize	=	sizeof(struct sha1_state),
+	.base		=	{
+		.cra_name	=	"sha1",
+		.cra_driver_name =	"sha1-avx2",
+		.cra_priority	=	170,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA1_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+};
+
+static int register_sha1_avx2(void)
+{
+	if (avx2_usable())
+		return crypto_register_shash(&sha1_avx2_alg);
+	return 0;
+}
+
+static void unregister_sha1_avx2(void)
+{
+	if (avx2_usable())
+		crypto_unregister_shash(&sha1_avx2_alg);
+}
+
+#else
+static inline int register_sha1_avx2(void) { return 0; }
+static inline void unregister_sha1_avx2(void) { }
 #endif
+
+#ifdef CONFIG_AS_SHA1_NI
+asmlinkage void sha1_ni_transform(u32 *digest, const char *data,
+				   unsigned int rounds);
+
+static int sha1_ni_update(struct shash_desc *desc, const u8 *data,
+			     unsigned int len)
+{
+	return sha1_update(desc, data, len,
+		(sha1_transform_fn *) sha1_ni_transform);
+}
+
+static int sha1_ni_finup(struct shash_desc *desc, const u8 *data,
+			      unsigned int len, u8 *out)
+{
+	return sha1_finup(desc, data, len, out,
+		(sha1_transform_fn *) sha1_ni_transform);
+}
+
+static int sha1_ni_final(struct shash_desc *desc, u8 *out)
+{
+	return sha1_ni_finup(desc, NULL, 0, out);
+}
+
+static struct shash_alg sha1_ni_alg = {
+	.digestsize	=	SHA1_DIGEST_SIZE,
+	.init		=	sha1_base_init,
+	.update		=	sha1_ni_update,
+	.final		=	sha1_ni_final,
+	.finup		=	sha1_ni_finup,
+	.descsize	=	sizeof(struct sha1_state),
+	.base		=	{
+		.cra_name	=	"sha1",
+		.cra_driver_name =	"sha1-ni",
+		.cra_priority	=	250,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA1_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+};
+
+static int register_sha1_ni(void)
+{
+	if (boot_cpu_has(X86_FEATURE_SHA_NI))
+		return crypto_register_shash(&sha1_ni_alg);
+	return 0;
+}
+
+static void unregister_sha1_ni(void)
+{
+	if (boot_cpu_has(X86_FEATURE_SHA_NI))
+		crypto_unregister_shash(&sha1_ni_alg);
+}
+
+#else
+static inline int register_sha1_ni(void) { return 0; }
+static inline void unregister_sha1_ni(void) { }
 #endif
 
 static int __init sha1_ssse3_mod_init(void)
 {
-	char *algo_name;
+	if (register_sha1_ssse3())
+		goto fail;
 
-	/* test for SSSE3 first */
-	if (cpu_has_ssse3) {
-		sha1_transform_asm = sha1_transform_ssse3;
-		algo_name = "SSSE3";
+	if (register_sha1_avx()) {
+		unregister_sha1_ssse3();
+		goto fail;
 	}
 
-#ifdef CONFIG_AS_AVX
-	/* allow AVX to override SSSE3, it's a little faster */
-	if (avx_usable()) {
-		sha1_transform_asm = sha1_transform_avx;
-		algo_name = "AVX";
-#ifdef CONFIG_AS_AVX2
-		/* allow AVX2 to override AVX, it's a little faster */
-		if (avx2_usable()) {
-			sha1_transform_asm = sha1_apply_transform_avx2;
-			algo_name = "AVX2";
-		}
-#endif
+	if (register_sha1_avx2()) {
+		unregister_sha1_avx();
+		unregister_sha1_ssse3();
+		goto fail;
 	}
-#endif
 
-	if (sha1_transform_asm) {
-		pr_info("Using %s optimized SHA-1 implementation\n", algo_name);
-		return crypto_register_shash(&alg);
+	if (register_sha1_ni()) {
+		unregister_sha1_avx2();
+		unregister_sha1_avx();
+		unregister_sha1_ssse3();
+		goto fail;
 	}
-	pr_info("Neither AVX nor AVX2 nor SSSE3 is available/usable.\n");
 
+	return 0;
+fail:
 	return -ENODEV;
 }
 
 static void __exit sha1_ssse3_mod_fini(void)
 {
-	crypto_unregister_shash(&alg);
+	unregister_sha1_ni();
+	unregister_sha1_avx2();
+	unregister_sha1_avx();
+	unregister_sha1_ssse3();
 }
 
 module_init(sha1_ssse3_mod_init);
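[Editor's sketch, not part of the patch: all four variants register under the same .cra_name "sha1", so the crypto core's priority mechanism selects the best available one automatically — sha1-ni (250) beats sha1-avx2 (170), sha1-avx (160) and sha1-ssse3 (150). The probe below is illustrative; crypto_alloc_shash() and crypto_tfm_alg_driver_name() are standard crypto API calls:]

    #include <crypto/hash.h>

    static void sha1_report_backend(void)
    {
            /* Resolves to the highest-priority registered "sha1". */
            struct crypto_shash *tfm = crypto_alloc_shash("sha1", 0, 0);

            if (IS_ERR(tfm))
                    return;
            pr_info("sha1 backed by %s\n",
                    crypto_tfm_alg_driver_name(crypto_shash_tfm(tfm)));
            crypto_free_shash(tfm);
    }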
diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S
new file mode 100644
index 000000000000..748cdf21a938
--- /dev/null
+++ b/arch/x86/crypto/sha256_ni_asm.S
@@ -0,0 +1,353 @@
+/*
+ * Intel SHA Extensions optimized implementation of a SHA-256 update function
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * 	Sean Gulley <sean.m.gulley@intel.com>
+ * 	Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 	* Redistributions of source code must retain the above copyright
+ * 	  notice, this list of conditions and the following disclaimer.
+ * 	* Redistributions in binary form must reproduce the above copyright
+ * 	  notice, this list of conditions and the following disclaimer in
+ * 	  the documentation and/or other materials provided with the
+ * 	  distribution.
+ * 	* Neither the name of Intel Corporation nor the names of its
+ * 	  contributors may be used to endorse or promote products derived
+ * 	  from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/linkage.h>
+
+#define DIGEST_PTR	%rdi	/* 1st arg */
+#define DATA_PTR	%rsi	/* 2nd arg */
+#define NUM_BLKS	%rdx	/* 3rd arg */
+
+#define SHA256CONSTANTS	%rax
+
+#define MSG		%xmm0
+#define STATE0		%xmm1
+#define STATE1		%xmm2
+#define MSGTMP0		%xmm3
+#define MSGTMP1		%xmm4
+#define MSGTMP2		%xmm5
+#define MSGTMP3		%xmm6
+#define MSGTMP4		%xmm7
+
+#define SHUF_MASK	%xmm8
+
+#define ABEF_SAVE	%xmm9
+#define CDGH_SAVE	%xmm10
+
+/*
+ * Intel SHA Extensions optimized implementation of a SHA-256 update function
+ *
+ * The function takes a pointer to the current hash values, a pointer to the
+ * input data, and a number of 64 byte blocks to process.  Once all blocks have
+ * been processed, the digest pointer is updated with the resulting hash value.
+ * The function only processes complete blocks, there is no functionality to
+ * store partial blocks.  All message padding and hash value initialization must
+ * be done outside the update function.
+ *
+ * The indented lines in the loop are instructions related to rounds processing.
+ * The non-indented lines are instructions related to the message schedule.
+ *
+ * void sha256_ni_transform(uint32_t *digest, const void *data,
+		uint32_t numBlocks);
+ * digest : pointer to digest
+ * data: pointer to input data
+ * numBlocks: Number of blocks to process
+ */
+
+.text
+.align 32
+ENTRY(sha256_ni_transform)
+
+	shl		$6, NUM_BLKS		/* convert to bytes */
+	jz		.Ldone_hash
+	add		DATA_PTR, NUM_BLKS	/* pointer to end of data */
+
+	/*
+	 * load initial hash values
+	 * Need to reorder these appropriately
+	 * DCBA, HGFE -> ABEF, CDGH
+	 */
+	movdqu		0*16(DIGEST_PTR), STATE0
+	movdqu		1*16(DIGEST_PTR), STATE1
+
+	pshufd		$0xB1, STATE0, STATE0		/* CDAB */
+	pshufd		$0x1B, STATE1, STATE1		/* EFGH */
+	movdqa		STATE0, MSGTMP4
+	palignr		$8, STATE1, STATE0		/* ABEF */
+	pblendw		$0xF0, MSGTMP4, STATE1		/* CDGH */
+
+	movdqa		PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
+	lea		K256(%rip), SHA256CONSTANTS
+
+.Lloop0:
+	/* Save hash values for addition after rounds */
+	movdqa		STATE0, ABEF_SAVE
+	movdqa		STATE1, CDGH_SAVE
+
+	/* Rounds 0-3 */
+	movdqu		0*16(DATA_PTR), MSG
+	pshufb		SHUF_MASK, MSG
+	movdqa		MSG, MSGTMP0
+		paddd		0*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+
+	/* Rounds 4-7 */
+	movdqu		1*16(DATA_PTR), MSG
+	pshufb		SHUF_MASK, MSG
+	movdqa		MSG, MSGTMP1
+		paddd		1*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+	sha256msg1	MSGTMP1, MSGTMP0
+
+	/* Rounds 8-11 */
+	movdqu		2*16(DATA_PTR), MSG
+	pshufb		SHUF_MASK, MSG
+	movdqa		MSG, MSGTMP2
+		paddd		2*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+	sha256msg1	MSGTMP2, MSGTMP1
+
+	/* Rounds 12-15 */
+	movdqu		3*16(DATA_PTR), MSG
+	pshufb		SHUF_MASK, MSG
+	movdqa		MSG, MSGTMP3
+		paddd		3*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+	movdqa		MSGTMP3, MSGTMP4
+	palignr		$4, MSGTMP2, MSGTMP4
+	paddd		MSGTMP4, MSGTMP0
+	sha256msg2	MSGTMP3, MSGTMP0
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+	sha256msg1	MSGTMP3, MSGTMP2
+
+	/* Rounds 16-19 */
+	movdqa		MSGTMP0, MSG
+		paddd		4*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+	movdqa		MSGTMP0, MSGTMP4
+	palignr		$4, MSGTMP3, MSGTMP4
+	paddd		MSGTMP4, MSGTMP1
+	sha256msg2	MSGTMP0, MSGTMP1
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+	sha256msg1	MSGTMP0, MSGTMP3
+
+	/* Rounds 20-23 */
+	movdqa		MSGTMP1, MSG
+		paddd		5*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+	movdqa		MSGTMP1, MSGTMP4
+	palignr		$4, MSGTMP0, MSGTMP4
+	paddd		MSGTMP4, MSGTMP2
+	sha256msg2	MSGTMP1, MSGTMP2
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+	sha256msg1	MSGTMP1, MSGTMP0
+
+	/* Rounds 24-27 */
+	movdqa		MSGTMP2, MSG
+		paddd		6*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+	movdqa		MSGTMP2, MSGTMP4
+	palignr		$4, MSGTMP1, MSGTMP4
+	paddd		MSGTMP4, MSGTMP3
+	sha256msg2	MSGTMP2, MSGTMP3
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+	sha256msg1	MSGTMP2, MSGTMP1
+
+	/* Rounds 28-31 */
+	movdqa		MSGTMP3, MSG
+		paddd		7*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+	movdqa		MSGTMP3, MSGTMP4
+	palignr		$4, MSGTMP2, MSGTMP4
+	paddd		MSGTMP4, MSGTMP0
+	sha256msg2	MSGTMP3, MSGTMP0
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+	sha256msg1	MSGTMP3, MSGTMP2
+
+	/* Rounds 32-35 */
+	movdqa		MSGTMP0, MSG
+		paddd		8*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+	movdqa		MSGTMP0, MSGTMP4
+	palignr		$4, MSGTMP3, MSGTMP4
+	paddd		MSGTMP4, MSGTMP1
+	sha256msg2	MSGTMP0, MSGTMP1
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+	sha256msg1	MSGTMP0, MSGTMP3
+
+	/* Rounds 36-39 */
+	movdqa		MSGTMP1, MSG
+		paddd		9*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+	movdqa		MSGTMP1, MSGTMP4
+	palignr		$4, MSGTMP0, MSGTMP4
+	paddd		MSGTMP4, MSGTMP2
+	sha256msg2	MSGTMP1, MSGTMP2
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+	sha256msg1	MSGTMP1, MSGTMP0
+
+	/* Rounds 40-43 */
+	movdqa		MSGTMP2, MSG
+		paddd		10*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+	movdqa		MSGTMP2, MSGTMP4
+	palignr		$4, MSGTMP1, MSGTMP4
+	paddd		MSGTMP4, MSGTMP3
+	sha256msg2	MSGTMP2, MSGTMP3
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+	sha256msg1	MSGTMP2, MSGTMP1
+
+	/* Rounds 44-47 */
+	movdqa		MSGTMP3, MSG
+		paddd		11*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+	movdqa		MSGTMP3, MSGTMP4
+	palignr		$4, MSGTMP2, MSGTMP4
+	paddd		MSGTMP4, MSGTMP0
+	sha256msg2	MSGTMP3, MSGTMP0
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+	sha256msg1	MSGTMP3, MSGTMP2
+
+	/* Rounds 48-51 */
+	movdqa		MSGTMP0, MSG
+		paddd		12*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+	movdqa		MSGTMP0, MSGTMP4
+	palignr		$4, MSGTMP3, MSGTMP4
+	paddd		MSGTMP4, MSGTMP1
+	sha256msg2	MSGTMP0, MSGTMP1
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+	sha256msg1	MSGTMP0, MSGTMP3
+
+	/* Rounds 52-55 */
+	movdqa		MSGTMP1, MSG
+		paddd		13*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+	movdqa		MSGTMP1, MSGTMP4
+	palignr		$4, MSGTMP0, MSGTMP4
+	paddd		MSGTMP4, MSGTMP2
+	sha256msg2	MSGTMP1, MSGTMP2
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+
+	/* Rounds 56-59 */
+	movdqa		MSGTMP2, MSG
+		paddd		14*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+	movdqa		MSGTMP2, MSGTMP4
+	palignr		$4, MSGTMP1, MSGTMP4
+	paddd		MSGTMP4, MSGTMP3
+	sha256msg2	MSGTMP2, MSGTMP3
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+
+	/* Rounds 60-63 */
+	movdqa		MSGTMP3, MSG
+		paddd		15*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+
+	/* Add current hash values with previously saved */
+	paddd		ABEF_SAVE, STATE0
+	paddd		CDGH_SAVE, STATE1
+
+	/* Increment data pointer and loop if more to process */
+	add		$64, DATA_PTR
+	cmp		NUM_BLKS, DATA_PTR
+	jne		.Lloop0
+
+	/* Write hash values back in the correct order */
+	pshufd		$0x1B, STATE0, STATE0		/* FEBA */
+	pshufd		$0xB1, STATE1, STATE1		/* DCHG */
+	movdqa		STATE0, MSGTMP4
+	pblendw		$0xF0, STATE1, STATE0		/* DCBA */
+	palignr		$8, MSGTMP4, STATE1		/* HGFE */
+
+	movdqu		STATE0, 0*16(DIGEST_PTR)
+	movdqu		STATE1, 1*16(DIGEST_PTR)
+
+.Ldone_hash:
+
+	ret
+ENDPROC(sha256_ni_transform)
+
+.data
+.align 64
+K256:
+	.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+	.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+	.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+	.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+	.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+	.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+	.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+	.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+	.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+	.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+	.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+	.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+	.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+	.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+	.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+	.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+
+PSHUFFLE_BYTE_FLIP_MASK:
+	.octa 0x0c0d0e0f08090a0b0405060700010203
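[Editor's sketch, not part of the patch: a plain-C illustration of the state regrouping the prologue/epilogue comments above describe. The digest is stored linearly in memory as the eight words a..h, but sha256rnds2 consumes the state as two 128-bit groups, {a,b,e,f} and {c,d,g,h}; the pshufd/palignr/pblendw sequences perform this regrouping on load and undo it on store. The function name and st[] are hypothetical, and the lane ordering inside each XMM register is a separate detail handled by those shuffles:]

    #include <linux/types.h>

    static void sha256_ni_regroup(const u32 st[8], u32 abef[4], u32 cdgh[4])
    {
            /* st[] holds the digest in its linear a..h order */
            abef[0] = st[0]; abef[1] = st[1];   /* a, b */
            abef[2] = st[4]; abef[3] = st[5];   /* e, f */
            cdgh[0] = st[2]; cdgh[1] = st[3];   /* c, d */
            cdgh[2] = st[6]; cdgh[3] = st[7];   /* g, h */
    }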
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index 0e0e85aea634..5f4d6086dc59 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c | |||
@@ -42,19 +42,10 @@ | |||
42 | 42 | ||
43 | asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data, | 43 | asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data, |
44 | u64 rounds); | 44 | u64 rounds); |
45 | #ifdef CONFIG_AS_AVX | 45 | typedef void (sha256_transform_fn)(u32 *digest, const char *data, u64 rounds); |
46 | asmlinkage void sha256_transform_avx(u32 *digest, const char *data, | ||
47 | u64 rounds); | ||
48 | #endif | ||
49 | #ifdef CONFIG_AS_AVX2 | ||
50 | asmlinkage void sha256_transform_rorx(u32 *digest, const char *data, | ||
51 | u64 rounds); | ||
52 | #endif | ||
53 | |||
54 | static void (*sha256_transform_asm)(u32 *, const char *, u64); | ||
55 | 46 | ||
56 | static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data, | 47 | static int sha256_update(struct shash_desc *desc, const u8 *data, |
57 | unsigned int len) | 48 | unsigned int len, sha256_transform_fn *sha256_xform) |
58 | { | 49 | { |
59 | struct sha256_state *sctx = shash_desc_ctx(desc); | 50 | struct sha256_state *sctx = shash_desc_ctx(desc); |
60 | 51 | ||
@@ -67,14 +58,14 @@ static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data, | |||
67 | 58 | ||
68 | kernel_fpu_begin(); | 59 | kernel_fpu_begin(); |
69 | sha256_base_do_update(desc, data, len, | 60 | sha256_base_do_update(desc, data, len, |
70 | (sha256_block_fn *)sha256_transform_asm); | 61 | (sha256_block_fn *)sha256_xform); |
71 | kernel_fpu_end(); | 62 | kernel_fpu_end(); |
72 | 63 | ||
73 | return 0; | 64 | return 0; |
74 | } | 65 | } |
75 | 66 | ||
76 | static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data, | 67 | static int sha256_finup(struct shash_desc *desc, const u8 *data, |
77 | unsigned int len, u8 *out) | 68 | unsigned int len, u8 *out, sha256_transform_fn *sha256_xform) |
78 | { | 69 | { |
79 | if (!irq_fpu_usable()) | 70 | if (!irq_fpu_usable()) |
80 | return crypto_sha256_finup(desc, data, len, out); | 71 | return crypto_sha256_finup(desc, data, len, out); |
@@ -82,20 +73,32 @@ static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data, | |||
82 | kernel_fpu_begin(); | 73 | kernel_fpu_begin(); |
83 | if (len) | 74 | if (len) |
84 | sha256_base_do_update(desc, data, len, | 75 | sha256_base_do_update(desc, data, len, |
85 | (sha256_block_fn *)sha256_transform_asm); | 76 | (sha256_block_fn *)sha256_xform); |
86 | sha256_base_do_finalize(desc, (sha256_block_fn *)sha256_transform_asm); | 77 | sha256_base_do_finalize(desc, (sha256_block_fn *)sha256_xform); |
87 | kernel_fpu_end(); | 78 | kernel_fpu_end(); |
88 | 79 | ||
89 | return sha256_base_finish(desc, out); | 80 | return sha256_base_finish(desc, out); |
90 | } | 81 | } |
91 | 82 | ||
83 | static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data, | ||
84 | unsigned int len) | ||
85 | { | ||
86 | return sha256_update(desc, data, len, sha256_transform_ssse3); | ||
87 | } | ||
88 | |||
89 | static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data, | ||
90 | unsigned int len, u8 *out) | ||
91 | { | ||
92 | return sha256_finup(desc, data, len, out, sha256_transform_ssse3); | ||
93 | } | ||
94 | |||
92 | /* Add padding and return the message digest. */ | 95 | /* Add padding and return the message digest. */ |
93 | static int sha256_ssse3_final(struct shash_desc *desc, u8 *out) | 96 | static int sha256_ssse3_final(struct shash_desc *desc, u8 *out) |
94 | { | 97 | { |
95 | return sha256_ssse3_finup(desc, NULL, 0, out); | 98 | return sha256_ssse3_finup(desc, NULL, 0, out); |
96 | } | 99 | } |
97 | 100 | ||
98 | static struct shash_alg algs[] = { { | 101 | static struct shash_alg sha256_ssse3_algs[] = { { |
99 | .digestsize = SHA256_DIGEST_SIZE, | 102 | .digestsize = SHA256_DIGEST_SIZE, |
100 | .init = sha256_base_init, | 103 | .init = sha256_base_init, |
101 | .update = sha256_ssse3_update, | 104 | .update = sha256_ssse3_update, |
@@ -127,8 +130,75 @@ static struct shash_alg algs[] = { { | |||
127 | } | 130 | } |
128 | } }; | 131 | } }; |
129 | 132 | ||
133 | static int register_sha256_ssse3(void) | ||
134 | { | ||
135 | if (boot_cpu_has(X86_FEATURE_SSSE3)) | ||
136 | return crypto_register_shashes(sha256_ssse3_algs, | ||
137 | ARRAY_SIZE(sha256_ssse3_algs)); | ||
138 | return 0; | ||
139 | } | ||
140 | |||
141 | static void unregister_sha256_ssse3(void) | ||
142 | { | ||
143 | if (boot_cpu_has(X86_FEATURE_SSSE3)) | ||
144 | crypto_unregister_shashes(sha256_ssse3_algs, | ||
145 | ARRAY_SIZE(sha256_ssse3_algs)); | ||
146 | } | ||
147 | |||
130 | #ifdef CONFIG_AS_AVX | 148 | #ifdef CONFIG_AS_AVX |
131 | static bool __init avx_usable(void) | 149 | asmlinkage void sha256_transform_avx(u32 *digest, const char *data, |
150 | u64 rounds); | ||
151 | |||
152 | static int sha256_avx_update(struct shash_desc *desc, const u8 *data, | ||
153 | unsigned int len) | ||
154 | { | ||
155 | return sha256_update(desc, data, len, sha256_transform_avx); | ||
156 | } | ||
157 | |||
158 | static int sha256_avx_finup(struct shash_desc *desc, const u8 *data, | ||
159 | unsigned int len, u8 *out) | ||
160 | { | ||
161 | return sha256_finup(desc, data, len, out, sha256_transform_avx); | ||
162 | } | ||
163 | |||
164 | static int sha256_avx_final(struct shash_desc *desc, u8 *out) | ||
165 | { | ||
166 | return sha256_avx_finup(desc, NULL, 0, out); | ||
167 | } | ||
168 | |||
169 | static struct shash_alg sha256_avx_algs[] = { { | ||
170 | .digestsize = SHA256_DIGEST_SIZE, | ||
171 | .init = sha256_base_init, | ||
172 | .update = sha256_avx_update, | ||
173 | .final = sha256_avx_final, | ||
174 | .finup = sha256_avx_finup, | ||
175 | .descsize = sizeof(struct sha256_state), | ||
176 | .base = { | ||
177 | .cra_name = "sha256", | ||
178 | .cra_driver_name = "sha256-avx", | ||
179 | .cra_priority = 160, | ||
180 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
181 | .cra_blocksize = SHA256_BLOCK_SIZE, | ||
182 | .cra_module = THIS_MODULE, | ||
183 | } | ||
184 | }, { | ||
185 | .digestsize = SHA224_DIGEST_SIZE, | ||
186 | .init = sha224_base_init, | ||
187 | .update = sha256_avx_update, | ||
188 | .final = sha256_avx_final, | ||
189 | .finup = sha256_avx_finup, | ||
190 | .descsize = sizeof(struct sha256_state), | ||
191 | .base = { | ||
192 | .cra_name = "sha224", | ||
193 | .cra_driver_name = "sha224-avx", | ||
194 | .cra_priority = 160, | ||
195 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
196 | .cra_blocksize = SHA224_BLOCK_SIZE, | ||
197 | .cra_module = THIS_MODULE, | ||
198 | } | ||
199 | } }; | ||
200 | |||
201 | static bool avx_usable(void) | ||
132 | { | 202 | { |
133 | if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { | 203 | if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { |
134 | if (cpu_has_avx) | 204 | if (cpu_has_avx) |
@@ -138,47 +208,216 @@ static bool __init avx_usable(void) | |||
138 | 208 | ||
139 | return true; | 209 | return true; |
140 | } | 210 | } |
141 | #endif | ||
142 | 211 | ||
143 | static int __init sha256_ssse3_mod_init(void) | 212 | static int register_sha256_avx(void) |
144 | { | 213 | { |
145 | /* test for SSSE3 first */ | 214 | if (avx_usable()) |
146 | if (cpu_has_ssse3) | 215 | return crypto_register_shashes(sha256_avx_algs, |
147 | sha256_transform_asm = sha256_transform_ssse3; | 216 | ARRAY_SIZE(sha256_avx_algs)); |
217 | return 0; | ||
218 | } | ||
148 | 219 | ||
149 | #ifdef CONFIG_AS_AVX | 220 | static void unregister_sha256_avx(void) |
150 | /* allow AVX to override SSSE3, it's a little faster */ | 221 | { |
151 | if (avx_usable()) { | 222 | if (avx_usable()) |
152 | #ifdef CONFIG_AS_AVX2 | 223 | crypto_unregister_shashes(sha256_avx_algs, |
153 | if (boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_BMI2)) | 224 | ARRAY_SIZE(sha256_avx_algs)); |
154 | sha256_transform_asm = sha256_transform_rorx; | 225 | } |
155 | else | 226 | |
227 | #else | ||
228 | static inline int register_sha256_avx(void) { return 0; } | ||
229 | static inline void unregister_sha256_avx(void) { } | ||
156 | #endif | 230 | #endif |
157 | sha256_transform_asm = sha256_transform_avx; | 231 | |
232 | #if defined(CONFIG_AS_AVX2) && defined(CONFIG_AS_AVX) | ||
233 | asmlinkage void sha256_transform_rorx(u32 *digest, const char *data, | ||
234 | u64 rounds); | ||
235 | |||
236 | static int sha256_avx2_update(struct shash_desc *desc, const u8 *data, | ||
237 | unsigned int len) | ||
238 | { | ||
239 | return sha256_update(desc, data, len, sha256_transform_rorx); | ||
240 | } | ||
241 | |||
242 | static int sha256_avx2_finup(struct shash_desc *desc, const u8 *data, | ||
243 | unsigned int len, u8 *out) | ||
244 | { | ||
245 | return sha256_finup(desc, data, len, out, sha256_transform_rorx); | ||
246 | } | ||
247 | |||
248 | static int sha256_avx2_final(struct shash_desc *desc, u8 *out) | ||
249 | { | ||
250 | return sha256_avx2_finup(desc, NULL, 0, out); | ||
251 | } | ||
252 | |||
253 | static struct shash_alg sha256_avx2_algs[] = { { | ||
254 | .digestsize = SHA256_DIGEST_SIZE, | ||
255 | .init = sha256_base_init, | ||
256 | .update = sha256_avx2_update, | ||
257 | .final = sha256_avx2_final, | ||
258 | .finup = sha256_avx2_finup, | ||
259 | .descsize = sizeof(struct sha256_state), | ||
260 | .base = { | ||
261 | .cra_name = "sha256", | ||
262 | .cra_driver_name = "sha256-avx2", | ||
263 | .cra_priority = 170, | ||
264 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
265 | .cra_blocksize = SHA256_BLOCK_SIZE, | ||
266 | .cra_module = THIS_MODULE, | ||
158 | } | 267 | } |
159 | #endif | 268 | }, { |
269 | .digestsize = SHA224_DIGEST_SIZE, | ||
270 | .init = sha224_base_init, | ||
271 | .update = sha256_avx2_update, | ||
272 | .final = sha256_avx2_final, | ||
273 | .finup = sha256_avx2_finup, | ||
274 | .descsize = sizeof(struct sha256_state), | ||
275 | .base = { | ||
276 | .cra_name = "sha224", | ||
277 | .cra_driver_name = "sha224-avx2", | ||
278 | .cra_priority = 170, | ||
279 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
280 | .cra_blocksize = SHA224_BLOCK_SIZE, | ||
281 | .cra_module = THIS_MODULE, | ||
282 | } | ||
283 | } }; | ||
160 | 284 | ||
161 | if (sha256_transform_asm) { | 285 | static bool avx2_usable(void) |
162 | #ifdef CONFIG_AS_AVX | 286 | { |
163 | if (sha256_transform_asm == sha256_transform_avx) | 287 | if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) && |
164 | pr_info("Using AVX optimized SHA-256 implementation\n"); | 288 | boot_cpu_has(X86_FEATURE_BMI2)) |
165 | #ifdef CONFIG_AS_AVX2 | 289 | return true; |
166 | else if (sha256_transform_asm == sha256_transform_rorx) | 290 | |
167 | pr_info("Using AVX2 optimized SHA-256 implementation\n"); | 291 | return false; |
292 | } | ||
293 | |||
294 | static int register_sha256_avx2(void) | ||
295 | { | ||
296 | if (avx2_usable()) | ||
297 | return crypto_register_shashes(sha256_avx2_algs, | ||
298 | ARRAY_SIZE(sha256_avx2_algs)); | ||
299 | return 0; | ||
300 | } | ||
301 | |||
302 | static void unregister_sha256_avx2(void) | ||
303 | { | ||
304 | if (avx2_usable()) | ||
305 | crypto_unregister_shashes(sha256_avx2_algs, | ||
306 | ARRAY_SIZE(sha256_avx2_algs)); | ||
307 | } | ||
308 | |||
309 | #else | ||
310 | static inline int register_sha256_avx2(void) { return 0; } | ||
311 | static inline void unregister_sha256_avx2(void) { } | ||
168 | #endif | 312 | #endif |
169 | else | 313 | |
314 | #ifdef CONFIG_AS_SHA256_NI | ||
315 | asmlinkage void sha256_ni_transform(u32 *digest, const char *data, | ||
316 | u64 rounds); | ||
317 | |||
318 | static int sha256_ni_update(struct shash_desc *desc, const u8 *data, | ||
319 | unsigned int len) | ||
320 | { | ||
321 | return sha256_update(desc, data, len, sha256_ni_transform); | ||
322 | } | ||
323 | |||
324 | static int sha256_ni_finup(struct shash_desc *desc, const u8 *data, | ||
325 | unsigned int len, u8 *out) | ||
326 | { | ||
327 | return sha256_finup(desc, data, len, out, sha256_ni_transform); | ||
328 | } | ||
329 | |||
330 | static int sha256_ni_final(struct shash_desc *desc, u8 *out) | ||
331 | { | ||
332 | return sha256_ni_finup(desc, NULL, 0, out); | ||
333 | } | ||
334 | |||
335 | static struct shash_alg sha256_ni_algs[] = { { | ||
336 | .digestsize = SHA256_DIGEST_SIZE, | ||
337 | .init = sha256_base_init, | ||
338 | .update = sha256_ni_update, | ||
339 | .final = sha256_ni_final, | ||
340 | .finup = sha256_ni_finup, | ||
341 | .descsize = sizeof(struct sha256_state), | ||
342 | .base = { | ||
343 | .cra_name = "sha256", | ||
344 | .cra_driver_name = "sha256-ni", | ||
345 | .cra_priority = 250, | ||
346 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
347 | .cra_blocksize = SHA256_BLOCK_SIZE, | ||
348 | .cra_module = THIS_MODULE, | ||
349 | } | ||
350 | }, { | ||
351 | .digestsize = SHA224_DIGEST_SIZE, | ||
352 | .init = sha224_base_init, | ||
353 | .update = sha256_ni_update, | ||
354 | .final = sha256_ni_final, | ||
355 | .finup = sha256_ni_finup, | ||
356 | .descsize = sizeof(struct sha256_state), | ||
357 | .base = { | ||
358 | .cra_name = "sha224", | ||
359 | .cra_driver_name = "sha224-ni", | ||
360 | .cra_priority = 250, | ||
361 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
362 | .cra_blocksize = SHA224_BLOCK_SIZE, | ||
363 | .cra_module = THIS_MODULE, | ||
364 | } | ||
365 | } }; | ||
366 | |||
367 | static int register_sha256_ni(void) | ||
368 | { | ||
369 | if (boot_cpu_has(X86_FEATURE_SHA_NI)) | ||
370 | return crypto_register_shashes(sha256_ni_algs, | ||
371 | ARRAY_SIZE(sha256_ni_algs)); | ||
372 | return 0; | ||
373 | } | ||
374 | |||
375 | static void unregister_sha256_ni(void) | ||
376 | { | ||
377 | if (boot_cpu_has(X86_FEATURE_SHA_NI)) | ||
378 | crypto_unregister_shashes(sha256_ni_algs, | ||
379 | ARRAY_SIZE(sha256_ni_algs)); | ||
380 | } | ||
381 | |||
382 | #else | ||
383 | static inline int register_sha256_ni(void) { return 0; } | ||
384 | static inline void unregister_sha256_ni(void) { } | ||
170 | #endif | 385 | #endif |
171 | pr_info("Using SSSE3 optimized SHA-256 implementation\n"); | 386 | |
172 | return crypto_register_shashes(algs, ARRAY_SIZE(algs)); | 387 | static int __init sha256_ssse3_mod_init(void) |
388 | { | ||
389 | if (register_sha256_ssse3()) | ||
390 | goto fail; | ||
391 | |||
392 | if (register_sha256_avx()) { | ||
393 | unregister_sha256_ssse3(); | ||
394 | goto fail; | ||
173 | } | 395 | } |
174 | pr_info("Neither AVX nor SSSE3 is available/usable.\n"); | ||
175 | 396 | ||
397 | if (register_sha256_avx2()) { | ||
398 | unregister_sha256_avx(); | ||
399 | unregister_sha256_ssse3(); | ||
400 | goto fail; | ||
401 | } | ||
402 | |||
403 | if (register_sha256_ni()) { | ||
404 | unregister_sha256_avx2(); | ||
405 | unregister_sha256_avx(); | ||
406 | unregister_sha256_ssse3(); | ||
407 | goto fail; | ||
408 | } | ||
409 | |||
410 | return 0; | ||
411 | fail: | ||
176 | return -ENODEV; | 412 | return -ENODEV; |
177 | } | 413 | } |
178 | 414 | ||
179 | static void __exit sha256_ssse3_mod_fini(void) | 415 | static void __exit sha256_ssse3_mod_fini(void) |
180 | { | 416 | { |
181 | crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); | 417 | unregister_sha256_ni(); |
418 | unregister_sha256_avx2(); | ||
419 | unregister_sha256_avx(); | ||
420 | unregister_sha256_ssse3(); | ||
182 | } | 421 | } |
183 | 422 | ||
184 | module_init(sha256_ssse3_mod_init); | 423 | module_init(sha256_ssse3_mod_init); |
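With the per-ISA registration above, several "sha256" drivers can coexist and the crypto core resolves the name to the one with the highest cra_priority (sha256-ni at 250, sha256-avx2 at 170, sha256-avx at 160, with the SSSE3 and generic drivers lower still). A hedged userspace sketch of that selection through the AF_ALG socket interface; it assumes a kernel built with CONFIG_CRYPTO_USER_API_HASH and trims error handling:

/* Hash "abc" with whatever driver the kernel resolved for "sha256". */
#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/if_alg.h>

int main(void)
{
	struct sockaddr_alg sa = {
		.salg_family = AF_ALG,
		.salg_type   = "hash",
		.salg_name   = "sha256",	/* highest-priority driver wins */
	};
	unsigned char digest[32];
	int tfm, op, i;

	tfm = socket(AF_ALG, SOCK_SEQPACKET, 0);
	bind(tfm, (struct sockaddr *)&sa, sizeof(sa));
	op = accept(tfm, NULL, 0);

	write(op, "abc", 3);			/* message to hash */
	read(op, digest, sizeof(digest));	/* finalized digest */

	for (i = 0; i < 32; i++)
		printf("%02x", digest[i]);
	printf("\n");				/* expect ba7816bf... for "abc" */
	close(op);
	close(tfm);
	return 0;
}

After boot, /proc/crypto shows which driver actually backs "sha256" on a given machine.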
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index 0c8c38c101ac..34e5083d6f36 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c | |||
@@ -41,19 +41,11 @@ | |||
41 | 41 | ||
42 | asmlinkage void sha512_transform_ssse3(u64 *digest, const char *data, | 42 | asmlinkage void sha512_transform_ssse3(u64 *digest, const char *data, |
43 | u64 rounds); | 43 | u64 rounds); |
44 | #ifdef CONFIG_AS_AVX | ||
45 | asmlinkage void sha512_transform_avx(u64 *digest, const char *data, | ||
46 | u64 rounds); | ||
47 | #endif | ||
48 | #ifdef CONFIG_AS_AVX2 | ||
49 | asmlinkage void sha512_transform_rorx(u64 *digest, const char *data, | ||
50 | u64 rounds); | ||
51 | #endif | ||
52 | 44 | ||
53 | static void (*sha512_transform_asm)(u64 *, const char *, u64); | 45 | typedef void (sha512_transform_fn)(u64 *digest, const char *data, u64 rounds); |
54 | 46 | ||
55 | static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data, | 47 | static int sha512_update(struct shash_desc *desc, const u8 *data, |
56 | unsigned int len) | 48 | unsigned int len, sha512_transform_fn *sha512_xform) |
57 | { | 49 | { |
58 | struct sha512_state *sctx = shash_desc_ctx(desc); | 50 | struct sha512_state *sctx = shash_desc_ctx(desc); |
59 | 51 | ||
@@ -66,14 +58,14 @@ static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data, | |||
66 | 58 | ||
67 | kernel_fpu_begin(); | 59 | kernel_fpu_begin(); |
68 | sha512_base_do_update(desc, data, len, | 60 | sha512_base_do_update(desc, data, len, |
69 | (sha512_block_fn *)sha512_transform_asm); | 61 | (sha512_block_fn *)sha512_xform); |
70 | kernel_fpu_end(); | 62 | kernel_fpu_end(); |
71 | 63 | ||
72 | return 0; | 64 | return 0; |
73 | } | 65 | } |
74 | 66 | ||
75 | static int sha512_ssse3_finup(struct shash_desc *desc, const u8 *data, | 67 | static int sha512_finup(struct shash_desc *desc, const u8 *data, |
76 | unsigned int len, u8 *out) | 68 | unsigned int len, u8 *out, sha512_transform_fn *sha512_xform) |
77 | { | 69 | { |
78 | if (!irq_fpu_usable()) | 70 | if (!irq_fpu_usable()) |
79 | return crypto_sha512_finup(desc, data, len, out); | 71 | return crypto_sha512_finup(desc, data, len, out); |
@@ -81,20 +73,32 @@ static int sha512_ssse3_finup(struct shash_desc *desc, const u8 *data, | |||
81 | kernel_fpu_begin(); | 73 | kernel_fpu_begin(); |
82 | if (len) | 74 | if (len) |
83 | sha512_base_do_update(desc, data, len, | 75 | sha512_base_do_update(desc, data, len, |
84 | (sha512_block_fn *)sha512_transform_asm); | 76 | (sha512_block_fn *)sha512_xform); |
85 | sha512_base_do_finalize(desc, (sha512_block_fn *)sha512_transform_asm); | 77 | sha512_base_do_finalize(desc, (sha512_block_fn *)sha512_xform); |
86 | kernel_fpu_end(); | 78 | kernel_fpu_end(); |
87 | 79 | ||
88 | return sha512_base_finish(desc, out); | 80 | return sha512_base_finish(desc, out); |
89 | } | 81 | } |
90 | 82 | ||
83 | static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data, | ||
84 | unsigned int len) | ||
85 | { | ||
86 | return sha512_update(desc, data, len, sha512_transform_ssse3); | ||
87 | } | ||
88 | |||
89 | static int sha512_ssse3_finup(struct shash_desc *desc, const u8 *data, | ||
90 | unsigned int len, u8 *out) | ||
91 | { | ||
92 | return sha512_finup(desc, data, len, out, sha512_transform_ssse3); | ||
93 | } | ||
94 | |||
91 | /* Add padding and return the message digest. */ | 95 | /* Add padding and return the message digest. */ |
92 | static int sha512_ssse3_final(struct shash_desc *desc, u8 *out) | 96 | static int sha512_ssse3_final(struct shash_desc *desc, u8 *out) |
93 | { | 97 | { |
94 | return sha512_ssse3_finup(desc, NULL, 0, out); | 98 | return sha512_ssse3_finup(desc, NULL, 0, out); |
95 | } | 99 | } |
96 | 100 | ||
97 | static struct shash_alg algs[] = { { | 101 | static struct shash_alg sha512_ssse3_algs[] = { { |
98 | .digestsize = SHA512_DIGEST_SIZE, | 102 | .digestsize = SHA512_DIGEST_SIZE, |
99 | .init = sha512_base_init, | 103 | .init = sha512_base_init, |
100 | .update = sha512_ssse3_update, | 104 | .update = sha512_ssse3_update, |
@@ -126,8 +130,25 @@ static struct shash_alg algs[] = { { | |||
126 | } | 130 | } |
127 | } }; | 131 | } }; |
128 | 132 | ||
133 | static int register_sha512_ssse3(void) | ||
134 | { | ||
135 | if (boot_cpu_has(X86_FEATURE_SSSE3)) | ||
136 | return crypto_register_shashes(sha512_ssse3_algs, | ||
137 | ARRAY_SIZE(sha512_ssse3_algs)); | ||
138 | return 0; | ||
139 | } | ||
140 | |||
141 | static void unregister_sha512_ssse3(void) | ||
142 | { | ||
143 | if (boot_cpu_has(X86_FEATURE_SSSE3)) | ||
144 | crypto_unregister_shashes(sha512_ssse3_algs, | ||
145 | ARRAY_SIZE(sha512_ssse3_algs)); | ||
146 | } | ||
147 | |||
129 | #ifdef CONFIG_AS_AVX | 148 | #ifdef CONFIG_AS_AVX |
130 | static bool __init avx_usable(void) | 149 | asmlinkage void sha512_transform_avx(u64 *digest, const char *data, |
150 | u64 rounds); | ||
151 | static bool avx_usable(void) | ||
131 | { | 152 | { |
132 | if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { | 153 | if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { |
133 | if (cpu_has_avx) | 154 | if (cpu_has_avx) |
@@ -137,47 +158,185 @@ static bool __init avx_usable(void) | |||
137 | 158 | ||
138 | return true; | 159 | return true; |
139 | } | 160 | } |
140 | #endif | ||
141 | 161 | ||
142 | static int __init sha512_ssse3_mod_init(void) | 162 | static int sha512_avx_update(struct shash_desc *desc, const u8 *data, |
163 | unsigned int len) | ||
143 | { | 164 | { |
144 | /* test for SSSE3 first */ | 165 | return sha512_update(desc, data, len, sha512_transform_avx); |
145 | if (cpu_has_ssse3) | 166 | } |
146 | sha512_transform_asm = sha512_transform_ssse3; | ||
147 | 167 | ||
148 | #ifdef CONFIG_AS_AVX | 168 | static int sha512_avx_finup(struct shash_desc *desc, const u8 *data, |
149 | /* allow AVX to override SSSE3, it's a little faster */ | 169 | unsigned int len, u8 *out) |
150 | if (avx_usable()) { | 170 | { |
151 | #ifdef CONFIG_AS_AVX2 | 171 | return sha512_finup(desc, data, len, out, sha512_transform_avx); |
152 | if (boot_cpu_has(X86_FEATURE_AVX2)) | 172 | } |
153 | sha512_transform_asm = sha512_transform_rorx; | 173 | |
154 | else | 174 | /* Add padding and return the message digest. */ |
155 | #endif | 175 | static int sha512_avx_final(struct shash_desc *desc, u8 *out) |
156 | sha512_transform_asm = sha512_transform_avx; | 176 | { |
177 | return sha512_avx_finup(desc, NULL, 0, out); | ||
178 | } | ||
179 | |||
180 | static struct shash_alg sha512_avx_algs[] = { { | ||
181 | .digestsize = SHA512_DIGEST_SIZE, | ||
182 | .init = sha512_base_init, | ||
183 | .update = sha512_avx_update, | ||
184 | .final = sha512_avx_final, | ||
185 | .finup = sha512_avx_finup, | ||
186 | .descsize = sizeof(struct sha512_state), | ||
187 | .base = { | ||
188 | .cra_name = "sha512", | ||
189 | .cra_driver_name = "sha512-avx", | ||
190 | .cra_priority = 160, | ||
191 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
192 | .cra_blocksize = SHA512_BLOCK_SIZE, | ||
193 | .cra_module = THIS_MODULE, | ||
157 | } | 194 | } |
158 | #endif | 195 | }, { |
196 | .digestsize = SHA384_DIGEST_SIZE, | ||
197 | .init = sha384_base_init, | ||
198 | .update = sha512_avx_update, | ||
199 | .final = sha512_avx_final, | ||
200 | .finup = sha512_avx_finup, | ||
201 | .descsize = sizeof(struct sha512_state), | ||
202 | .base = { | ||
203 | .cra_name = "sha384", | ||
204 | .cra_driver_name = "sha384-avx", | ||
205 | .cra_priority = 160, | ||
206 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
207 | .cra_blocksize = SHA384_BLOCK_SIZE, | ||
208 | .cra_module = THIS_MODULE, | ||
209 | } | ||
210 | } }; | ||
159 | 211 | ||
160 | if (sha512_transform_asm) { | 212 | static int register_sha512_avx(void) |
161 | #ifdef CONFIG_AS_AVX | 213 | { |
162 | if (sha512_transform_asm == sha512_transform_avx) | 214 | if (avx_usable()) |
163 | pr_info("Using AVX optimized SHA-512 implementation\n"); | 215 | return crypto_register_shashes(sha512_avx_algs, |
164 | #ifdef CONFIG_AS_AVX2 | 216 | ARRAY_SIZE(sha512_avx_algs)); |
165 | else if (sha512_transform_asm == sha512_transform_rorx) | 217 | return 0; |
166 | pr_info("Using AVX2 optimized SHA-512 implementation\n"); | 218 | } |
219 | |||
220 | static void unregister_sha512_avx(void) | ||
221 | { | ||
222 | if (avx_usable()) | ||
223 | crypto_unregister_shashes(sha512_avx_algs, | ||
224 | ARRAY_SIZE(sha512_avx_algs)); | ||
225 | } | ||
226 | #else | ||
227 | static inline int register_sha512_avx(void) { return 0; } | ||
228 | static inline void unregister_sha512_avx(void) { } | ||
167 | #endif | 229 | #endif |
168 | else | 230 | |
231 | #if defined(CONFIG_AS_AVX2) && defined(CONFIG_AS_AVX) | ||
232 | asmlinkage void sha512_transform_rorx(u64 *digest, const char *data, | ||
233 | u64 rounds); | ||
234 | |||
235 | static int sha512_avx2_update(struct shash_desc *desc, const u8 *data, | ||
236 | unsigned int len) | ||
237 | { | ||
238 | return sha512_update(desc, data, len, sha512_transform_rorx); | ||
239 | } | ||
240 | |||
241 | static int sha512_avx2_finup(struct shash_desc *desc, const u8 *data, | ||
242 | unsigned int len, u8 *out) | ||
243 | { | ||
244 | return sha512_finup(desc, data, len, out, sha512_transform_rorx); | ||
245 | } | ||
246 | |||
247 | /* Add padding and return the message digest. */ | ||
248 | static int sha512_avx2_final(struct shash_desc *desc, u8 *out) | ||
249 | { | ||
250 | return sha512_avx2_finup(desc, NULL, 0, out); | ||
251 | } | ||
252 | |||
253 | static struct shash_alg sha512_avx2_algs[] = { { | ||
254 | .digestsize = SHA512_DIGEST_SIZE, | ||
255 | .init = sha512_base_init, | ||
256 | .update = sha512_avx2_update, | ||
257 | .final = sha512_avx2_final, | ||
258 | .finup = sha512_avx2_finup, | ||
259 | .descsize = sizeof(struct sha512_state), | ||
260 | .base = { | ||
261 | .cra_name = "sha512", | ||
262 | .cra_driver_name = "sha512-avx2", | ||
263 | .cra_priority = 170, | ||
264 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
265 | .cra_blocksize = SHA512_BLOCK_SIZE, | ||
266 | .cra_module = THIS_MODULE, | ||
267 | } | ||
268 | }, { | ||
269 | .digestsize = SHA384_DIGEST_SIZE, | ||
270 | .init = sha384_base_init, | ||
271 | .update = sha512_avx2_update, | ||
272 | .final = sha512_avx2_final, | ||
273 | .finup = sha512_avx2_finup, | ||
274 | .descsize = sizeof(struct sha512_state), | ||
275 | .base = { | ||
276 | .cra_name = "sha384", | ||
277 | .cra_driver_name = "sha384-avx2", | ||
278 | .cra_priority = 170, | ||
279 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
280 | .cra_blocksize = SHA384_BLOCK_SIZE, | ||
281 | .cra_module = THIS_MODULE, | ||
282 | } | ||
283 | } }; | ||
284 | |||
285 | static bool avx2_usable(void) | ||
286 | { | ||
287 | if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) && | ||
288 | boot_cpu_has(X86_FEATURE_BMI2)) | ||
289 | return true; | ||
290 | |||
291 | return false; | ||
292 | } | ||
293 | |||
294 | static int register_sha512_avx2(void) | ||
295 | { | ||
296 | if (avx2_usable()) | ||
297 | return crypto_register_shashes(sha512_avx2_algs, | ||
298 | ARRAY_SIZE(sha512_avx2_algs)); | ||
299 | return 0; | ||
300 | } | ||
301 | |||
302 | static void unregister_sha512_avx2(void) | ||
303 | { | ||
304 | if (avx2_usable()) | ||
305 | crypto_unregister_shashes(sha512_avx2_algs, | ||
306 | ARRAY_SIZE(sha512_avx2_algs)); | ||
307 | } | ||
308 | #else | ||
309 | static inline int register_sha512_avx2(void) { return 0; } | ||
310 | static inline void unregister_sha512_avx2(void) { } | ||
169 | #endif | 311 | #endif |
170 | pr_info("Using SSSE3 optimized SHA-512 implementation\n"); | 312 | |
171 | return crypto_register_shashes(algs, ARRAY_SIZE(algs)); | 313 | static int __init sha512_ssse3_mod_init(void) |
314 | { | ||
316 | if (register_sha512_ssse3()) | ||
317 | goto fail; | ||
318 | |||
319 | if (register_sha512_avx()) { | ||
320 | unregister_sha512_ssse3(); | ||
321 | goto fail; | ||
172 | } | 322 | } |
173 | pr_info("Neither AVX nor SSSE3 is available/usable.\n"); | ||
174 | 323 | ||
324 | if (register_sha512_avx2()) { | ||
325 | unregister_sha512_avx(); | ||
326 | unregister_sha512_ssse3(); | ||
327 | goto fail; | ||
328 | } | ||
329 | |||
330 | return 0; | ||
331 | fail: | ||
175 | return -ENODEV; | 332 | return -ENODEV; |
176 | } | 333 | } |
177 | 334 | ||
178 | static void __exit sha512_ssse3_mod_fini(void) | 335 | static void __exit sha512_ssse3_mod_fini(void) |
179 | { | 336 | { |
180 | crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); | 337 | unregister_sha512_avx2(); |
338 | unregister_sha512_avx(); | ||
339 | unregister_sha512_ssse3(); | ||
181 | } | 340 | } |
182 | 341 | ||
183 | module_init(sha512_ssse3_mod_init); | 342 | module_init(sha512_ssse3_mod_init); |
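Both glue files now funnel update/finup through a single helper parameterized by the block-transform function, so each ISA variant shrinks to a two-line wrapper. The shape of that pattern, reduced to a hedged stand-alone sketch with hypothetical names rather than the kernel's shash plumbing:

#include <stdio.h>
#include <stdint.h>

typedef void (transform_fn)(uint64_t *digest, const char *data, uint64_t blocks);

static void transform_generic(uint64_t *digest, const char *data, uint64_t blocks)
{
	(void)digest; (void)data;
	printf("generic transform, %llu block(s)\n", (unsigned long long)blocks);
}

static void transform_avx(uint64_t *digest, const char *data, uint64_t blocks)
{
	(void)digest; (void)data;
	printf("avx-style transform, %llu block(s)\n", (unsigned long long)blocks);
}

/* One shared helper owns all the bookkeeping... */
static int hash_update(uint64_t *digest, const char *data, uint64_t blocks,
		       transform_fn *xform)
{
	xform(digest, data, blocks);
	return 0;
}

/* ...and each implementation is a thin wrapper naming its transform. */
static int hash_update_generic(uint64_t *d, const char *p, uint64_t n)
{
	return hash_update(d, p, n, transform_generic);
}

static int hash_update_avx(uint64_t *d, const char *p, uint64_t n)
{
	return hash_update(d, p, n, transform_avx);
}

int main(void)
{
	uint64_t digest[8] = { 0 };

	hash_update_generic(digest, "block", 1);
	hash_update_avx(digest, "block", 1);
	return 0;
}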
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 53616ca03244..a55697d19824 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S | |||
@@ -509,6 +509,17 @@ END(irq_entries_start) | |||
509 | * tracking that we're in kernel mode. | 509 | * tracking that we're in kernel mode. |
510 | */ | 510 | */ |
511 | SWAPGS | 511 | SWAPGS |
512 | |||
513 | /* | ||
514 | * We need to tell lockdep that IRQs are off. We can't do this until | ||
515 | * we fix gsbase, and we should do it before enter_from_user_mode | ||
516 | * (which can take locks). Since TRACE_IRQS_OFF is idempotent, | ||
517 | * the simplest way to handle it is to just call it twice if | ||
518 | * we enter from user mode. There's no reason to optimize this since | ||
519 | * TRACE_IRQS_OFF is a no-op if lockdep is off. | ||
520 | */ | ||
521 | TRACE_IRQS_OFF | ||
522 | |||
512 | #ifdef CONFIG_CONTEXT_TRACKING | 523 | #ifdef CONFIG_CONTEXT_TRACKING |
513 | call enter_from_user_mode | 524 | call enter_from_user_mode |
514 | #endif | 525 | #endif |
@@ -1049,12 +1060,18 @@ ENTRY(error_entry) | |||
1049 | SWAPGS | 1060 | SWAPGS |
1050 | 1061 | ||
1051 | .Lerror_entry_from_usermode_after_swapgs: | 1062 | .Lerror_entry_from_usermode_after_swapgs: |
1063 | /* | ||
1064 | * We need to tell lockdep that IRQs are off. We can't do this until | ||
1065 | * we fix gsbase, and we should do it before enter_from_user_mode | ||
1066 | * (which can take locks). | ||
1067 | */ | ||
1068 | TRACE_IRQS_OFF | ||
1052 | #ifdef CONFIG_CONTEXT_TRACKING | 1069 | #ifdef CONFIG_CONTEXT_TRACKING |
1053 | call enter_from_user_mode | 1070 | call enter_from_user_mode |
1054 | #endif | 1071 | #endif |
1072 | ret | ||
1055 | 1073 | ||
1056 | .Lerror_entry_done: | 1074 | .Lerror_entry_done: |
1057 | |||
1058 | TRACE_IRQS_OFF | 1075 | TRACE_IRQS_OFF |
1059 | ret | 1076 | ret |
1060 | 1077 | ||
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index caa2c712d1e7..f17705e1332c 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl | |||
@@ -382,3 +382,4 @@ | |||
382 | 373 i386 shutdown sys_shutdown | 382 | 373 i386 shutdown sys_shutdown |
383 | 374 i386 userfaultfd sys_userfaultfd | 383 | 374 i386 userfaultfd sys_userfaultfd |
384 | 375 i386 membarrier sys_membarrier | 384 | 375 i386 membarrier sys_membarrier |
385 | 376 i386 mlock2 sys_mlock2 | ||
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 278842fdf1f6..314a90bfc09c 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl | |||
@@ -331,6 +331,7 @@ | |||
331 | 322 64 execveat stub_execveat | 331 | 322 64 execveat stub_execveat |
332 | 323 common userfaultfd sys_userfaultfd | 332 | 323 common userfaultfd sys_userfaultfd |
333 | 324 common membarrier sys_membarrier | 333 | 324 common membarrier sys_membarrier |
334 | 325 common mlock2 sys_mlock2 | ||
334 | 335 | ||
335 | # | 336 | # |
336 | # x32-specific system call numbers start at 512 to avoid cache impact | 337 | # x32-specific system call numbers start at 512 to avoid cache impact |
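Both tables wire up the new mlock2(2) syscall, which extends mlock() with a flags argument. A hedged usage example via syscall(2) -- glibc may not carry a wrapper yet -- using the x86-64 number from the table above and MLOCK_ONFAULT (0x01 at the time of this series) to lock pages only as they are faulted in; error handling is trimmed:

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef SYS_mlock2
#define SYS_mlock2 325		/* x86-64 number added above */
#endif
#ifndef MLOCK_ONFAULT
#define MLOCK_ONFAULT 0x01
#endif

int main(void)
{
	size_t len = 4096;
	void *buf = aligned_alloc(4096, len);

	if (syscall(SYS_mlock2, buf, len, MLOCK_ONFAULT))
		perror("mlock2");
	else
		printf("locked %zu bytes on fault\n", len);
	free(buf);
	return 0;
}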
diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h index 04e9d023168f..1c0b43724ce3 100644 --- a/arch/x86/include/asm/highmem.h +++ b/arch/x86/include/asm/highmem.h | |||
@@ -68,7 +68,6 @@ void *kmap_atomic(struct page *page); | |||
68 | void __kunmap_atomic(void *kvaddr); | 68 | void __kunmap_atomic(void *kvaddr); |
69 | void *kmap_atomic_pfn(unsigned long pfn); | 69 | void *kmap_atomic_pfn(unsigned long pfn); |
70 | void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot); | 70 | void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot); |
71 | struct page *kmap_atomic_to_page(void *ptr); | ||
72 | 71 | ||
73 | #define flush_cache_kmaps() do { } while (0) | 72 | #define flush_cache_kmaps() do { } while (0) |
74 | 73 | ||
diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h index ccffa53750a8..39bcefc20de7 100644 --- a/arch/x86/include/asm/i8259.h +++ b/arch/x86/include/asm/i8259.h | |||
@@ -60,6 +60,7 @@ struct legacy_pic { | |||
60 | void (*mask_all)(void); | 60 | void (*mask_all)(void); |
61 | void (*restore_mask)(void); | 61 | void (*restore_mask)(void); |
62 | void (*init)(int auto_eoi); | 62 | void (*init)(int auto_eoi); |
63 | int (*probe)(void); | ||
63 | int (*irq_pending)(unsigned int irq); | 64 | int (*irq_pending)(unsigned int irq); |
64 | void (*make_irq)(unsigned int irq); | 65 | void (*make_irq)(unsigned int irq); |
65 | }; | 66 | }; |
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 046c7fb1ca43..a210eba2727c 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h | |||
@@ -33,6 +33,11 @@ enum irq_remap_cap { | |||
33 | IRQ_POSTING_CAP = 0, | 33 | IRQ_POSTING_CAP = 0, |
34 | }; | 34 | }; |
35 | 35 | ||
36 | struct vcpu_data { | ||
37 | u64 pi_desc_addr; /* Physical address of PI Descriptor */ | ||
38 | u32 vector; /* Guest vector of the interrupt */ | ||
39 | }; | ||
40 | |||
36 | #ifdef CONFIG_IRQ_REMAP | 41 | #ifdef CONFIG_IRQ_REMAP |
37 | 42 | ||
38 | extern bool irq_remapping_cap(enum irq_remap_cap cap); | 43 | extern bool irq_remapping_cap(enum irq_remap_cap cap); |
@@ -58,11 +63,6 @@ static inline struct irq_domain *arch_get_ir_parent_domain(void) | |||
58 | return x86_vector_domain; | 63 | return x86_vector_domain; |
59 | } | 64 | } |
60 | 65 | ||
61 | struct vcpu_data { | ||
62 | u64 pi_desc_addr; /* Physical address of PI Descriptor */ | ||
63 | u32 vector; /* Guest vector of the interrupt */ | ||
64 | }; | ||
65 | |||
66 | #else /* CONFIG_IRQ_REMAP */ | 66 | #else /* CONFIG_IRQ_REMAP */ |
67 | 67 | ||
68 | static inline bool irq_remapping_cap(enum irq_remap_cap cap) { return 0; } | 68 | static inline bool irq_remapping_cap(enum irq_remap_cap cap) { return 0; } |
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index e16466ec473c..e9cd7befcb76 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h | |||
@@ -112,6 +112,16 @@ struct x86_emulate_ops { | |||
112 | struct x86_exception *fault); | 112 | struct x86_exception *fault); |
113 | 113 | ||
114 | /* | 114 | /* |
115 | * read_phys: Read bytes of standard (non-emulated/special) memory. | ||
116 | * Used for descriptor reading. | ||
117 | * @addr: [IN ] Physical address from which to read. | ||
118 | * @val: [OUT] Value read from memory. | ||
119 | * @bytes: [IN ] Number of bytes to read from memory. | ||
120 | */ | ||
121 | int (*read_phys)(struct x86_emulate_ctxt *ctxt, unsigned long addr, | ||
122 | void *val, unsigned int bytes); | ||
123 | |||
124 | /* | ||
115 | * write_std: Write bytes of standard (non-emulated/special) memory. | 125 | * write_std: Write bytes of standard (non-emulated/special) memory. |
116 | * Used for descriptor writing. | 126 | * Used for descriptor writing. |
117 | * @addr: [IN ] Linear address to which to write. | 127 | * @addr: [IN ] Linear address to which to write. |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3a36ee704c30..30cfd64295a0 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/perf_event.h> | 24 | #include <linux/perf_event.h> |
25 | #include <linux/pvclock_gtod.h> | 25 | #include <linux/pvclock_gtod.h> |
26 | #include <linux/clocksource.h> | 26 | #include <linux/clocksource.h> |
27 | #include <linux/irqbypass.h> | ||
27 | 28 | ||
28 | #include <asm/pvclock-abi.h> | 29 | #include <asm/pvclock-abi.h> |
29 | #include <asm/desc.h> | 30 | #include <asm/desc.h> |
@@ -176,6 +177,8 @@ enum { | |||
176 | */ | 177 | */ |
177 | #define KVM_APIC_PV_EOI_PENDING 1 | 178 | #define KVM_APIC_PV_EOI_PENDING 1 |
178 | 179 | ||
180 | struct kvm_kernel_irq_routing_entry; | ||
181 | |||
179 | /* | 182 | /* |
180 | * We don't want allocation failures within the mmu code, so we preallocate | 183 | * We don't want allocation failures within the mmu code, so we preallocate |
181 | * enough memory for a single page fault in a cache. | 184 | * enough memory for a single page fault in a cache. |
@@ -374,6 +377,7 @@ struct kvm_mtrr { | |||
374 | /* Hyper-V per vcpu emulation context */ | 377 | /* Hyper-V per vcpu emulation context */ |
375 | struct kvm_vcpu_hv { | 378 | struct kvm_vcpu_hv { |
376 | u64 hv_vapic; | 379 | u64 hv_vapic; |
380 | s64 runtime_offset; | ||
377 | }; | 381 | }; |
378 | 382 | ||
379 | struct kvm_vcpu_arch { | 383 | struct kvm_vcpu_arch { |
@@ -396,6 +400,7 @@ struct kvm_vcpu_arch { | |||
396 | u64 efer; | 400 | u64 efer; |
397 | u64 apic_base; | 401 | u64 apic_base; |
398 | struct kvm_lapic *apic; /* kernel irqchip context */ | 402 | struct kvm_lapic *apic; /* kernel irqchip context */ |
403 | u64 eoi_exit_bitmap[4]; | ||
399 | unsigned long apic_attention; | 404 | unsigned long apic_attention; |
400 | int32_t apic_arb_prio; | 405 | int32_t apic_arb_prio; |
401 | int mp_state; | 406 | int mp_state; |
@@ -500,6 +505,7 @@ struct kvm_vcpu_arch { | |||
500 | u32 virtual_tsc_mult; | 505 | u32 virtual_tsc_mult; |
501 | u32 virtual_tsc_khz; | 506 | u32 virtual_tsc_khz; |
502 | s64 ia32_tsc_adjust_msr; | 507 | s64 ia32_tsc_adjust_msr; |
508 | u64 tsc_scaling_ratio; | ||
503 | 509 | ||
504 | atomic_t nmi_queued; /* unprocessed asynchronous NMIs */ | 510 | atomic_t nmi_queued; /* unprocessed asynchronous NMIs */ |
505 | unsigned nmi_pending; /* NMI queued after currently running handler */ | 511 | unsigned nmi_pending; /* NMI queued after currently running handler */ |
@@ -573,6 +579,9 @@ struct kvm_vcpu_arch { | |||
573 | struct { | 579 | struct { |
574 | bool pv_unhalted; | 580 | bool pv_unhalted; |
575 | } pv; | 581 | } pv; |
582 | |||
583 | int pending_ioapic_eoi; | ||
584 | int pending_external_vector; | ||
576 | }; | 585 | }; |
577 | 586 | ||
578 | struct kvm_lpage_info { | 587 | struct kvm_lpage_info { |
@@ -683,6 +692,9 @@ struct kvm_arch { | |||
683 | u32 bsp_vcpu_id; | 692 | u32 bsp_vcpu_id; |
684 | 693 | ||
685 | u64 disabled_quirks; | 694 | u64 disabled_quirks; |
695 | |||
696 | bool irqchip_split; | ||
697 | u8 nr_reserved_ioapic_pins; | ||
686 | }; | 698 | }; |
687 | 699 | ||
688 | struct kvm_vm_stat { | 700 | struct kvm_vm_stat { |
@@ -766,7 +778,7 @@ struct kvm_x86_ops { | |||
766 | void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); | 778 | void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); |
767 | void (*vcpu_put)(struct kvm_vcpu *vcpu); | 779 | void (*vcpu_put)(struct kvm_vcpu *vcpu); |
768 | 780 | ||
769 | void (*update_db_bp_intercept)(struct kvm_vcpu *vcpu); | 781 | void (*update_bp_intercept)(struct kvm_vcpu *vcpu); |
770 | int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); | 782 | int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); |
771 | int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); | 783 | int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); |
772 | u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); | 784 | u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); |
@@ -819,10 +831,10 @@ struct kvm_x86_ops { | |||
819 | void (*enable_nmi_window)(struct kvm_vcpu *vcpu); | 831 | void (*enable_nmi_window)(struct kvm_vcpu *vcpu); |
820 | void (*enable_irq_window)(struct kvm_vcpu *vcpu); | 832 | void (*enable_irq_window)(struct kvm_vcpu *vcpu); |
821 | void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); | 833 | void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); |
822 | int (*vm_has_apicv)(struct kvm *kvm); | 834 | int (*cpu_uses_apicv)(struct kvm_vcpu *vcpu); |
823 | void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); | 835 | void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); |
824 | void (*hwapic_isr_update)(struct kvm *kvm, int isr); | 836 | void (*hwapic_isr_update)(struct kvm *kvm, int isr); |
825 | void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); | 837 | void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu); |
826 | void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); | 838 | void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); |
827 | void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); | 839 | void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); |
828 | void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); | 840 | void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); |
@@ -833,7 +845,7 @@ struct kvm_x86_ops { | |||
833 | int (*get_lpage_level)(void); | 845 | int (*get_lpage_level)(void); |
834 | bool (*rdtscp_supported)(void); | 846 | bool (*rdtscp_supported)(void); |
835 | bool (*invpcid_supported)(void); | 847 | bool (*invpcid_supported)(void); |
836 | void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment, bool host); | 848 | void (*adjust_tsc_offset_guest)(struct kvm_vcpu *vcpu, s64 adjustment); |
837 | 849 | ||
838 | void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); | 850 | void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); |
839 | 851 | ||
@@ -841,11 +853,9 @@ struct kvm_x86_ops { | |||
841 | 853 | ||
842 | bool (*has_wbinvd_exit)(void); | 854 | bool (*has_wbinvd_exit)(void); |
843 | 855 | ||
844 | void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale); | ||
845 | u64 (*read_tsc_offset)(struct kvm_vcpu *vcpu); | 856 | u64 (*read_tsc_offset)(struct kvm_vcpu *vcpu); |
846 | void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); | 857 | void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); |
847 | 858 | ||
848 | u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc); | ||
849 | u64 (*read_l1_tsc)(struct kvm_vcpu *vcpu, u64 host_tsc); | 859 | u64 (*read_l1_tsc)(struct kvm_vcpu *vcpu, u64 host_tsc); |
850 | 860 | ||
851 | void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); | 861 | void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); |
@@ -887,6 +897,20 @@ struct kvm_x86_ops { | |||
887 | gfn_t offset, unsigned long mask); | 897 | gfn_t offset, unsigned long mask); |
888 | /* pmu operations of sub-arch */ | 898 | /* pmu operations of sub-arch */ |
889 | const struct kvm_pmu_ops *pmu_ops; | 899 | const struct kvm_pmu_ops *pmu_ops; |
900 | |||
901 | /* | ||
902 | * Architecture specific hooks for vCPU blocking due to | ||
903 | * HLT instruction. | ||
904 | * Returns for .pre_block(): | ||
905 | * - 0 means continue to block the vCPU. | ||
906 | * - 1 means we cannot block the vCPU because an event | ||
907 | * occurred during this period, such as the 'ON' bit in the | ||
908 | * posted-interrupts descriptor being set. | ||
909 | */ | ||
910 | int (*pre_block)(struct kvm_vcpu *vcpu); | ||
911 | void (*post_block)(struct kvm_vcpu *vcpu); | ||
912 | int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq, | ||
913 | uint32_t guest_irq, bool set); | ||
890 | }; | 914 | }; |
891 | 915 | ||
892 | struct kvm_arch_async_pf { | 916 | struct kvm_arch_async_pf { |
@@ -898,17 +922,6 @@ struct kvm_arch_async_pf { | |||
898 | 922 | ||
899 | extern struct kvm_x86_ops *kvm_x86_ops; | 923 | extern struct kvm_x86_ops *kvm_x86_ops; |
900 | 924 | ||
901 | static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, | ||
902 | s64 adjustment) | ||
903 | { | ||
904 | kvm_x86_ops->adjust_tsc_offset(vcpu, adjustment, false); | ||
905 | } | ||
906 | |||
907 | static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment) | ||
908 | { | ||
909 | kvm_x86_ops->adjust_tsc_offset(vcpu, adjustment, true); | ||
910 | } | ||
911 | |||
912 | int kvm_mmu_module_init(void); | 925 | int kvm_mmu_module_init(void); |
913 | void kvm_mmu_module_exit(void); | 926 | void kvm_mmu_module_exit(void); |
914 | 927 | ||
@@ -961,10 +974,12 @@ u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu); | |||
961 | 974 | ||
962 | /* control of guest tsc rate supported? */ | 975 | /* control of guest tsc rate supported? */ |
963 | extern bool kvm_has_tsc_control; | 976 | extern bool kvm_has_tsc_control; |
964 | /* minimum supported tsc_khz for guests */ | ||
965 | extern u32 kvm_min_guest_tsc_khz; | ||
966 | /* maximum supported tsc_khz for guests */ | 977 | /* maximum supported tsc_khz for guests */ |
967 | extern u32 kvm_max_guest_tsc_khz; | 978 | extern u32 kvm_max_guest_tsc_khz; |
979 | /* number of bits of the fractional part of the TSC scaling ratio */ | ||
980 | extern u8 kvm_tsc_scaling_ratio_frac_bits; | ||
981 | /* maximum allowed value of TSC scaling ratio */ | ||
982 | extern u64 kvm_max_tsc_scaling_ratio; | ||
968 | 983 | ||
969 | enum emulation_result { | 984 | enum emulation_result { |
970 | EMULATE_DONE, /* no further processing */ | 985 | EMULATE_DONE, /* no further processing */ |
@@ -1210,6 +1225,9 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, | |||
1210 | void kvm_define_shared_msr(unsigned index, u32 msr); | 1225 | void kvm_define_shared_msr(unsigned index, u32 msr); |
1211 | int kvm_set_shared_msr(unsigned index, u64 val, u64 mask); | 1226 | int kvm_set_shared_msr(unsigned index, u64 val, u64 mask); |
1212 | 1227 | ||
1228 | u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc); | ||
1229 | u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc); | ||
1230 | |||
1213 | unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu); | 1231 | unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu); |
1214 | bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); | 1232 | bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); |
1215 | 1233 | ||
@@ -1231,4 +1249,13 @@ int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size); | |||
1231 | bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu); | 1249 | bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu); |
1232 | bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu); | 1250 | bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu); |
1233 | 1251 | ||
1252 | bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, | ||
1253 | struct kvm_vcpu **dest_vcpu); | ||
1254 | |||
1255 | void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, | ||
1256 | struct kvm_lapic_irq *irq); | ||
1257 | |||
1258 | static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} | ||
1259 | static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} | ||
1260 | |||
1234 | #endif /* _ASM_X86_KVM_HOST_H */ | 1261 | #endif /* _ASM_X86_KVM_HOST_H */ |
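tsc_scaling_ratio and kvm_tsc_scaling_ratio_frac_bits describe a fixed-point multiplier: the ratio is (guest_khz << frac_bits) / host_khz, and a guest TSC read is (host_tsc * ratio) >> frac_bits. A worked stand-alone example follows; frac_bits = 48 mirrors the VMX multiplier format, but the concrete frequencies are illustrative only:

#include <stdio.h>
#include <stdint.h>

/* 128-bit intermediates (gcc/clang extension) keep the products exact. */
static uint64_t scale_tsc(uint64_t tsc, uint64_t ratio, unsigned frac_bits)
{
	return (uint64_t)(((unsigned __int128)tsc * ratio) >> frac_bits);
}

int main(void)
{
	const unsigned frac_bits = 48;
	uint64_t host_khz  = 3000000;	/* 3.0 GHz host */
	uint64_t guest_khz = 1500000;	/* guest configured for 1.5 GHz */
	uint64_t ratio = (uint64_t)(((unsigned __int128)guest_khz << frac_bits)
				    / host_khz);

	printf("ratio = %#llx\n", (unsigned long long)ratio);	/* 1/2 in 16.48 */
	printf("host tsc 6000000 -> guest tsc %llu\n",
	       (unsigned long long)scale_tsc(6000000, ratio, frac_bits));
	return 0;
}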
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index b8c14bb7fc8f..690b4027e17c 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
@@ -35,7 +35,7 @@ | |||
35 | #define MSR_IA32_PERFCTR0 0x000000c1 | 35 | #define MSR_IA32_PERFCTR0 0x000000c1 |
36 | #define MSR_IA32_PERFCTR1 0x000000c2 | 36 | #define MSR_IA32_PERFCTR1 0x000000c2 |
37 | #define MSR_FSB_FREQ 0x000000cd | 37 | #define MSR_FSB_FREQ 0x000000cd |
38 | #define MSR_NHM_PLATFORM_INFO 0x000000ce | 38 | #define MSR_PLATFORM_INFO 0x000000ce |
39 | 39 | ||
40 | #define MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2 | 40 | #define MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2 |
41 | #define NHM_C3_AUTO_DEMOTE (1UL << 25) | 41 | #define NHM_C3_AUTO_DEMOTE (1UL << 25) |
@@ -44,7 +44,6 @@ | |||
44 | #define SNB_C1_AUTO_UNDEMOTE (1UL << 27) | 44 | #define SNB_C1_AUTO_UNDEMOTE (1UL << 27) |
45 | #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) | 45 | #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) |
46 | 46 | ||
47 | #define MSR_PLATFORM_INFO 0x000000ce | ||
48 | #define MSR_MTRRcap 0x000000fe | 47 | #define MSR_MTRRcap 0x000000fe |
49 | #define MSR_IA32_BBL_CR_CTL 0x00000119 | 48 | #define MSR_IA32_BBL_CR_CTL 0x00000119 |
50 | #define MSR_IA32_BBL_CR_CTL3 0x0000011e | 49 | #define MSR_IA32_BBL_CR_CTL3 0x0000011e |
@@ -206,6 +205,13 @@ | |||
206 | #define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0 | 205 | #define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0 |
207 | #define MSR_RING_PERF_LIMIT_REASONS 0x000006B1 | 206 | #define MSR_RING_PERF_LIMIT_REASONS 0x000006B1 |
208 | 207 | ||
208 | /* Config TDP MSRs */ | ||
209 | #define MSR_CONFIG_TDP_NOMINAL 0x00000648 | ||
210 | #define MSR_CONFIG_TDP_LEVEL1 0x00000649 | ||
211 | #define MSR_CONFIG_TDP_LEVEL2 0x0000064A | ||
212 | #define MSR_CONFIG_TDP_CONTROL 0x0000064B | ||
213 | #define MSR_TURBO_ACTIVATION_RATIO 0x0000064C | ||
214 | |||
209 | /* Hardware P state interface */ | 215 | /* Hardware P state interface */ |
210 | #define MSR_PPERF 0x0000064e | 216 | #define MSR_PPERF 0x0000064e |
211 | #define MSR_PERF_LIMIT_REASONS 0x0000064f | 217 | #define MSR_PERF_LIMIT_REASONS 0x0000064f |
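The Config TDP MSRs above can be inspected from userspace through the msr driver, where the MSR number is the file offset. A hedged example reading MSR_CONFIG_TDP_NOMINAL (0x648); it assumes the msr module is loaded, the CPU actually implements the register, and root privileges:

#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	uint64_t val;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0 || pread(fd, &val, sizeof(val), 0x648) != sizeof(val)) {
		perror("rdmsr 0x648");
		return 1;
	}
	printf("MSR_CONFIG_TDP_NOMINAL: %#llx\n", (unsigned long long)val);
	close(fd);
	return 0;
}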
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index c5b7fb2774d0..cc071c6f7d4d 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h | |||
@@ -9,19 +9,21 @@ | |||
9 | #define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) | 9 | #define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) |
10 | #define PAGE_MASK (~(PAGE_SIZE-1)) | 10 | #define PAGE_MASK (~(PAGE_SIZE-1)) |
11 | 11 | ||
12 | #define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT) | ||
13 | #define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1)) | ||
14 | |||
15 | #define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) | ||
16 | #define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) | ||
17 | |||
12 | #define __PHYSICAL_MASK ((phys_addr_t)((1ULL << __PHYSICAL_MASK_SHIFT) - 1)) | 18 | #define __PHYSICAL_MASK ((phys_addr_t)((1ULL << __PHYSICAL_MASK_SHIFT) - 1)) |
13 | #define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) | 19 | #define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) |
14 | 20 | ||
15 | /* Cast PAGE_MASK to a signed type so that it is sign-extended if | 21 | /* Cast *PAGE_MASK to a signed type so that it is sign-extended if |
16 | virtual addresses are 32-bits but physical addresses are larger | 22 | virtual addresses are 32-bits but physical addresses are larger |
17 | (ie, 32-bit PAE). */ | 23 | (ie, 32-bit PAE). */ |
18 | #define PHYSICAL_PAGE_MASK (((signed long)PAGE_MASK) & __PHYSICAL_MASK) | 24 | #define PHYSICAL_PAGE_MASK (((signed long)PAGE_MASK) & __PHYSICAL_MASK) |
19 | 25 | #define PHYSICAL_PMD_PAGE_MASK (((signed long)PMD_PAGE_MASK) & __PHYSICAL_MASK) | |
20 | #define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT) | 26 | #define PHYSICAL_PUD_PAGE_MASK (((signed long)PUD_PAGE_MASK) & __PHYSICAL_MASK) |
21 | #define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1)) | ||
22 | |||
23 | #define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) | ||
24 | #define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) | ||
25 | 27 | ||
26 | #define HPAGE_SHIFT PMD_SHIFT | 28 | #define HPAGE_SHIFT PMD_SHIFT |
27 | #define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) | 29 | #define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) |
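The (signed long) casts in the PHYSICAL_*_PAGE_MASK definitions matter on 32-bit PAE, where the page mask is a 32-bit quantity but physical addresses are wider: the mask must sign-extend before widening, or the high physical bits get masked off. A small demonstration, with the (int32_t) cast standing in for the 32-bit signed long:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t page_mask32 = 0xfffff000u;	/* PAGE_MASK on a 32-bit build */
	uint64_t phys_mask = (1ull << 44) - 1;	/* e.g. 44 physical bits */

	/* zero-extension loses bits 32..43 of the physical mask */
	uint64_t zero_ext = (uint64_t)page_mask32 & phys_mask;
	/* sign-extension keeps them, matching the (signed long) cast */
	uint64_t sign_ext = (uint64_t)(int64_t)(int32_t)page_mask32 & phys_mask;

	printf("zero-extended: %#llx\n", (unsigned long long)zero_ext);
	printf("sign-extended: %#llx\n", (unsigned long long)sign_ext);
	return 0;
}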
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index e99cbe814ea8..d3eee663c41f 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h | |||
@@ -322,6 +322,16 @@ static inline pmd_t pmd_mksoft_dirty(pmd_t pmd) | |||
322 | return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY); | 322 | return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY); |
323 | } | 323 | } |
324 | 324 | ||
325 | static inline pte_t pte_clear_soft_dirty(pte_t pte) | ||
326 | { | ||
327 | return pte_clear_flags(pte, _PAGE_SOFT_DIRTY); | ||
328 | } | ||
329 | |||
330 | static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd) | ||
331 | { | ||
332 | return pmd_clear_flags(pmd, _PAGE_SOFT_DIRTY); | ||
333 | } | ||
334 | |||
325 | #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */ | 335 | #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */ |
326 | 336 | ||
327 | /* | 337 | /* |
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index dd5b0aa9dd2f..a471cadb9630 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h | |||
@@ -279,17 +279,14 @@ static inline pmdval_t native_pmd_val(pmd_t pmd) | |||
279 | static inline pudval_t pud_pfn_mask(pud_t pud) | 279 | static inline pudval_t pud_pfn_mask(pud_t pud) |
280 | { | 280 | { |
281 | if (native_pud_val(pud) & _PAGE_PSE) | 281 | if (native_pud_val(pud) & _PAGE_PSE) |
282 | return PUD_PAGE_MASK & PHYSICAL_PAGE_MASK; | 282 | return PHYSICAL_PUD_PAGE_MASK; |
283 | else | 283 | else |
284 | return PTE_PFN_MASK; | 284 | return PTE_PFN_MASK; |
285 | } | 285 | } |
286 | 286 | ||
287 | static inline pudval_t pud_flags_mask(pud_t pud) | 287 | static inline pudval_t pud_flags_mask(pud_t pud) |
288 | { | 288 | { |
289 | if (native_pud_val(pud) & _PAGE_PSE) | 289 | return ~pud_pfn_mask(pud); |
290 | return ~(PUD_PAGE_MASK & (pudval_t)PHYSICAL_PAGE_MASK); | ||
291 | else | ||
292 | return ~PTE_PFN_MASK; | ||
293 | } | 290 | } |
294 | 291 | ||
295 | static inline pudval_t pud_flags(pud_t pud) | 292 | static inline pudval_t pud_flags(pud_t pud) |
@@ -300,17 +297,14 @@ static inline pudval_t pud_flags(pud_t pud) | |||
300 | static inline pmdval_t pmd_pfn_mask(pmd_t pmd) | 297 | static inline pmdval_t pmd_pfn_mask(pmd_t pmd) |
301 | { | 298 | { |
302 | if (native_pmd_val(pmd) & _PAGE_PSE) | 299 | if (native_pmd_val(pmd) & _PAGE_PSE) |
303 | return PMD_PAGE_MASK & PHYSICAL_PAGE_MASK; | 300 | return PHYSICAL_PMD_PAGE_MASK; |
304 | else | 301 | else |
305 | return PTE_PFN_MASK; | 302 | return PTE_PFN_MASK; |
306 | } | 303 | } |
307 | 304 | ||
308 | static inline pmdval_t pmd_flags_mask(pmd_t pmd) | 305 | static inline pmdval_t pmd_flags_mask(pmd_t pmd) |
309 | { | 306 | { |
310 | if (native_pmd_val(pmd) & _PAGE_PSE) | 307 | return ~pmd_pfn_mask(pmd); |
311 | return ~(PMD_PAGE_MASK & (pmdval_t)PHYSICAL_PAGE_MASK); | ||
312 | else | ||
313 | return ~PTE_PFN_MASK; | ||
314 | } | 308 | } |
315 | 309 | ||
316 | static inline pmdval_t pmd_flags(pmd_t pmd) | 310 | static inline pmdval_t pmd_flags(pmd_t pmd) |
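The rewrite makes the invariant explicit: the flags mask is always the complement of the pfn mask, so an entry splits losslessly into the two parts. A tiny illustration with made-up field widths:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t pfn_mask = 0x000ffffffffff000ull;	/* bits holding the pfn */
	uint64_t flags_mask = ~pfn_mask;		/* everything else */
	uint64_t pmd = 0x8000000123456063ull;		/* sample entry */

	printf("pfn bits:  %#llx\n", (unsigned long long)(pmd & pfn_mask));
	printf("flag bits: %#llx\n", (unsigned long long)(pmd & flags_mask));
	return 0;
}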
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 448b7ca61aee..14c63c7e8337 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
@@ -72,7 +72,8 @@ | |||
72 | #define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 | 72 | #define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 |
73 | #define SECONDARY_EXEC_ENABLE_PML 0x00020000 | 73 | #define SECONDARY_EXEC_ENABLE_PML 0x00020000 |
74 | #define SECONDARY_EXEC_XSAVES 0x00100000 | 74 | #define SECONDARY_EXEC_XSAVES 0x00100000 |
75 | 75 | #define SECONDARY_EXEC_PCOMMIT 0x00200000 | |
76 | #define SECONDARY_EXEC_TSC_SCALING 0x02000000 | ||
76 | 77 | ||
77 | #define PIN_BASED_EXT_INTR_MASK 0x00000001 | 78 | #define PIN_BASED_EXT_INTR_MASK 0x00000001 |
78 | #define PIN_BASED_NMI_EXITING 0x00000008 | 79 | #define PIN_BASED_NMI_EXITING 0x00000008 |
@@ -167,6 +168,8 @@ enum vmcs_field { | |||
167 | VMWRITE_BITMAP = 0x00002028, | 168 | VMWRITE_BITMAP = 0x00002028, |
168 | XSS_EXIT_BITMAP = 0x0000202C, | 169 | XSS_EXIT_BITMAP = 0x0000202C, |
169 | XSS_EXIT_BITMAP_HIGH = 0x0000202D, | 170 | XSS_EXIT_BITMAP_HIGH = 0x0000202D, |
171 | TSC_MULTIPLIER = 0x00002032, | ||
172 | TSC_MULTIPLIER_HIGH = 0x00002033, | ||
170 | GUEST_PHYSICAL_ADDRESS = 0x00002400, | 173 | GUEST_PHYSICAL_ADDRESS = 0x00002400, |
171 | GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, | 174 | GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, |
172 | VMCS_LINK_POINTER = 0x00002800, | 175 | VMCS_LINK_POINTER = 0x00002800, |
@@ -416,6 +419,7 @@ enum vmcs_field { | |||
416 | #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) | 419 | #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) |
417 | #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) | 420 | #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) |
418 | 421 | ||
422 | #define VMX_VPID_INVVPID_BIT (1ull << 0) /* (32 - 32) */ | ||
419 | #define VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT (1ull << 9) /* (41 - 32) */ | 423 | #define VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT (1ull << 9) /* (41 - 32) */ |
420 | #define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT (1ull << 10) /* (42 - 32) */ | 424 | #define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT (1ull << 10) /* (42 - 32) */ |
421 | 425 | ||
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 10002a46c593..1ae89a2721d6 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h | |||
@@ -1,7 +1,6 @@ | |||
1 | #ifndef _ASM_X86_PLATFORM_H | 1 | #ifndef _ASM_X86_PLATFORM_H |
2 | #define _ASM_X86_PLATFORM_H | 2 | #define _ASM_X86_PLATFORM_H |
3 | 3 | ||
4 | #include <asm/pgtable_types.h> | ||
5 | #include <asm/bootparam.h> | 4 | #include <asm/bootparam.h> |
6 | 5 | ||
7 | struct mpc_bus; | 6 | struct mpc_bus; |
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index d866959e5685..8b2d4bea9962 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h | |||
@@ -57,4 +57,9 @@ static inline bool xen_x2apic_para_available(void) | |||
57 | } | 57 | } |
58 | #endif | 58 | #endif |
59 | 59 | ||
60 | #ifdef CONFIG_HOTPLUG_CPU | ||
61 | void xen_arch_register_cpu(int num); | ||
62 | void xen_arch_unregister_cpu(int num); | ||
63 | #endif | ||
64 | |||
60 | #endif /* _ASM_X86_XEN_HYPERVISOR_H */ | 65 | #endif /* _ASM_X86_XEN_HYPERVISOR_H */ |
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 0679e11d2cf7..f5fb840b43e8 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h | |||
@@ -12,7 +12,7 @@ | |||
12 | #include <asm/pgtable.h> | 12 | #include <asm/pgtable.h> |
13 | 13 | ||
14 | #include <xen/interface/xen.h> | 14 | #include <xen/interface/xen.h> |
15 | #include <xen/grant_table.h> | 15 | #include <xen/interface/grant_table.h> |
16 | #include <xen/features.h> | 16 | #include <xen/features.h> |
17 | 17 | ||
18 | /* Xen machine address */ | 18 | /* Xen machine address */ |
@@ -43,6 +43,8 @@ extern unsigned long *xen_p2m_addr; | |||
43 | extern unsigned long xen_p2m_size; | 43 | extern unsigned long xen_p2m_size; |
44 | extern unsigned long xen_max_p2m_pfn; | 44 | extern unsigned long xen_max_p2m_pfn; |
45 | 45 | ||
46 | extern int xen_alloc_p2m_entry(unsigned long pfn); | ||
47 | |||
46 | extern unsigned long get_phys_to_machine(unsigned long pfn); | 48 | extern unsigned long get_phys_to_machine(unsigned long pfn); |
47 | extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); | 49 | extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); |
48 | extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); | 50 | extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); |
@@ -296,8 +298,8 @@ void make_lowmem_page_readwrite(void *vaddr); | |||
296 | #define xen_unmap(cookie) iounmap((cookie)) | 298 | #define xen_unmap(cookie) iounmap((cookie)) |
297 | 299 | ||
298 | static inline bool xen_arch_need_swiotlb(struct device *dev, | 300 | static inline bool xen_arch_need_swiotlb(struct device *dev, |
299 | unsigned long pfn, | 301 | phys_addr_t phys, |
300 | unsigned long bfn) | 302 | dma_addr_t dev_addr) |
301 | { | 303 | { |
302 | return false; | 304 | return false; |
303 | } | 305 | } |
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index f0412c50c47b..040d4083c24f 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h | |||
@@ -153,6 +153,12 @@ | |||
153 | /* MSR used to provide vcpu index */ | 153 | /* MSR used to provide vcpu index */ |
154 | #define HV_X64_MSR_VP_INDEX 0x40000002 | 154 | #define HV_X64_MSR_VP_INDEX 0x40000002 |
155 | 155 | ||
156 | /* MSR used to reset the guest OS. */ | ||
157 | #define HV_X64_MSR_RESET 0x40000003 | ||
158 | |||
159 | /* MSR used to provide vcpu runtime in 100ns units */ | ||
160 | #define HV_X64_MSR_VP_RUNTIME 0x40000010 | ||
161 | |||
156 | /* MSR used to read the per-partition time reference counter */ | 162 | /* MSR used to read the per-partition time reference counter */ |
157 | #define HV_X64_MSR_TIME_REF_COUNT 0x40000020 | 163 | #define HV_X64_MSR_TIME_REF_COUNT 0x40000020 |
158 | 164 | ||
@@ -251,4 +257,16 @@ typedef struct _HV_REFERENCE_TSC_PAGE { | |||
251 | __s64 tsc_offset; | 257 | __s64 tsc_offset; |
252 | } HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; | 258 | } HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; |
253 | 259 | ||
260 | /* Define the number of synthetic interrupt sources. */ | ||
261 | #define HV_SYNIC_SINT_COUNT (16) | ||
262 | /* Define the expected SynIC version. */ | ||
263 | #define HV_SYNIC_VERSION_1 (0x1) | ||
264 | |||
265 | #define HV_SYNIC_CONTROL_ENABLE (1ULL << 0) | ||
266 | #define HV_SYNIC_SIMP_ENABLE (1ULL << 0) | ||
267 | #define HV_SYNIC_SIEFP_ENABLE (1ULL << 0) | ||
268 | #define HV_SYNIC_SINT_MASKED (1ULL << 16) | ||
269 | #define HV_SYNIC_SINT_AUTO_EOI (1ULL << 17) | ||
270 | #define HV_SYNIC_SINT_VECTOR_MASK (0xFF) | ||
271 | |||
254 | #endif | 272 | #endif |
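The SynIC additions define the bit layout of a synthetic interrupt source (SINT) MSR: the delivery vector in the low byte plus mask and auto-EOI control bits. A hedged sketch composing such a value; the vector choice is arbitrary:

#include <stdio.h>
#include <stdint.h>

#define HV_SYNIC_SINT_MASKED		(1ULL << 16)
#define HV_SYNIC_SINT_AUTO_EOI		(1ULL << 17)
#define HV_SYNIC_SINT_VECTOR_MASK	(0xFF)

int main(void)
{
	uint64_t sint = 0;

	sint |= 0x40 & HV_SYNIC_SINT_VECTOR_MASK;	/* deliver on vector 0x40 */
	sint |= HV_SYNIC_SINT_AUTO_EOI;			/* no explicit EOI needed */
	/* leave HV_SYNIC_SINT_MASKED clear so the source can fire */

	printf("SINT value: %#llx\n", (unsigned long long)sint);
	return 0;
}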
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index b5d7640abc5d..8a4add8e4639 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h | |||
@@ -100,6 +100,7 @@ | |||
100 | { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \ | 100 | { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \ |
101 | { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \ | 101 | { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \ |
102 | { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \ | 102 | { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \ |
103 | { SVM_EXIT_EXCP_BASE + AC_VECTOR, "AC excp" }, \ | ||
103 | { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \ | 104 | { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \ |
104 | { SVM_EXIT_INTR, "interrupt" }, \ | 105 | { SVM_EXIT_INTR, "interrupt" }, \ |
105 | { SVM_EXIT_NMI, "nmi" }, \ | 106 | { SVM_EXIT_NMI, "nmi" }, \ |
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index 37fee272618f..5b15d94a33f8 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h | |||
@@ -78,6 +78,7 @@ | |||
78 | #define EXIT_REASON_PML_FULL 62 | 78 | #define EXIT_REASON_PML_FULL 62 |
79 | #define EXIT_REASON_XSAVES 63 | 79 | #define EXIT_REASON_XSAVES 63 |
80 | #define EXIT_REASON_XRSTORS 64 | 80 | #define EXIT_REASON_XRSTORS 64 |
81 | #define EXIT_REASON_PCOMMIT 65 | ||
81 | 82 | ||
82 | #define VMX_EXIT_REASONS \ | 83 | #define VMX_EXIT_REASONS \ |
83 | { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ | 84 | { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ |
@@ -126,7 +127,8 @@ | |||
126 | { EXIT_REASON_INVVPID, "INVVPID" }, \ | 127 | { EXIT_REASON_INVVPID, "INVVPID" }, \ |
127 | { EXIT_REASON_INVPCID, "INVPCID" }, \ | 128 | { EXIT_REASON_INVPCID, "INVPCID" }, \ |
128 | { EXIT_REASON_XSAVES, "XSAVES" }, \ | 129 | { EXIT_REASON_XSAVES, "XSAVES" }, \ |
129 | { EXIT_REASON_XRSTORS, "XRSTORS" } | 130 | { EXIT_REASON_XRSTORS, "XRSTORS" }, \ |
131 | { EXIT_REASON_PCOMMIT, "PCOMMIT" } | ||
130 | 132 | ||
131 | #define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1 | 133 | #define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1 |
132 | #define VMX_ABORT_LOAD_HOST_MSR_FAIL 4 | 134 | #define VMX_ABORT_LOAD_HOST_MSR_FAIL 4 |
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index ded848c20e05..e75907601a41 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
@@ -976,6 +976,8 @@ static int __init acpi_parse_madt_lapic_entries(void) | |||
976 | { | 976 | { |
977 | int count; | 977 | int count; |
978 | int x2count = 0; | 978 | int x2count = 0; |
979 | int ret; | ||
980 | struct acpi_subtable_proc madt_proc[2]; | ||
979 | 981 | ||
980 | if (!cpu_has_apic) | 982 | if (!cpu_has_apic) |
981 | return -ENODEV; | 983 | return -ENODEV; |
@@ -999,10 +1001,22 @@ static int __init acpi_parse_madt_lapic_entries(void) | |||
999 | acpi_parse_sapic, MAX_LOCAL_APIC); | 1001 | acpi_parse_sapic, MAX_LOCAL_APIC); |
1000 | 1002 | ||
1001 | if (!count) { | 1003 | if (!count) { |
1002 | x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC, | 1004 | memset(madt_proc, 0, sizeof(madt_proc)); |
1003 | acpi_parse_x2apic, MAX_LOCAL_APIC); | 1005 | madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC; |
1004 | count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, | 1006 | madt_proc[0].handler = acpi_parse_lapic; |
1005 | acpi_parse_lapic, MAX_LOCAL_APIC); | 1007 | madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_X2APIC; |
1008 | madt_proc[1].handler = acpi_parse_x2apic; | ||
1009 | ret = acpi_table_parse_entries_array(ACPI_SIG_MADT, | ||
1010 | sizeof(struct acpi_table_madt), | ||
1011 | madt_proc, ARRAY_SIZE(madt_proc), MAX_LOCAL_APIC); | ||
1012 | if (ret < 0) { | ||
1013 | printk(KERN_ERR PREFIX | ||
1014 | "Error parsing LAPIC/X2APIC entries\n"); | ||
1015 | return ret; | ||
1016 | } | ||
1017 | |||
1018 | count = madt_proc[0].count; | ||
1019 | x2count = madt_proc[1].count; | ||
1006 | } | 1020 | } |
1007 | if (!count && !x2count) { | 1021 | if (!count && !x2count) { |
1008 | printk(KERN_ERR PREFIX "No LAPIC entries present\n"); | 1022 | printk(KERN_ERR PREFIX "No LAPIC entries present\n"); |
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 836d11b92811..861bc59c8f25 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c | |||
@@ -361,7 +361,11 @@ int __init arch_probe_nr_irqs(void) | |||
361 | if (nr < nr_irqs) | 361 | if (nr < nr_irqs) |
362 | nr_irqs = nr; | 362 | nr_irqs = nr; |
363 | 363 | ||
364 | return nr_legacy_irqs(); | 364 | /* |
365 | * We don't know if the PIC is present at this point, so we need | ||
366 | * to call probe() to get the right number of legacy IRQs. | ||
367 | */ | ||
368 | return legacy_pic->probe(); | ||
365 | } | 369 | } |
366 | 370 | ||
367 | #ifdef CONFIG_X86_IO_APIC | 371 | #ifdef CONFIG_X86_IO_APIC |
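The probe() hook turns a hard-coded legacy IRQ count into a question for the driver, which matters on systems with no PIC at all. A hedged stand-alone sketch of the same ops-with-probe shape, not the i8259 code itself:

#include <stdio.h>

struct legacy_pic_ops {
	int (*probe)(void);	/* returns number of legacy IRQs, 0 if absent */
};

static int null_probe(void)  { return 0; }	/* no PIC wired up */
static int i8259_probe(void) { return 16; }	/* classic dual 8259 */

static int arch_probe_nr_irqs(const struct legacy_pic_ops *pic)
{
	return pic->probe();	/* ask the hardware driver, don't assume */
}

int main(void)
{
	struct legacy_pic_ops none = { .probe = null_probe };
	struct legacy_pic_ops pic  = { .probe = i8259_probe };

	printf("without PIC: %d legacy IRQs\n", arch_probe_nr_irqs(&none));
	printf("with PIC:    %d legacy IRQs\n", arch_probe_nr_irqs(&pic));
	return 0;
}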
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 4a70fc6d400a..a8816b325162 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -352,6 +352,7 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c) | |||
352 | #ifdef CONFIG_SMP | 352 | #ifdef CONFIG_SMP |
353 | unsigned bits; | 353 | unsigned bits; |
354 | int cpu = smp_processor_id(); | 354 | int cpu = smp_processor_id(); |
355 | unsigned int socket_id, core_complex_id; | ||
355 | 356 | ||
356 | bits = c->x86_coreid_bits; | 357 | bits = c->x86_coreid_bits; |
357 | /* Low order bits define the core id (index of core in socket) */ | 358 | /* Low order bits define the core id (index of core in socket) */ |
@@ -361,6 +362,18 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c) | |||
361 | /* use socket ID also for last level cache */ | 362 | /* use socket ID also for last level cache */ |
362 | per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; | 363 | per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; |
363 | amd_get_topology(c); | 364 | amd_get_topology(c); |
365 | |||
366 | /* | ||
367 | * Fix percpu cpu_llc_id here as LLC topology is different | ||
368 | * for Fam17h systems. | ||
369 | */ | ||
370 | if (c->x86 != 0x17 || !cpuid_edx(0x80000006)) | ||
371 | return; | ||
372 | |||
373 | socket_id = (c->apicid >> bits) - 1; | ||
374 | core_complex_id = (c->apicid & ((1 << bits) - 1)) >> 3; | ||
375 | |||
376 | per_cpu(cpu_llc_id, cpu) = (socket_id << 3) | core_complex_id; | ||
364 | #endif | 377 | #endif |
365 | } | 378 | } |
366 | 379 | ||
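The new Fam17h block rebuilds the LLC id from two APIC-ID fields: the socket field above the core-id bits and the core-complex number inside them (on Zen parts each core complex shares one L3 among up to eight threads, hence the >> 3). A stand-alone arithmetic check mirroring the new lines, with illustrative values; x86_coreid_bits and the APIC id vary by part:

#include <stdio.h>

int main(void)
{
	unsigned int apicid = 0x23, bits = 4;	/* example values only */
	unsigned int socket_id = (apicid >> bits) - 1;
	unsigned int core_complex_id = (apicid & ((1 << bits) - 1)) >> 3;

	/* 0x23: socket field 2 -> socket_id 1, low bits 3 -> complex 0 */
	printf("llc_id=%u\n", (socket_id << 3) | core_complex_id);	/* 8 */
	return 0;
}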
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4ddd780aeac9..c2b7522cbf35 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -273,10 +273,9 @@ __setup("nosmap", setup_disable_smap); | |||
273 | 273 | ||
274 | static __always_inline void setup_smap(struct cpuinfo_x86 *c) | 274 | static __always_inline void setup_smap(struct cpuinfo_x86 *c) |
275 | { | 275 | { |
276 | unsigned long eflags; | 276 | unsigned long eflags = native_save_fl(); |
277 | 277 | ||
278 | /* This should have been cleared long ago */ | 278 | /* This should have been cleared long ago */ |
279 | raw_local_save_flags(eflags); | ||
280 | BUG_ON(eflags & X86_EFLAGS_AC); | 279 | BUG_ON(eflags & X86_EFLAGS_AC); |
281 | 280 | ||
282 | if (cpu_has(c, X86_FEATURE_SMAP)) { | 281 | if (cpu_has(c, X86_FEATURE_SMAP)) { |
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 98a13db5f4be..209ac1e7d1f0 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -97,6 +97,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) | |||
97 | switch (c->x86_model) { | 97 | switch (c->x86_model) { |
98 | case 0x27: /* Penwell */ | 98 | case 0x27: /* Penwell */ |
99 | case 0x35: /* Cloverview */ | 99 | case 0x35: /* Cloverview */ |
100 | case 0x4a: /* Merrifield */ | ||
100 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC_S3); | 101 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC_S3); |
101 | break; | 102 | break; |
102 | default: | 103 | default: |
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 7fc27f1cca58..b3e94ef461fd 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c | |||
@@ -698,3 +698,4 @@ int __init microcode_init(void) | |||
698 | return error; | 698 | return error; |
699 | 699 | ||
700 | } | 700 | } |
701 | late_initcall(microcode_init); | ||
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 4562cf070c27..2bf79d7c97df 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -5,7 +5,7 @@ | |||
5 | * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar | 5 | * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar |
6 | * Copyright (C) 2009 Jaswinder Singh Rajput | 6 | * Copyright (C) 2009 Jaswinder Singh Rajput |
7 | * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter | 7 | * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter |
8 | * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | 8 | * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra |
9 | * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com> | 9 | * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com> |
10 | * Copyright (C) 2009 Google, Inc., Stephane Eranian | 10 | * Copyright (C) 2009 Google, Inc., Stephane Eranian |
11 | * | 11 | * |
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 499f533dd3cc..d0e35ebb2adb 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h | |||
@@ -5,7 +5,7 @@ | |||
5 | * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar | 5 | * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar |
6 | * Copyright (C) 2009 Jaswinder Singh Rajput | 6 | * Copyright (C) 2009 Jaswinder Singh Rajput |
7 | * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter | 7 | * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter |
8 | * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | 8 | * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra |
9 | * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com> | 9 | * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com> |
10 | * Copyright (C) 2009 Google, Inc., Stephane Eranian | 10 | * Copyright (C) 2009 Google, Inc., Stephane Eranian |
11 | * | 11 | * |
@@ -387,7 +387,7 @@ struct cpu_hw_events { | |||
387 | /* Check flags and event code/umask, and set the HSW N/A flag */ | 387 | /* Check flags and event code/umask, and set the HSW N/A flag */ |
388 | #define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(code, n) \ | 388 | #define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(code, n) \ |
389 | __EVENT_CONSTRAINT(code, n, \ | 389 | __EVENT_CONSTRAINT(code, n, \ |
390 | INTEL_ARCH_EVENT_MASK|INTEL_ARCH_EVENT_MASK, \ | 390 | INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ |
391 | HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_NA_HSW) | 391 | HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_NA_HSW) |
392 | 392 | ||
393 | 393 | ||
@@ -627,6 +627,7 @@ struct x86_perf_task_context { | |||
627 | u64 lbr_from[MAX_LBR_ENTRIES]; | 627 | u64 lbr_from[MAX_LBR_ENTRIES]; |
628 | u64 lbr_to[MAX_LBR_ENTRIES]; | 628 | u64 lbr_to[MAX_LBR_ENTRIES]; |
629 | u64 lbr_info[MAX_LBR_ENTRIES]; | 629 | u64 lbr_info[MAX_LBR_ENTRIES]; |
630 | int tos; | ||
630 | int lbr_callstack_users; | 631 | int lbr_callstack_users; |
631 | int lbr_stack_state; | 632 | int lbr_stack_state; |
632 | }; | 633 | }; |
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index f63360be2238..e2a430021e46 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -232,7 +232,7 @@ static struct event_constraint intel_hsw_event_constraints[] = { | |||
232 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | 232 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
233 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | 233 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
234 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ | 234 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ |
235 | INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.* */ | 235 | INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */ |
236 | INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ | 236 | INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ |
237 | INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ | 237 | INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ |
238 | /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ | 238 | /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c index 377e8f8ed391..a316ca96f1b6 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c +++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c | |||
@@ -298,7 +298,7 @@ static bool __match_event(struct perf_event *a, struct perf_event *b) | |||
298 | static inline struct perf_cgroup *event_to_cgroup(struct perf_event *event) | 298 | static inline struct perf_cgroup *event_to_cgroup(struct perf_event *event) |
299 | { | 299 | { |
300 | if (event->attach_state & PERF_ATTACH_TASK) | 300 | if (event->attach_state & PERF_ATTACH_TASK) |
301 | return perf_cgroup_from_task(event->hw.target); | 301 | return perf_cgroup_from_task(event->hw.target, event->ctx); |
302 | 302 | ||
303 | return event->cgrp; | 303 | return event->cgrp; |
304 | } | 304 | } |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index bfd0b717e944..659f01e165d5 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c | |||
@@ -239,7 +239,7 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) | |||
239 | } | 239 | } |
240 | 240 | ||
241 | mask = x86_pmu.lbr_nr - 1; | 241 | mask = x86_pmu.lbr_nr - 1; |
242 | tos = intel_pmu_lbr_tos(); | 242 | tos = task_ctx->tos; |
243 | for (i = 0; i < tos; i++) { | 243 | for (i = 0; i < tos; i++) { |
244 | lbr_idx = (tos - i) & mask; | 244 | lbr_idx = (tos - i) & mask; |
245 | wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]); | 245 | wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]); |
@@ -247,6 +247,7 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) | |||
247 | if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) | 247 | if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) |
248 | wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]); | 248 | wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]); |
249 | } | 249 | } |
250 | wrmsrl(x86_pmu.lbr_tos, tos); | ||
250 | task_ctx->lbr_stack_state = LBR_NONE; | 251 | task_ctx->lbr_stack_state = LBR_NONE; |
251 | } | 252 | } |
252 | 253 | ||
@@ -270,6 +271,7 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx) | |||
270 | if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) | 271 | if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) |
271 | rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]); | 272 | rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]); |
272 | } | 273 | } |
274 | task_ctx->tos = tos; | ||
273 | task_ctx->lbr_stack_state = LBR_VALID; | 275 | task_ctx->lbr_stack_state = LBR_VALID; |
274 | } | 276 | } |
275 | 277 | ||
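Saving tos alongside the entries matters because the restore can run after the hardware top-of-stack has moved: the old code re-read the live TOS (intel_pmu_lbr_tos()) and rotated the saved entries against a stale index. Writing the entries back relative to the saved index and then re-pointing the TOS MSR (the new wrmsrl above) keeps subsequent reads consistent. A user-space sketch of that pairing, with an illustrative ring size:

#include <stdio.h>

#define NR 8

struct ctx { unsigned long from[NR]; int tos; };

static void save(struct ctx *c, const unsigned long *hw, int hw_tos)
{
	int mask = NR - 1, i;

	for (i = 0; i < hw_tos; i++)
		c->from[i] = hw[(hw_tos - i) & mask];
	c->tos = hw_tos;	/* the new field: remember where the top was */
}

static void restore(const struct ctx *c, unsigned long *hw, int *hw_tos)
{
	int mask = NR - 1, i;

	for (i = 0; i < c->tos; i++)
		hw[(c->tos - i) & mask] = c->from[i];
	*hw_tos = c->tos;	/* re-point the hardware top-of-stack */
}

int main(void)
{
	unsigned long hw[NR] = { 0, 11, 22, 33 };
	int hw_tos = 3;
	struct ctx c;

	save(&c, hw, hw_tos);
	hw_tos = 0;		/* another task moved the index meanwhile */
	restore(&c, hw, &hw_tos);
	printf("tos=%d top=%lu\n", hw_tos, hw[hw_tos]);	/* tos=3 top=33 */
	return 0;
}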
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c index 81431c0f0614..ed446bdcbf31 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c +++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c | |||
@@ -107,12 +107,6 @@ static ssize_t __rapl_##_var##_show(struct kobject *kobj, \ | |||
107 | static struct kobj_attribute format_attr_##_var = \ | 107 | static struct kobj_attribute format_attr_##_var = \ |
108 | __ATTR(_name, 0444, __rapl_##_var##_show, NULL) | 108 | __ATTR(_name, 0444, __rapl_##_var##_show, NULL) |
109 | 109 | ||
110 | #define RAPL_EVENT_DESC(_name, _config) \ | ||
111 | { \ | ||
112 | .attr = __ATTR(_name, 0444, rapl_event_show, NULL), \ | ||
113 | .config = _config, \ | ||
114 | } | ||
115 | |||
116 | #define RAPL_CNTR_WIDTH 32 /* 32-bit rapl counters */ | 110 | #define RAPL_CNTR_WIDTH 32 /* 32-bit rapl counters */ |
117 | 111 | ||
118 | #define RAPL_EVENT_ATTR_STR(_name, v, str) \ | 112 | #define RAPL_EVENT_ATTR_STR(_name, v, str) \ |
diff --git a/arch/x86/kernel/cpu/perf_event_msr.c b/arch/x86/kernel/cpu/perf_event_msr.c index f32ac13934f2..ec863b9a9f78 100644 --- a/arch/x86/kernel/cpu/perf_event_msr.c +++ b/arch/x86/kernel/cpu/perf_event_msr.c | |||
@@ -163,10 +163,9 @@ again: | |||
163 | goto again; | 163 | goto again; |
164 | 164 | ||
165 | delta = now - prev; | 165 | delta = now - prev; |
166 | if (unlikely(event->hw.event_base == MSR_SMI_COUNT)) { | 166 | if (unlikely(event->hw.event_base == MSR_SMI_COUNT)) |
167 | delta <<= 32; | 167 | delta = sign_extend64(delta, 31); |
168 | delta >>= 32; /* sign extend */ | 168 | |
169 | } | ||
170 | local64_add(now - prev, &event->count); | 169 | local64_add(delta, &event->count); |
171 | } | 170 | } |
172 | 171 | ||
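sign_extend64(delta, 31) names bit 31 as the sign bit, which is exactly what the removed shift-left/shift-right pair did for the 32-bit MSR_SMI_COUNT delta (and why the accumulated value must be the extended delta, not the raw now - prev). A quick stand-alone check; the helper is reimplemented locally to match the linux/bitops.h semantics:

#include <stdio.h>
#include <stdint.h>

/* same contract as the kernel helper: 'index' is the 0-based sign bit */
static int64_t sign_extend64(uint64_t value, int index)
{
	int shift = 63 - index;

	return (int64_t)(value << shift) >> shift;
}

int main(void)
{
	uint64_t delta = 0xfffffff0ULL;	/* 32-bit counter stepped backwards */

	printf("%lld\n", (long long)sign_extend64(delta, 31));	/* -16 */
	return 0;
}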
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index ef29b742cea7..31c6a60505e6 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c | |||
@@ -385,20 +385,19 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame, | |||
385 | */ | 385 | */ |
386 | void fpu__init_prepare_fx_sw_frame(void) | 386 | void fpu__init_prepare_fx_sw_frame(void) |
387 | { | 387 | { |
388 | int fsave_header_size = sizeof(struct fregs_state); | ||
389 | int size = xstate_size + FP_XSTATE_MAGIC2_SIZE; | 388 | int size = xstate_size + FP_XSTATE_MAGIC2_SIZE; |
390 | 389 | ||
391 | if (config_enabled(CONFIG_X86_32)) | ||
392 | size += fsave_header_size; | ||
393 | |||
394 | fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; | 390 | fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; |
395 | fx_sw_reserved.extended_size = size; | 391 | fx_sw_reserved.extended_size = size; |
396 | fx_sw_reserved.xfeatures = xfeatures_mask; | 392 | fx_sw_reserved.xfeatures = xfeatures_mask; |
397 | fx_sw_reserved.xstate_size = xstate_size; | 393 | fx_sw_reserved.xstate_size = xstate_size; |
398 | 394 | ||
399 | if (config_enabled(CONFIG_IA32_EMULATION)) { | 395 | if (config_enabled(CONFIG_IA32_EMULATION) || |
396 | config_enabled(CONFIG_X86_32)) { | ||
397 | int fsave_header_size = sizeof(struct fregs_state); | ||
398 | |||
400 | fx_sw_reserved_ia32 = fx_sw_reserved; | 399 | fx_sw_reserved_ia32 = fx_sw_reserved; |
401 | fx_sw_reserved_ia32.extended_size += fsave_header_size; | 400 | fx_sw_reserved_ia32.extended_size = size + fsave_header_size; |
402 | } | 401 | } |
403 | } | 402 | } |
404 | 403 | ||
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 6454f2731b56..70fc312221fc 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c | |||
@@ -694,7 +694,6 @@ void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature) | |||
694 | if (!boot_cpu_has(X86_FEATURE_XSAVE)) | 694 | if (!boot_cpu_has(X86_FEATURE_XSAVE)) |
695 | return NULL; | 695 | return NULL; |
696 | 696 | ||
697 | xsave = ¤t->thread.fpu.state.xsave; | ||
698 | /* | 697 | /* |
699 | * We should not ever be requesting features that we | 698 | * We should not ever be requesting features that we |
700 | * have not enabled. Remember that pcntxt_mask is | 699 | * have not enabled. Remember that pcntxt_mask is |
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 8b7b0a51e742..311bcf338f07 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
@@ -556,6 +556,7 @@ void ftrace_replace_code(int enable) | |||
556 | run_sync(); | 556 | run_sync(); |
557 | 557 | ||
558 | report = "updating code"; | 558 | report = "updating code"; |
559 | count = 0; | ||
559 | 560 | ||
560 | for_ftrace_rec_iter(iter) { | 561 | for_ftrace_rec_iter(iter) { |
561 | rec = ftrace_rec_iter_record(iter); | 562 | rec = ftrace_rec_iter_record(iter); |
@@ -563,11 +564,13 @@ void ftrace_replace_code(int enable) | |||
563 | ret = add_update(rec, enable); | 564 | ret = add_update(rec, enable); |
564 | if (ret) | 565 | if (ret) |
565 | goto remove_breakpoints; | 566 | goto remove_breakpoints; |
567 | count++; | ||
566 | } | 568 | } |
567 | 569 | ||
568 | run_sync(); | 570 | run_sync(); |
569 | 571 | ||
570 | report = "removing breakpoints"; | 572 | report = "removing breakpoints"; |
573 | count = 0; | ||
571 | 574 | ||
572 | for_ftrace_rec_iter(iter) { | 575 | for_ftrace_rec_iter(iter) { |
573 | rec = ftrace_rec_iter_record(iter); | 576 | rec = ftrace_rec_iter_record(iter); |
@@ -575,6 +578,7 @@ void ftrace_replace_code(int enable) | |||
575 | ret = finish_update(rec, enable); | 578 | ret = finish_update(rec, enable); |
576 | if (ret) | 579 | if (ret) |
577 | goto remove_breakpoints; | 580 | goto remove_breakpoints; |
581 | count++; | ||
578 | } | 582 | } |
579 | 583 | ||
580 | run_sync(); | 584 | run_sync(); |
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 1d40ca8a73f2..ffdc0e860390 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -65,6 +65,9 @@ startup_64: | |||
65 | * tables and then reload them. | 65 | * tables and then reload them. |
66 | */ | 66 | */ |
67 | 67 | ||
68 | /* Sanitize CPU configuration */ | ||
69 | call verify_cpu | ||
70 | |||
68 | /* | 71 | /* |
69 | * Compute the delta between the address I am compiled to run at and the | 72 | * Compute the delta between the address I am compiled to run at and the |
70 | * address I am actually running at. | 73 | * address I am actually running at. |
@@ -174,6 +177,9 @@ ENTRY(secondary_startup_64) | |||
174 | * after the boot processor executes this code. | 177 | * after the boot processor executes this code. |
175 | */ | 178 | */ |
176 | 179 | ||
180 | /* Sanitize CPU configuration */ | ||
181 | call verify_cpu | ||
182 | |||
177 | movq $(init_level4_pgt - __START_KERNEL_map), %rax | 183 | movq $(init_level4_pgt - __START_KERNEL_map), %rax |
178 | 1: | 184 | 1: |
179 | 185 | ||
@@ -288,6 +294,8 @@ ENTRY(secondary_startup_64) | |||
288 | pushq %rax # target address in negative space | 294 | pushq %rax # target address in negative space |
289 | lretq | 295 | lretq |
290 | 296 | ||
297 | #include "verify_cpu.S" | ||
298 | |||
291 | #ifdef CONFIG_HOTPLUG_CPU | 299 | #ifdef CONFIG_HOTPLUG_CPU |
292 | /* | 300 | /* |
293 | * Boot CPU0 entry point. It's called from play_dead(). Everything has been set | 301 | * Boot CPU0 entry point. It's called from play_dead(). Everything has been set |
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 16cb827a5b27..be22f5a2192e 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c | |||
@@ -295,16 +295,11 @@ static void unmask_8259A(void) | |||
295 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); | 295 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
296 | } | 296 | } |
297 | 297 | ||
298 | static void init_8259A(int auto_eoi) | 298 | static int probe_8259A(void) |
299 | { | 299 | { |
300 | unsigned long flags; | 300 | unsigned long flags; |
301 | unsigned char probe_val = ~(1 << PIC_CASCADE_IR); | 301 | unsigned char probe_val = ~(1 << PIC_CASCADE_IR); |
302 | unsigned char new_val; | 302 | unsigned char new_val; |
303 | |||
304 | i8259A_auto_eoi = auto_eoi; | ||
305 | |||
306 | raw_spin_lock_irqsave(&i8259A_lock, flags); | ||
307 | |||
308 | /* | 303 | /* |
309 | * Check to see if we have a PIC. | 304 | * Check to see if we have a PIC. |
310 | * Mask all except the cascade and read | 305 | * Mask all except the cascade and read |
@@ -312,16 +307,28 @@ static void init_8259A(int auto_eoi) | |||
312 | * have a PIC, we will read 0xff as opposed to the | 307 | * have a PIC, we will read 0xff as opposed to the |
313 | * value we wrote. | 308 | * value we wrote. |
314 | */ | 309 | */ |
310 | raw_spin_lock_irqsave(&i8259A_lock, flags); | ||
311 | |||
315 | outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ | 312 | outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ |
316 | outb(probe_val, PIC_MASTER_IMR); | 313 | outb(probe_val, PIC_MASTER_IMR); |
317 | new_val = inb(PIC_MASTER_IMR); | 314 | new_val = inb(PIC_MASTER_IMR); |
318 | if (new_val != probe_val) { | 315 | if (new_val != probe_val) { |
319 | printk(KERN_INFO "Using NULL legacy PIC\n"); | 316 | printk(KERN_INFO "Using NULL legacy PIC\n"); |
320 | legacy_pic = &null_legacy_pic; | 317 | legacy_pic = &null_legacy_pic; |
321 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); | ||
322 | return; | ||
323 | } | 318 | } |
324 | 319 | ||
320 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); | ||
321 | return nr_legacy_irqs(); | ||
322 | } | ||
323 | |||
324 | static void init_8259A(int auto_eoi) | ||
325 | { | ||
326 | unsigned long flags; | ||
327 | |||
328 | i8259A_auto_eoi = auto_eoi; | ||
329 | |||
330 | raw_spin_lock_irqsave(&i8259A_lock, flags); | ||
331 | |||
325 | outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ | 332 | outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ |
326 | 333 | ||
327 | /* | 334 | /* |
@@ -379,6 +386,10 @@ static int legacy_pic_irq_pending_noop(unsigned int irq) | |||
379 | { | 386 | { |
380 | return 0; | 387 | return 0; |
381 | } | 388 | } |
389 | static int legacy_pic_probe(void) | ||
390 | { | ||
391 | return 0; | ||
392 | } | ||
382 | 393 | ||
383 | struct legacy_pic null_legacy_pic = { | 394 | struct legacy_pic null_legacy_pic = { |
384 | .nr_legacy_irqs = 0, | 395 | .nr_legacy_irqs = 0, |
@@ -388,6 +399,7 @@ struct legacy_pic null_legacy_pic = { | |||
388 | .mask_all = legacy_pic_noop, | 399 | .mask_all = legacy_pic_noop, |
389 | .restore_mask = legacy_pic_noop, | 400 | .restore_mask = legacy_pic_noop, |
390 | .init = legacy_pic_int_noop, | 401 | .init = legacy_pic_int_noop, |
402 | .probe = legacy_pic_probe, | ||
391 | .irq_pending = legacy_pic_irq_pending_noop, | 403 | .irq_pending = legacy_pic_irq_pending_noop, |
392 | .make_irq = legacy_pic_uint_noop, | 404 | .make_irq = legacy_pic_uint_noop, |
393 | }; | 405 | }; |
@@ -400,6 +412,7 @@ struct legacy_pic default_legacy_pic = { | |||
400 | .mask_all = mask_8259A, | 412 | .mask_all = mask_8259A, |
401 | .restore_mask = unmask_8259A, | 413 | .restore_mask = unmask_8259A, |
402 | .init = init_8259A, | 414 | .init = init_8259A, |
415 | .probe = probe_8259A, | ||
403 | .irq_pending = i8259A_irq_pending, | 416 | .irq_pending = i8259A_irq_pending, |
404 | .make_irq = make_8259A_irq, | 417 | .make_irq = make_8259A_irq, |
405 | }; | 418 | }; |
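probe_8259A() is the readback trick that arch_probe_nr_irqs() now relies on: write a known mask to the master IMR and read it back; with no PIC present the data bus floats and the read returns 0xff instead of the written pattern. A user-space sketch of the same probe; running it needs ioperm() and a real or emulated PIC, so treat it as illustrative only:

#include <sys/io.h>

#define PIC_MASTER_IMR	0x21
#define PIC_SLAVE_IMR	0xa1
#define PIC_CASCADE_IR	2

static int pic_present(void)
{
	unsigned char probe_val = ~(1 << PIC_CASCADE_IR);

	outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-2 */
	outb(probe_val, PIC_MASTER_IMR);
	return inb(PIC_MASTER_IMR) == probe_val;	/* 0xff => no PIC */
}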
diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c index dc5fa6a1e8d6..3512ba607361 100644 --- a/arch/x86/kernel/irq_work.c +++ b/arch/x86/kernel/irq_work.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * x86 specific code for irq_work | 2 | * x86 specific code for irq_work |
3 | * | 3 | * |
4 | * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | 4 | * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra |
5 | */ | 5 | */ |
6 | 6 | ||
7 | #include <linux/kernel.h> | 7 | #include <linux/kernel.h> |
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 2c7aafa70702..2bd81e302427 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -32,6 +32,7 @@ | |||
32 | static int kvmclock = 1; | 32 | static int kvmclock = 1; |
33 | static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; | 33 | static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; |
34 | static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK; | 34 | static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK; |
35 | static cycle_t kvm_sched_clock_offset; | ||
35 | 36 | ||
36 | static int parse_no_kvmclock(char *arg) | 37 | static int parse_no_kvmclock(char *arg) |
37 | { | 38 | { |
@@ -92,6 +93,29 @@ static cycle_t kvm_clock_get_cycles(struct clocksource *cs) | |||
92 | return kvm_clock_read(); | 93 | return kvm_clock_read(); |
93 | } | 94 | } |
94 | 95 | ||
96 | static cycle_t kvm_sched_clock_read(void) | ||
97 | { | ||
98 | return kvm_clock_read() - kvm_sched_clock_offset; | ||
99 | } | ||
100 | |||
101 | static inline void kvm_sched_clock_init(bool stable) | ||
102 | { | ||
103 | if (!stable) { | ||
104 | pv_time_ops.sched_clock = kvm_clock_read; | ||
105 | return; | ||
106 | } | ||
107 | |||
108 | kvm_sched_clock_offset = kvm_clock_read(); | ||
109 | pv_time_ops.sched_clock = kvm_sched_clock_read; | ||
110 | set_sched_clock_stable(); | ||
111 | |||
112 | printk(KERN_INFO "kvm-clock: using sched offset of %llu cycles\n", | ||
113 | kvm_sched_clock_offset); | ||
114 | |||
115 | BUILD_BUG_ON(sizeof(kvm_sched_clock_offset) > | ||
116 | sizeof(((struct pvclock_vcpu_time_info *)NULL)->system_time)); | ||
117 | } | ||
118 | |||
95 | /* | 119 | /* |
96 | * If we don't do that, there is the possibility that the guest | 120 | * If we don't do that, there is the possibility that the guest |
97 | * will calibrate under heavy load - thus, getting a lower lpj - | 121 | * will calibrate under heavy load - thus, getting a lower lpj - |
@@ -248,7 +272,17 @@ void __init kvmclock_init(void) | |||
248 | memblock_free(mem, size); | 272 | memblock_free(mem, size); |
249 | return; | 273 | return; |
250 | } | 274 | } |
251 | pv_time_ops.sched_clock = kvm_clock_read; | 275 | |
276 | if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) | ||
277 | pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT); | ||
278 | |||
279 | cpu = get_cpu(); | ||
280 | vcpu_time = &hv_clock[cpu].pvti; | ||
281 | flags = pvclock_read_flags(vcpu_time); | ||
282 | |||
283 | kvm_sched_clock_init(flags & PVCLOCK_TSC_STABLE_BIT); | ||
284 | put_cpu(); | ||
285 | |||
252 | x86_platform.calibrate_tsc = kvm_get_tsc_khz; | 286 | x86_platform.calibrate_tsc = kvm_get_tsc_khz; |
253 | x86_platform.get_wallclock = kvm_get_wallclock; | 287 | x86_platform.get_wallclock = kvm_get_wallclock; |
254 | x86_platform.set_wallclock = kvm_set_wallclock; | 288 | x86_platform.set_wallclock = kvm_set_wallclock; |
@@ -265,16 +299,6 @@ void __init kvmclock_init(void) | |||
265 | kvm_get_preset_lpj(); | 299 | kvm_get_preset_lpj(); |
266 | clocksource_register_hz(&kvm_clock, NSEC_PER_SEC); | 300 | clocksource_register_hz(&kvm_clock, NSEC_PER_SEC); |
267 | pv_info.name = "KVM"; | 301 | pv_info.name = "KVM"; |
268 | |||
269 | if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) | ||
270 | pvclock_set_flags(~0); | ||
271 | |||
272 | cpu = get_cpu(); | ||
273 | vcpu_time = &hv_clock[cpu].pvti; | ||
274 | flags = pvclock_read_flags(vcpu_time); | ||
275 | if (flags & PVCLOCK_COUNTS_FROM_ZERO) | ||
276 | set_sched_clock_stable(); | ||
277 | put_cpu(); | ||
278 | } | 302 | } |
279 | 303 | ||
280 | int __init kvm_setup_vsyscall_timeinfo(void) | 304 | int __init kvm_setup_vsyscall_timeinfo(void) |
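The new sched-clock path is an offset trick: snapshot the kvm clock once at init and subtract it on every read, so sched_clock() starts near zero even when the hypervisor's system_time does not; the stable case therefore keys off PVCLOCK_TSC_STABLE_BIT rather than PVCLOCK_COUNTS_FROM_ZERO. Minimal sketch with a stand-in clock source:

#include <stdio.h>
#include <stdint.h>

static uint64_t clock_read(void) { return 123456789ULL; }	/* stand-in */

static uint64_t sched_clock_offset;

static uint64_t sched_clock_read(void)
{
	return clock_read() - sched_clock_offset;
}

int main(void)
{
	sched_clock_offset = clock_read();	/* snapshot taken at init */
	printf("%llu\n", (unsigned long long)sched_clock_read());	/* 0 */
	return 0;
}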
diff --git a/arch/x86/kernel/livepatch.c b/arch/x86/kernel/livepatch.c index ff3c3101d003..d1d35ccffed3 100644 --- a/arch/x86/kernel/livepatch.c +++ b/arch/x86/kernel/livepatch.c | |||
@@ -42,7 +42,6 @@ int klp_write_module_reloc(struct module *mod, unsigned long type, | |||
42 | bool readonly; | 42 | bool readonly; |
43 | unsigned long val; | 43 | unsigned long val; |
44 | unsigned long core = (unsigned long)mod->module_core; | 44 | unsigned long core = (unsigned long)mod->module_core; |
45 | unsigned long core_ro_size = mod->core_ro_size; | ||
46 | unsigned long core_size = mod->core_size; | 45 | unsigned long core_size = mod->core_size; |
47 | 46 | ||
48 | switch (type) { | 47 | switch (type) { |
@@ -70,10 +69,12 @@ int klp_write_module_reloc(struct module *mod, unsigned long type, | |||
70 | /* loc does not point to any symbol inside the module */ | 69 | /* loc does not point to any symbol inside the module */ |
71 | return -EINVAL; | 70 | return -EINVAL; |
72 | 71 | ||
73 | if (loc < core + core_ro_size) | 72 | readonly = false; |
73 | |||
74 | #ifdef CONFIG_DEBUG_SET_MODULE_RONX | ||
75 | if (loc < core + mod->core_ro_size) | ||
74 | readonly = true; | 76 | readonly = true; |
75 | else | 77 | #endif |
76 | readonly = false; | ||
77 | 78 | ||
78 | /* determine if the relocation spans a page boundary */ | 79 | /* determine if the relocation spans a page boundary */ |
79 | numpages = ((loc & PAGE_MASK) == ((loc + size) & PAGE_MASK)) ? 1 : 2; | 80 | numpages = ((loc & PAGE_MASK) == ((loc + size) & PAGE_MASK)) ? 1 : 2; |
diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index 94ea120fa21f..87e1762e2bca 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S | |||
@@ -278,6 +278,12 @@ trace: | |||
278 | /* save_mcount_regs fills in first two parameters */ | 278 | /* save_mcount_regs fills in first two parameters */ |
279 | save_mcount_regs | 279 | save_mcount_regs |
280 | 280 | ||
281 | /* | ||
282 | * When DYNAMIC_FTRACE is not defined, ARCH_SUPPORTS_FTRACE_OPS is not | ||
283 | * set (see include/asm/ftrace.h and include/linux/ftrace.h). Only the | ||
284 | * ip and parent ip are used and the list function is called when | ||
285 | * function tracing is enabled. | ||
286 | */ | ||
281 | call *ftrace_trace_function | 287 | call *ftrace_trace_function |
282 | 288 | ||
283 | restore_mcount_regs | 289 | restore_mcount_regs |
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index cd99433b8ba1..6ba014c61d62 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -90,7 +90,7 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size, | |||
90 | again: | 90 | again: |
91 | page = NULL; | 91 | page = NULL; |
92 | /* CMA can be used only in the context which permits sleeping */ | 92 | /* CMA can be used only in the context which permits sleeping */ |
93 | if (flag & __GFP_WAIT) { | 93 | if (gfpflags_allow_blocking(flag)) { |
94 | page = dma_alloc_from_contiguous(dev, count, get_order(size)); | 94 | page = dma_alloc_from_contiguous(dev, count, get_order(size)); |
95 | if (page && page_to_phys(page) + size > dma_mask) { | 95 | if (page && page_to_phys(page) + size > dma_mask) { |
96 | dma_release_from_contiguous(dev, page, count); | 96 | dma_release_from_contiguous(dev, page, count); |
diff --git a/arch/x86/kernel/pmem.c b/arch/x86/kernel/pmem.c index 4f00b63d7ff3..14415aff1813 100644 --- a/arch/x86/kernel/pmem.c +++ b/arch/x86/kernel/pmem.c | |||
@@ -4,10 +4,22 @@ | |||
4 | */ | 4 | */ |
5 | #include <linux/platform_device.h> | 5 | #include <linux/platform_device.h> |
6 | #include <linux/module.h> | 6 | #include <linux/module.h> |
7 | #include <linux/ioport.h> | ||
8 | |||
9 | static int found(u64 start, u64 end, void *data) | ||
10 | { | ||
11 | return 1; | ||
12 | } | ||
7 | 13 | ||
8 | static __init int register_e820_pmem(void) | 14 | static __init int register_e820_pmem(void) |
9 | { | 15 | { |
16 | char *pmem = "Persistent Memory (legacy)"; | ||
10 | struct platform_device *pdev; | 17 | struct platform_device *pdev; |
18 | int rc; | ||
19 | |||
20 | rc = walk_iomem_res(pmem, IORESOURCE_MEM, 0, -1, NULL, found); | ||
21 | if (rc <= 0) | ||
22 | return 0; | ||
11 | 23 | ||
12 | /* | 24 | /* |
13 | * See drivers/nvdimm/e820.c for the implementation, this is | 25 | * See drivers/nvdimm/e820.c for the implementation, this is |
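The found() callback only has to report existence: the walk stops on the first non-zero return, so a positive rc means at least one "Persistent Memory (legacy)" range sits in the resource tree and the platform device is worth registering. A generic sketch of that early-exit walker pattern; the names are illustrative, not the kernel API:

#include <stdio.h>

typedef int (*res_fn)(unsigned long long start, unsigned long long end,
		      void *data);

static int walk_res(res_fn fn, void *data)
{
	/* pretend exactly one matching range exists */
	return fn(0x100000000ULL, 0x17fffffffULL, data);
}

static int found(unsigned long long start, unsigned long long end, void *data)
{
	return 1;	/* non-zero: stop the walk and report a hit */
}

int main(void)
{
	printf("rc=%d\n", walk_res(found, NULL));	/* rc=1 */
	return 0;
}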
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index a1e4da98c8f0..d2bbe343fda7 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -1188,7 +1188,7 @@ void __init setup_arch(char **cmdline_p) | |||
1188 | */ | 1188 | */ |
1189 | clone_pgd_range(initial_page_table, | 1189 | clone_pgd_range(initial_page_table, |
1190 | swapper_pg_dir + KERNEL_PGD_BOUNDARY, | 1190 | swapper_pg_dir + KERNEL_PGD_BOUNDARY, |
1191 | KERNEL_PGD_PTRS); | 1191 | min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); |
1192 | #endif | 1192 | #endif |
1193 | 1193 | ||
1194 | tboot_probe(); | 1194 | tboot_probe(); |
@@ -1250,8 +1250,6 @@ void __init setup_arch(char **cmdline_p) | |||
1250 | if (efi_enabled(EFI_BOOT)) | 1250 | if (efi_enabled(EFI_BOOT)) |
1251 | efi_apply_memmap_quirks(); | 1251 | efi_apply_memmap_quirks(); |
1252 | #endif | 1252 | #endif |
1253 | |||
1254 | microcode_init(); | ||
1255 | } | 1253 | } |
1256 | 1254 | ||
1257 | #ifdef CONFIG_X86_32 | 1255 | #ifdef CONFIG_X86_32 |
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index b7ffb7c00075..cb6282c3638f 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -690,12 +690,15 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) | |||
690 | signal_setup_done(failed, ksig, stepping); | 690 | signal_setup_done(failed, ksig, stepping); |
691 | } | 691 | } |
692 | 692 | ||
693 | #ifdef CONFIG_X86_32 | 693 | static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) |
694 | #define NR_restart_syscall __NR_restart_syscall | 694 | { |
695 | #else /* !CONFIG_X86_32 */ | 695 | #if defined(CONFIG_X86_32) || !defined(CONFIG_X86_64) |
696 | #define NR_restart_syscall \ | 696 | return __NR_restart_syscall; |
697 | test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall | 697 | #else /* !CONFIG_X86_32 && CONFIG_X86_64 */ |
698 | #endif /* CONFIG_X86_32 */ | 698 | return test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : |
699 | __NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT); | ||
700 | #endif /* CONFIG_X86_32 || !CONFIG_X86_64 */ | ||
701 | } | ||
699 | 702 | ||
700 | /* | 703 | /* |
701 | * Note that 'init' is a special process: it doesn't get signals it doesn't | 704 | * Note that 'init' is a special process: it doesn't get signals it doesn't |
@@ -724,7 +727,7 @@ void do_signal(struct pt_regs *regs) | |||
724 | break; | 727 | break; |
725 | 728 | ||
726 | case -ERESTART_RESTARTBLOCK: | 729 | case -ERESTART_RESTARTBLOCK: |
727 | regs->ax = NR_restart_syscall; | 730 | regs->ax = get_nr_restart_syscall(regs); |
728 | regs->ip -= 2; | 731 | regs->ip -= 2; |
729 | break; | 732 | break; |
730 | } | 733 | } |
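The x32 case composes the restart number instead of picking one: x32 system calls carry __X32_SYSCALL_BIT (bit 30) in orig_ax, and OR-ing it back in makes the restarted call re-enter through the same ABI. A stand-alone arithmetic check using the real x86_64 constant values:

#include <stdio.h>

#define __X32_SYSCALL_BIT	0x40000000
#define __NR_restart_syscall	219	/* x86_64 syscall table */

int main(void)
{
	unsigned long orig_ax = __X32_SYSCALL_BIT | 1;	/* an x32 syscall nr */
	unsigned long nr = __NR_restart_syscall |
			   (orig_ax & __X32_SYSCALL_BIT);

	printf("0x%lx\n", nr);	/* 0x400000db: restart_syscall, x32 flavor */
	return 0;
}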
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 4df777710ab7..f2281e9cfdbe 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -509,7 +509,7 @@ void __inquire_remote_apic(int apicid) | |||
509 | */ | 509 | */ |
510 | #define UDELAY_10MS_DEFAULT 10000 | 510 | #define UDELAY_10MS_DEFAULT 10000 |
511 | 511 | ||
512 | static unsigned int init_udelay = INT_MAX; | 512 | static unsigned int init_udelay = UINT_MAX; |
513 | 513 | ||
514 | static int __init cpu_init_udelay(char *str) | 514 | static int __init cpu_init_udelay(char *str) |
515 | { | 515 | { |
@@ -522,14 +522,15 @@ early_param("cpu_init_udelay", cpu_init_udelay); | |||
522 | static void __init smp_quirk_init_udelay(void) | 522 | static void __init smp_quirk_init_udelay(void) |
523 | { | 523 | { |
524 | /* if cmdline changed it from default, leave it alone */ | 524 | /* if cmdline changed it from default, leave it alone */ |
525 | if (init_udelay != INT_MAX) | 525 | if (init_udelay != UINT_MAX) |
526 | return; | 526 | return; |
527 | 527 | ||
528 | /* if modern processor, use no delay */ | 528 | /* if modern processor, use no delay */ |
529 | if (((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 6)) || | 529 | if (((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 6)) || |
530 | ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && (boot_cpu_data.x86 >= 0xF))) | 530 | ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && (boot_cpu_data.x86 >= 0xF))) { |
531 | init_udelay = 0; | 531 | init_udelay = 0; |
532 | 532 | return; | |
533 | } | ||
533 | /* else, use legacy delay */ | 534 | /* else, use legacy delay */ |
534 | init_udelay = UDELAY_10MS_DEFAULT; | 535 | init_udelay = UDELAY_10MS_DEFAULT; |
535 | } | 536 | } |
diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S index b9242bacbe59..4cf401f581e7 100644 --- a/arch/x86/kernel/verify_cpu.S +++ b/arch/x86/kernel/verify_cpu.S | |||
@@ -34,10 +34,11 @@ | |||
34 | #include <asm/msr-index.h> | 34 | #include <asm/msr-index.h> |
35 | 35 | ||
36 | verify_cpu: | 36 | verify_cpu: |
37 | pushfl # Save caller passed flags | 37 | pushf # Save caller passed flags |
38 | pushl $0 # Kill any dangerous flags | 38 | push $0 # Kill any dangerous flags |
39 | popfl | 39 | popf |
40 | 40 | ||
41 | #ifndef __x86_64__ | ||
41 | pushfl # standard way to check for cpuid | 42 | pushfl # standard way to check for cpuid |
42 | popl %eax | 43 | popl %eax |
43 | movl %eax,%ebx | 44 | movl %eax,%ebx |
@@ -48,6 +49,7 @@ verify_cpu: | |||
48 | popl %eax | 49 | popl %eax |
49 | cmpl %eax,%ebx | 50 | cmpl %eax,%ebx |
50 | jz verify_cpu_no_longmode # cpu has no cpuid | 51 | jz verify_cpu_no_longmode # cpu has no cpuid |
52 | #endif | ||
51 | 53 | ||
52 | movl $0x0,%eax # See if cpuid 1 is implemented | 54 | movl $0x0,%eax # See if cpuid 1 is implemented |
53 | cpuid | 55 | cpuid |
@@ -130,10 +132,10 @@ verify_cpu_sse_test: | |||
130 | jmp verify_cpu_sse_test # try again | 132 | jmp verify_cpu_sse_test # try again |
131 | 133 | ||
132 | verify_cpu_no_longmode: | 134 | verify_cpu_no_longmode: |
133 | popfl # Restore caller passed flags | 135 | popf # Restore caller passed flags |
134 | movl $1,%eax | 136 | movl $1,%eax |
135 | ret | 137 | ret |
136 | verify_cpu_sse_ok: | 138 | verify_cpu_sse_ok: |
137 | popfl # Restore caller passed flags | 139 | popf # Restore caller passed flags |
138 | xorl %eax, %eax | 140 | xorl %eax, %eax |
139 | ret | 141 | ret |
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index d8a1d56276e1..639a6e34500c 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig | |||
@@ -28,6 +28,8 @@ config KVM | |||
28 | select ANON_INODES | 28 | select ANON_INODES |
29 | select HAVE_KVM_IRQCHIP | 29 | select HAVE_KVM_IRQCHIP |
30 | select HAVE_KVM_IRQFD | 30 | select HAVE_KVM_IRQFD |
31 | select IRQ_BYPASS_MANAGER | ||
32 | select HAVE_KVM_IRQ_BYPASS | ||
31 | select HAVE_KVM_IRQ_ROUTING | 33 | select HAVE_KVM_IRQ_ROUTING |
32 | select HAVE_KVM_EVENTFD | 34 | select HAVE_KVM_EVENTFD |
33 | select KVM_APIC_ARCHITECTURE | 35 | select KVM_APIC_ARCHITECTURE |
diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c index d090ecf08809..9dc091acd5fb 100644 --- a/arch/x86/kvm/assigned-dev.c +++ b/arch/x86/kvm/assigned-dev.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/fs.h> | 21 | #include <linux/fs.h> |
22 | #include "irq.h" | 22 | #include "irq.h" |
23 | #include "assigned-dev.h" | 23 | #include "assigned-dev.h" |
24 | #include <trace/events/kvm.h> ||
24 | 25 | ||
25 | struct kvm_assigned_dev_kernel { | 26 | struct kvm_assigned_dev_kernel { |
26 | struct kvm_irq_ack_notifier ack_notifier; | 27 | struct kvm_irq_ack_notifier ack_notifier; |
@@ -131,7 +132,42 @@ static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id) | |||
131 | return IRQ_HANDLED; | 132 | return IRQ_HANDLED; |
132 | } | 133 | } |
133 | 134 | ||
134 | #ifdef __KVM_HAVE_MSI | 135 | /* |
136 | * Deliver an IRQ in an atomic context if we can, or return a failure, | ||
137 | * user can retry in a process context. | ||
138 | * Return value: | ||
139 | * -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context. | ||
140 | * Other values - No need to retry. | ||
141 | */ | ||
142 | static int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, | ||
143 | int level) | ||
144 | { | ||
145 | struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; | ||
146 | struct kvm_kernel_irq_routing_entry *e; | ||
147 | int ret = -EINVAL; | ||
148 | int idx; | ||
149 | |||
150 | trace_kvm_set_irq(irq, level, irq_source_id); | ||
151 | |||
152 | /* | ||
153 | * Injection into either PIC or IOAPIC might need to scan all CPUs, | ||
154 | * which would need to be retried from thread context; when same GSI | ||
155 | * is connected to both PIC and IOAPIC, we'd have to report a | ||
156 | * partial failure here. | ||
157 | * Since there's no easy way to do this, we only support injecting MSI | ||
158 | * which is limited to 1:1 GSI mapping. | ||
159 | */ | ||
160 | idx = srcu_read_lock(&kvm->irq_srcu); | ||
161 | if (kvm_irq_map_gsi(kvm, entries, irq) > 0) { | ||
162 | e = &entries[0]; | ||
163 | ret = kvm_arch_set_irq_inatomic(e, kvm, irq_source_id, | ||
164 | irq, level); | ||
165 | } | ||
166 | srcu_read_unlock(&kvm->irq_srcu, idx); | ||
167 | return ret; | ||
168 | } | ||
169 | |||
170 | |||
135 | static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id) | 171 | static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id) |
136 | { | 172 | { |
137 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; | 173 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; |
@@ -150,9 +186,7 @@ static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id) | |||
150 | 186 | ||
151 | return IRQ_HANDLED; | 187 | return IRQ_HANDLED; |
152 | } | 188 | } |
153 | #endif | ||
154 | 189 | ||
155 | #ifdef __KVM_HAVE_MSIX | ||
156 | static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id) | 190 | static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id) |
157 | { | 191 | { |
158 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; | 192 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; |
@@ -183,7 +217,6 @@ static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id) | |||
183 | 217 | ||
184 | return IRQ_HANDLED; | 218 | return IRQ_HANDLED; |
185 | } | 219 | } |
186 | #endif | ||
187 | 220 | ||
188 | /* Ack the irq line for an assigned device */ | 221 | /* Ack the irq line for an assigned device */ |
189 | static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) | 222 | static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) |
@@ -386,7 +419,6 @@ static int assigned_device_enable_host_intx(struct kvm *kvm, | |||
386 | return 0; | 419 | return 0; |
387 | } | 420 | } |
388 | 421 | ||
389 | #ifdef __KVM_HAVE_MSI | ||
390 | static int assigned_device_enable_host_msi(struct kvm *kvm, | 422 | static int assigned_device_enable_host_msi(struct kvm *kvm, |
391 | struct kvm_assigned_dev_kernel *dev) | 423 | struct kvm_assigned_dev_kernel *dev) |
392 | { | 424 | { |
@@ -408,9 +440,7 @@ static int assigned_device_enable_host_msi(struct kvm *kvm, | |||
408 | 440 | ||
409 | return 0; | 441 | return 0; |
410 | } | 442 | } |
411 | #endif | ||
412 | 443 | ||
413 | #ifdef __KVM_HAVE_MSIX | ||
414 | static int assigned_device_enable_host_msix(struct kvm *kvm, | 444 | static int assigned_device_enable_host_msix(struct kvm *kvm, |
415 | struct kvm_assigned_dev_kernel *dev) | 445 | struct kvm_assigned_dev_kernel *dev) |
416 | { | 446 | { |
@@ -443,8 +473,6 @@ err: | |||
443 | return r; | 473 | return r; |
444 | } | 474 | } |
445 | 475 | ||
446 | #endif | ||
447 | |||
448 | static int assigned_device_enable_guest_intx(struct kvm *kvm, | 476 | static int assigned_device_enable_guest_intx(struct kvm *kvm, |
449 | struct kvm_assigned_dev_kernel *dev, | 477 | struct kvm_assigned_dev_kernel *dev, |
450 | struct kvm_assigned_irq *irq) | 478 | struct kvm_assigned_irq *irq) |
@@ -454,7 +482,6 @@ static int assigned_device_enable_guest_intx(struct kvm *kvm, | |||
454 | return 0; | 482 | return 0; |
455 | } | 483 | } |
456 | 484 | ||
457 | #ifdef __KVM_HAVE_MSI | ||
458 | static int assigned_device_enable_guest_msi(struct kvm *kvm, | 485 | static int assigned_device_enable_guest_msi(struct kvm *kvm, |
459 | struct kvm_assigned_dev_kernel *dev, | 486 | struct kvm_assigned_dev_kernel *dev, |
460 | struct kvm_assigned_irq *irq) | 487 | struct kvm_assigned_irq *irq) |
@@ -463,9 +490,7 @@ static int assigned_device_enable_guest_msi(struct kvm *kvm, | |||
463 | dev->ack_notifier.gsi = -1; | 490 | dev->ack_notifier.gsi = -1; |
464 | return 0; | 491 | return 0; |
465 | } | 492 | } |
466 | #endif | ||
467 | 493 | ||
468 | #ifdef __KVM_HAVE_MSIX | ||
469 | static int assigned_device_enable_guest_msix(struct kvm *kvm, | 494 | static int assigned_device_enable_guest_msix(struct kvm *kvm, |
470 | struct kvm_assigned_dev_kernel *dev, | 495 | struct kvm_assigned_dev_kernel *dev, |
471 | struct kvm_assigned_irq *irq) | 496 | struct kvm_assigned_irq *irq) |
@@ -474,7 +499,6 @@ static int assigned_device_enable_guest_msix(struct kvm *kvm, | |||
474 | dev->ack_notifier.gsi = -1; | 499 | dev->ack_notifier.gsi = -1; |
475 | return 0; | 500 | return 0; |
476 | } | 501 | } |
477 | #endif | ||
478 | 502 | ||
479 | static int assign_host_irq(struct kvm *kvm, | 503 | static int assign_host_irq(struct kvm *kvm, |
480 | struct kvm_assigned_dev_kernel *dev, | 504 | struct kvm_assigned_dev_kernel *dev, |
@@ -492,16 +516,12 @@ static int assign_host_irq(struct kvm *kvm, | |||
492 | case KVM_DEV_IRQ_HOST_INTX: | 516 | case KVM_DEV_IRQ_HOST_INTX: |
493 | r = assigned_device_enable_host_intx(kvm, dev); | 517 | r = assigned_device_enable_host_intx(kvm, dev); |
494 | break; | 518 | break; |
495 | #ifdef __KVM_HAVE_MSI | ||
496 | case KVM_DEV_IRQ_HOST_MSI: | 519 | case KVM_DEV_IRQ_HOST_MSI: |
497 | r = assigned_device_enable_host_msi(kvm, dev); | 520 | r = assigned_device_enable_host_msi(kvm, dev); |
498 | break; | 521 | break; |
499 | #endif | ||
500 | #ifdef __KVM_HAVE_MSIX | ||
501 | case KVM_DEV_IRQ_HOST_MSIX: | 522 | case KVM_DEV_IRQ_HOST_MSIX: |
502 | r = assigned_device_enable_host_msix(kvm, dev); | 523 | r = assigned_device_enable_host_msix(kvm, dev); |
503 | break; | 524 | break; |
504 | #endif | ||
505 | default: | 525 | default: |
506 | r = -EINVAL; | 526 | r = -EINVAL; |
507 | } | 527 | } |
@@ -534,16 +554,12 @@ static int assign_guest_irq(struct kvm *kvm, | |||
534 | case KVM_DEV_IRQ_GUEST_INTX: | 554 | case KVM_DEV_IRQ_GUEST_INTX: |
535 | r = assigned_device_enable_guest_intx(kvm, dev, irq); | 555 | r = assigned_device_enable_guest_intx(kvm, dev, irq); |
536 | break; | 556 | break; |
537 | #ifdef __KVM_HAVE_MSI | ||
538 | case KVM_DEV_IRQ_GUEST_MSI: | 557 | case KVM_DEV_IRQ_GUEST_MSI: |
539 | r = assigned_device_enable_guest_msi(kvm, dev, irq); | 558 | r = assigned_device_enable_guest_msi(kvm, dev, irq); |
540 | break; | 559 | break; |
541 | #endif | ||
542 | #ifdef __KVM_HAVE_MSIX | ||
543 | case KVM_DEV_IRQ_GUEST_MSIX: | 560 | case KVM_DEV_IRQ_GUEST_MSIX: |
544 | r = assigned_device_enable_guest_msix(kvm, dev, irq); | 561 | r = assigned_device_enable_guest_msix(kvm, dev, irq); |
545 | break; | 562 | break; |
546 | #endif | ||
547 | default: | 563 | default: |
548 | r = -EINVAL; | 564 | r = -EINVAL; |
549 | } | 565 | } |
@@ -826,7 +842,6 @@ out: | |||
826 | } | 842 | } |
827 | 843 | ||
828 | 844 | ||
829 | #ifdef __KVM_HAVE_MSIX | ||
830 | static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm, | 845 | static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm, |
831 | struct kvm_assigned_msix_nr *entry_nr) | 846 | struct kvm_assigned_msix_nr *entry_nr) |
832 | { | 847 | { |
@@ -906,7 +921,6 @@ msix_entry_out: | |||
906 | 921 | ||
907 | return r; | 922 | return r; |
908 | } | 923 | } |
909 | #endif | ||
910 | 924 | ||
911 | static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm, | 925 | static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm, |
912 | struct kvm_assigned_pci_dev *assigned_dev) | 926 | struct kvm_assigned_pci_dev *assigned_dev) |
@@ -1012,7 +1026,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, | |||
1012 | goto out; | 1026 | goto out; |
1013 | break; | 1027 | break; |
1014 | } | 1028 | } |
1015 | #ifdef __KVM_HAVE_MSIX | ||
1016 | case KVM_ASSIGN_SET_MSIX_NR: { | 1029 | case KVM_ASSIGN_SET_MSIX_NR: { |
1017 | struct kvm_assigned_msix_nr entry_nr; | 1030 | struct kvm_assigned_msix_nr entry_nr; |
1018 | r = -EFAULT; | 1031 | r = -EFAULT; |
@@ -1033,7 +1046,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, | |||
1033 | goto out; | 1046 | goto out; |
1034 | break; | 1047 | break; |
1035 | } | 1048 | } |
1036 | #endif | ||
1037 | case KVM_ASSIGN_SET_INTX_MASK: { | 1049 | case KVM_ASSIGN_SET_INTX_MASK: { |
1038 | struct kvm_assigned_pci_dev assigned_dev; | 1050 | struct kvm_assigned_pci_dev assigned_dev; |
1039 | 1051 | ||
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 156441bcaac8..6525e926f566 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c | |||
@@ -348,7 +348,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
348 | F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) | | 348 | F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) | |
349 | F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) | | 349 | F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) | |
350 | F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) | | 350 | F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) | |
351 | F(AVX512CD); | 351 | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(PCOMMIT); |
352 | 352 | ||
353 | /* cpuid 0xD.1.eax */ | 353 | /* cpuid 0xD.1.eax */ |
354 | const u32 kvm_supported_word10_x86_features = | 354 | const u32 kvm_supported_word10_x86_features = |
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index dd05b9cef6ae..06332cb7e7d1 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h | |||
@@ -133,4 +133,41 @@ static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu) | |||
133 | best = kvm_find_cpuid_entry(vcpu, 7, 0); | 133 | best = kvm_find_cpuid_entry(vcpu, 7, 0); |
134 | return best && (best->ebx & bit(X86_FEATURE_MPX)); | 134 | return best && (best->ebx & bit(X86_FEATURE_MPX)); |
135 | } | 135 | } |
136 | |||
137 | static inline bool guest_cpuid_has_pcommit(struct kvm_vcpu *vcpu) | ||
138 | { | ||
139 | struct kvm_cpuid_entry2 *best; | ||
140 | |||
141 | best = kvm_find_cpuid_entry(vcpu, 7, 0); | ||
142 | return best && (best->ebx & bit(X86_FEATURE_PCOMMIT)); | ||
143 | } | ||
144 | |||
145 | static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu) | ||
146 | { | ||
147 | struct kvm_cpuid_entry2 *best; | ||
148 | |||
149 | best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | ||
150 | return best && (best->edx & bit(X86_FEATURE_RDTSCP)); | ||
151 | } | ||
152 | |||
153 | /* | ||
154 | * NRIPS is provided through CPUID fn 0x8000000a, EDX bit 3 ||
155 | */ | ||
156 | #define BIT_NRIPS 3 | ||
157 | |||
158 | static inline bool guest_cpuid_has_nrips(struct kvm_vcpu *vcpu) | ||
159 | { | ||
160 | struct kvm_cpuid_entry2 *best; | ||
161 | |||
162 | best = kvm_find_cpuid_entry(vcpu, 0x8000000a, 0); | ||
163 | |||
164 | /* | ||
165 | * NRIPS is a scattered cpuid feature, so we can't use | ||
166 | * X86_FEATURE_NRIPS here (X86_FEATURE_NRIPS would be bit | ||
167 | * position 8, not 3). | ||
168 | */ | ||
169 | return best && (best->edx & bit(BIT_NRIPS)); | ||
170 | } | ||
171 | #undef BIT_NRIPS | ||
172 | |||
136 | #endif | 173 | #endif |
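guest_cpuid_has_nrips() spells out why scattered features need raw bit positions: Linux relocates NRIPS to bit 8 of its own feature word, but the guest's CPUID leaf still reports it at hardware bit 3, so the check must test the hardware position. A stand-alone check of the distinction (bit() reimplemented locally for illustration):

#include <stdio.h>

#define bit(n) (1U << (n))

int main(void)
{
	unsigned int edx = bit(3);	/* CPUID 0x8000000a.EDX, NRIPS set */

	printf("hw bit 3: %d\n", !!(edx & bit(3)));	/* 1: correct test */
	printf("linux bit 8: %d\n", !!(edx & bit(8)));	/* 0: wrong test */
	return 0;
}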
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 9da95b9daf8d..1505587d06e9 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -2272,8 +2272,8 @@ static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt) | |||
2272 | #define GET_SMSTATE(type, smbase, offset) \ | 2272 | #define GET_SMSTATE(type, smbase, offset) \ |
2273 | ({ \ | 2273 | ({ \ |
2274 | type __val; \ | 2274 | type __val; \ |
2275 | int r = ctxt->ops->read_std(ctxt, smbase + offset, &__val, \ | 2275 | int r = ctxt->ops->read_phys(ctxt, smbase + offset, &__val, \ |
2276 | sizeof(__val), NULL); \ | 2276 | sizeof(__val)); \ |
2277 | if (r != X86EMUL_CONTINUE) \ | 2277 | if (r != X86EMUL_CONTINUE) \ |
2278 | return X86EMUL_UNHANDLEABLE; \ | 2278 | return X86EMUL_UNHANDLEABLE; \ |
2279 | __val; \ | 2279 | __val; \ |
@@ -2484,17 +2484,36 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt) | |||
2484 | 2484 | ||
2485 | /* | 2485 | /* |
2486 | * Get back to real mode, to prepare a safe state in which to load | 2486 | * Get back to real mode, to prepare a safe state in which to load |
2487 | * CR0/CR3/CR4/EFER. Also this will ensure that addresses passed | 2487 | * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU |
2488 | * to read_std/write_std are not virtual. | 2488 | * supports long mode. |
2489 | * | ||
2490 | * CR4.PCIDE must be zero, because it is a 64-bit mode only feature. | ||
2491 | */ | 2489 | */ |
2490 | cr4 = ctxt->ops->get_cr(ctxt, 4); | ||
2491 | if (emulator_has_longmode(ctxt)) { | ||
2492 | struct desc_struct cs_desc; | ||
2493 | |||
2494 | /* Zero CR4.PCIDE before CR0.PG. */ | ||
2495 | if (cr4 & X86_CR4_PCIDE) { | ||
2496 | ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE); | ||
2497 | cr4 &= ~X86_CR4_PCIDE; | ||
2498 | } | ||
2499 | |||
2500 | /* A 32-bit code segment is required to clear EFER.LMA. */ | ||
2501 | memset(&cs_desc, 0, sizeof(cs_desc)); | ||
2502 | cs_desc.type = 0xb; | ||
2503 | cs_desc.s = cs_desc.g = cs_desc.p = 1; | ||
2504 | ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS); | ||
2505 | } | ||
2506 | |||
2507 | /* For the 64-bit case, this will clear EFER.LMA. */ | ||
2492 | cr0 = ctxt->ops->get_cr(ctxt, 0); | 2508 | cr0 = ctxt->ops->get_cr(ctxt, 0); |
2493 | if (cr0 & X86_CR0_PE) | 2509 | if (cr0 & X86_CR0_PE) |
2494 | ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE)); | 2510 | ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE)); |
2495 | cr4 = ctxt->ops->get_cr(ctxt, 4); | 2511 | |
2512 | /* Now clear CR4.PAE (which must be done before clearing EFER.LME). */ | ||
2496 | if (cr4 & X86_CR4_PAE) | 2513 | if (cr4 & X86_CR4_PAE) |
2497 | ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE); | 2514 | ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE); |
2515 | |||
2516 | /* And finally go back to 32-bit mode. */ | ||
2498 | efer = 0; | 2517 | efer = 0; |
2499 | ctxt->ops->set_msr(ctxt, MSR_EFER, efer); | 2518 | ctxt->ops->set_msr(ctxt, MSR_EFER, efer); |
2500 | 2519 | ||
@@ -4455,7 +4474,7 @@ static const struct opcode twobyte_table[256] = { | |||
4455 | F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N, | 4474 | F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N, |
4456 | /* 0xA8 - 0xAF */ | 4475 | /* 0xA8 - 0xAF */ |
4457 | I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg), | 4476 | I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg), |
4458 | II(No64 | EmulateOnUD | ImplicitOps, em_rsm, rsm), | 4477 | II(EmulateOnUD | ImplicitOps, em_rsm, rsm), |
4459 | F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), | 4478 | F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), |
4460 | F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd), | 4479 | F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd), |
4461 | F(DstMem | SrcReg | Src2CL | ModRM, em_shrd), | 4480 | F(DstMem | SrcReg | Src2CL | ModRM, em_shrd), |
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index a8160d2ae362..62cf8c915e95 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c | |||
@@ -41,6 +41,7 @@ static bool kvm_hv_msr_partition_wide(u32 msr) | |||
41 | case HV_X64_MSR_TIME_REF_COUNT: | 41 | case HV_X64_MSR_TIME_REF_COUNT: |
42 | case HV_X64_MSR_CRASH_CTL: | 42 | case HV_X64_MSR_CRASH_CTL: |
43 | case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: | 43 | case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: |
44 | case HV_X64_MSR_RESET: | ||
44 | r = true; | 45 | r = true; |
45 | break; | 46 | break; |
46 | } | 47 | } |
@@ -163,6 +164,12 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, | |||
163 | data); | 164 | data); |
164 | case HV_X64_MSR_CRASH_CTL: | 165 | case HV_X64_MSR_CRASH_CTL: |
165 | return kvm_hv_msr_set_crash_ctl(vcpu, data, host); | 166 | return kvm_hv_msr_set_crash_ctl(vcpu, data, host); |
167 | case HV_X64_MSR_RESET: | ||
168 | if (data == 1) { | ||
169 | vcpu_debug(vcpu, "hyper-v reset requested\n"); | ||
170 | kvm_make_request(KVM_REQ_HV_RESET, vcpu); | ||
171 | } | ||
172 | break; | ||
166 | default: | 173 | default: |
167 | vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n", | 174 | vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n", |
168 | msr, data); | 175 | msr, data); |
@@ -171,7 +178,16 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, | |||
171 | return 0; | 178 | return 0; |
172 | } | 179 | } |
173 | 180 | ||
174 | static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data) | 181 | /* Calculate cpu time spent by current task in 100ns units */ |
182 | static u64 current_task_runtime_100ns(void) | ||
183 | { | ||
184 | cputime_t utime, stime; | ||
185 | |||
186 | task_cputime_adjusted(current, &utime, &stime); | ||
187 | return div_u64(cputime_to_nsecs(utime + stime), 100); | ||
188 | } | ||
189 | |||
190 | static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) | ||
175 | { | 191 | { |
176 | struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv; | 192 | struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv; |
177 | 193 | ||
@@ -205,6 +221,11 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
205 | return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data); | 221 | return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data); |
206 | case HV_X64_MSR_TPR: | 222 | case HV_X64_MSR_TPR: |
207 | return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data); | 223 | return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data); |
224 | case HV_X64_MSR_VP_RUNTIME: | ||
225 | if (!host) | ||
226 | return 1; | ||
227 | hv->runtime_offset = data - current_task_runtime_100ns(); | ||
228 | break; | ||
208 | default: | 229 | default: |
209 | vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n", | 230 | vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n", |
210 | msr, data); | 231 | msr, data); |
@@ -241,6 +262,9 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
241 | pdata); | 262 | pdata); |
242 | case HV_X64_MSR_CRASH_CTL: | 263 | case HV_X64_MSR_CRASH_CTL: |
243 | return kvm_hv_msr_get_crash_ctl(vcpu, pdata); | 264 | return kvm_hv_msr_get_crash_ctl(vcpu, pdata); |
265 | case HV_X64_MSR_RESET: | ||
266 | data = 0; | ||
267 | break; | ||
244 | default: | 268 | default: |
245 | vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); | 269 | vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); |
246 | return 1; | 270 | return 1; |
@@ -277,6 +301,9 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
277 | case HV_X64_MSR_APIC_ASSIST_PAGE: | 301 | case HV_X64_MSR_APIC_ASSIST_PAGE: |
278 | data = hv->hv_vapic; | 302 | data = hv->hv_vapic; |
279 | break; | 303 | break; |
304 | case HV_X64_MSR_VP_RUNTIME: | ||
305 | data = current_task_runtime_100ns() + hv->runtime_offset; | ||
306 | break; | ||
280 | default: | 307 | default: |
281 | vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); | 308 | vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); |
282 | return 1; | 309 | return 1; |
@@ -295,7 +322,7 @@ int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) | |||
295 | mutex_unlock(&vcpu->kvm->lock); | 322 | mutex_unlock(&vcpu->kvm->lock); |
296 | return r; | 323 | return r; |
297 | } else | 324 | } else |
298 | return kvm_hv_set_msr(vcpu, msr, data); | 325 | return kvm_hv_set_msr(vcpu, msr, data, host); |
299 | } | 326 | } |
300 | 327 | ||
301 | int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | 328 | int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) |
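
A note on the hyperv.c hunks above: HV_X64_MSR_VP_RUNTIME reports the cpu time consumed by the vCPU task in 100ns units. Guest writes are refused (the !host check returns 1), and a host write only adjusts hv->runtime_offset, so reads stay continuous across save/restore. A minimal userspace sketch of that accounting, where hv_runtime_ns stands in for task_cputime_adjusted() and is purely illustrative:

#include <stdint.h>
#include <stdio.h>

static uint64_t hv_runtime_ns;          /* pretend utime + stime, in ns */
static uint64_t hv_runtime_offset;      /* mirrors hv->runtime_offset   */

static uint64_t current_task_runtime_100ns(void)
{
        return hv_runtime_ns / 100;     /* ns to 100ns units, as div_u64() does */
}

/* host-initiated write: make subsequent reads resume from 'data' */
static void set_vp_runtime(uint64_t data)
{
        hv_runtime_offset = data - current_task_runtime_100ns();
}

static uint64_t get_vp_runtime(void)
{
        return current_task_runtime_100ns() + hv_runtime_offset;
}

int main(void)
{
        hv_runtime_ns = 5000;   /* 5 us of cpu time consumed so far */
        set_vp_runtime(1000);   /* host restores the MSR after migration */
        hv_runtime_ns += 300;   /* three more 100ns units elapse */
        printf("%llu\n", (unsigned long long)get_vp_runtime()); /* 1003 */
        return 0;
}

After the host write, reads resume from the restored value while still tracking newly consumed cpu time.
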
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index f90952f64e79..08116ff227cc 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
@@ -35,6 +35,7 @@ | |||
35 | #include <linux/kvm_host.h> | 35 | #include <linux/kvm_host.h> |
36 | #include <linux/slab.h> | 36 | #include <linux/slab.h> |
37 | 37 | ||
38 | #include "ioapic.h" | ||
38 | #include "irq.h" | 39 | #include "irq.h" |
39 | #include "i8254.h" | 40 | #include "i8254.h" |
40 | #include "x86.h" | 41 | #include "x86.h" |
@@ -333,7 +334,8 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period) | |||
333 | struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; | 334 | struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; |
334 | s64 interval; | 335 | s64 interval; |
335 | 336 | ||
336 | if (!irqchip_in_kernel(kvm) || ps->flags & KVM_PIT_FLAGS_HPET_LEGACY) | 337 | if (!ioapic_in_kernel(kvm) || |
338 | ps->flags & KVM_PIT_FLAGS_HPET_LEGACY) | ||
337 | return; | 339 | return; |
338 | 340 | ||
339 | interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); | 341 | interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); |
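
The i8254.c change only swaps the guard: with a split irqchip the PIT is emulated in userspace, so create_pit_timer() now bails out unless the IOAPIC is in the kernel. The interval computation is untouched; it converts the programmed reload count to nanoseconds. A sketch of that conversion, assuming the nominal 1.193182 MHz input clock (the kernel's KVM_PIT_FREQ constant may round differently):

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC    1000000000ULL
#define PIT_FREQ_HZ     1193182ULL      /* nominal 8254 clock (assumed) */

/* muldiv64(a, b, c) behaves as a * b / c with a wide intermediate;
 * re-done here with unsigned __int128, a GCC/Clang extension. */
static uint64_t muldiv64(uint64_t a, uint64_t b, uint64_t c)
{
        return (uint64_t)((unsigned __int128)a * b / c);
}

int main(void)
{
        uint32_t val = 0x10000; /* reload count 65536: the classic 18.2 Hz tick */

        printf("%llu ns\n",
               (unsigned long long)muldiv64(val, NSEC_PER_SEC, PIT_FREQ_HZ));
        /* roughly 54.9 ms per period */
        return 0;
}
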
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 856f79105bb5..88d0a92d3f94 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c | |||
@@ -233,21 +233,7 @@ static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr) | |||
233 | } | 233 | } |
234 | 234 | ||
235 | 235 | ||
236 | static void update_handled_vectors(struct kvm_ioapic *ioapic) | 236 | void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) |
237 | { | ||
238 | DECLARE_BITMAP(handled_vectors, 256); | ||
239 | int i; | ||
240 | |||
241 | memset(handled_vectors, 0, sizeof(handled_vectors)); | ||
242 | for (i = 0; i < IOAPIC_NUM_PINS; ++i) | ||
243 | __set_bit(ioapic->redirtbl[i].fields.vector, handled_vectors); | ||
244 | memcpy(ioapic->handled_vectors, handled_vectors, | ||
245 | sizeof(handled_vectors)); | ||
246 | smp_wmb(); | ||
247 | } | ||
248 | |||
249 | void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap, | ||
250 | u32 *tmr) | ||
251 | { | 237 | { |
252 | struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; | 238 | struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; |
253 | union kvm_ioapic_redirect_entry *e; | 239 | union kvm_ioapic_redirect_entry *e; |
@@ -260,13 +246,11 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap, | |||
260 | kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index) || | 246 | kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index) || |
261 | index == RTC_GSI) { | 247 | index == RTC_GSI) { |
262 | if (kvm_apic_match_dest(vcpu, NULL, 0, | 248 | if (kvm_apic_match_dest(vcpu, NULL, 0, |
263 | e->fields.dest_id, e->fields.dest_mode)) { | 249 | e->fields.dest_id, e->fields.dest_mode) || |
250 | (e->fields.trig_mode == IOAPIC_EDGE_TRIG && | ||
251 | kvm_apic_pending_eoi(vcpu, e->fields.vector))) | ||
264 | __set_bit(e->fields.vector, | 252 | __set_bit(e->fields.vector, |
265 | (unsigned long *)eoi_exit_bitmap); | 253 | (unsigned long *)eoi_exit_bitmap); |
266 | if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG) | ||
267 | __set_bit(e->fields.vector, | ||
268 | (unsigned long *)tmr); | ||
269 | } | ||
270 | } | 254 | } |
271 | } | 255 | } |
272 | spin_unlock(&ioapic->lock); | 256 | spin_unlock(&ioapic->lock); |
@@ -315,7 +299,6 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) | |||
315 | e->bits |= (u32) val; | 299 | e->bits |= (u32) val; |
316 | e->fields.remote_irr = 0; | 300 | e->fields.remote_irr = 0; |
317 | } | 301 | } |
318 | update_handled_vectors(ioapic); | ||
319 | mask_after = e->fields.mask; | 302 | mask_after = e->fields.mask; |
320 | if (mask_before != mask_after) | 303 | if (mask_before != mask_after) |
321 | kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after); | 304 | kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after); |
@@ -599,7 +582,6 @@ static void kvm_ioapic_reset(struct kvm_ioapic *ioapic) | |||
599 | ioapic->id = 0; | 582 | ioapic->id = 0; |
600 | memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS); | 583 | memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS); |
601 | rtc_irq_eoi_tracking_reset(ioapic); | 584 | rtc_irq_eoi_tracking_reset(ioapic); |
602 | update_handled_vectors(ioapic); | ||
603 | } | 585 | } |
604 | 586 | ||
605 | static const struct kvm_io_device_ops ioapic_mmio_ops = { | 587 | static const struct kvm_io_device_ops ioapic_mmio_ops = { |
@@ -628,8 +610,10 @@ int kvm_ioapic_init(struct kvm *kvm) | |||
628 | if (ret < 0) { | 610 | if (ret < 0) { |
629 | kvm->arch.vioapic = NULL; | 611 | kvm->arch.vioapic = NULL; |
630 | kfree(ioapic); | 612 | kfree(ioapic); |
613 | return ret; | ||
631 | } | 614 | } |
632 | 615 | ||
616 | kvm_vcpu_request_scan_ioapic(kvm); | ||
633 | return ret; | 617 | return ret; |
634 | } | 618 | } |
635 | 619 | ||
@@ -666,7 +650,6 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) | |||
666 | memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); | 650 | memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); |
667 | ioapic->irr = 0; | 651 | ioapic->irr = 0; |
668 | ioapic->irr_delivered = 0; | 652 | ioapic->irr_delivered = 0; |
669 | update_handled_vectors(ioapic); | ||
670 | kvm_vcpu_request_scan_ioapic(kvm); | 653 | kvm_vcpu_request_scan_ioapic(kvm); |
671 | kvm_ioapic_inject_all(ioapic, state->irr); | 654 | kvm_ioapic_inject_all(ioapic, state->irr); |
672 | spin_unlock(&ioapic->lock); | 655 | spin_unlock(&ioapic->lock); |
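
These ioapic.c hunks retire the per-IOAPIC handled_vectors bitmap; EOI interception is now driven entirely by the per-vCPU eoi_exit_bitmap that kvm_ioapic_scan_entry() fills in. An entry contributes its vector either when it targets the scanning vCPU or when it is edge-triggered with an EOI still pending (the RTC tracking case). A standalone sketch of the bitmap update; the helper names are illustrative, not the kernel's:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* 256 vectors in four 64-bit words, matching u64 *eoi_exit_bitmap */
static void set_eoi_exit(uint64_t *bm, unsigned vector)
{
        bm[vector / 64] |= 1ULL << (vector % 64);
}

static bool needs_eoi_exit(bool dest_match, bool edge, bool pending_eoi)
{
        /* mirrors the rewritten condition in kvm_ioapic_scan_entry() */
        return dest_match || (edge && pending_eoi);
}

int main(void)
{
        uint64_t bm[4] = { 0 };

        if (needs_eoi_exit(true, false, false))
                set_eoi_exit(bm, 0x31);
        printf("%016llx\n", (unsigned long long)bm[0]); /* bit 0x31 set */
        return 0;
}

Any vector set here later forces an EOI-induced exit instead of being completed entirely in hardware.
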
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index ca0b0b4e6256..084617d37c74 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h | |||
@@ -9,6 +9,7 @@ struct kvm; | |||
9 | struct kvm_vcpu; | 9 | struct kvm_vcpu; |
10 | 10 | ||
11 | #define IOAPIC_NUM_PINS KVM_IOAPIC_NUM_PINS | 11 | #define IOAPIC_NUM_PINS KVM_IOAPIC_NUM_PINS |
12 | #define MAX_NR_RESERVED_IOAPIC_PINS KVM_MAX_IRQ_ROUTES | ||
12 | #define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */ | 13 | #define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */ |
13 | #define IOAPIC_EDGE_TRIG 0 | 14 | #define IOAPIC_EDGE_TRIG 0 |
14 | #define IOAPIC_LEVEL_TRIG 1 | 15 | #define IOAPIC_LEVEL_TRIG 1 |
@@ -73,7 +74,6 @@ struct kvm_ioapic { | |||
73 | struct kvm *kvm; | 74 | struct kvm *kvm; |
74 | void (*ack_notifier)(void *opaque, int irq); | 75 | void (*ack_notifier)(void *opaque, int irq); |
75 | spinlock_t lock; | 76 | spinlock_t lock; |
76 | DECLARE_BITMAP(handled_vectors, 256); | ||
77 | struct rtc_status rtc_status; | 77 | struct rtc_status rtc_status; |
78 | struct delayed_work eoi_inject; | 78 | struct delayed_work eoi_inject; |
79 | u32 irq_eoi[IOAPIC_NUM_PINS]; | 79 | u32 irq_eoi[IOAPIC_NUM_PINS]; |
@@ -98,11 +98,12 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm) | |||
98 | return kvm->arch.vioapic; | 98 | return kvm->arch.vioapic; |
99 | } | 99 | } |
100 | 100 | ||
101 | static inline bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector) | 101 | static inline int ioapic_in_kernel(struct kvm *kvm) |
102 | { | 102 | { |
103 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; | 103 | int ret; |
104 | smp_rmb(); | 104 | |
105 | return test_bit(vector, ioapic->handled_vectors); | 105 | ret = (ioapic_irqchip(kvm) != NULL); |
106 | return ret; | ||
106 | } | 107 | } |
107 | 108 | ||
108 | void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); | 109 | void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); |
@@ -120,7 +121,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | |||
120 | struct kvm_lapic_irq *irq, unsigned long *dest_map); | 121 | struct kvm_lapic_irq *irq, unsigned long *dest_map); |
121 | int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); | 122 | int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); |
122 | int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); | 123 | int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); |
123 | void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap, | 124 | void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); |
124 | u32 *tmr); | 125 | void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); |
125 | 126 | ||
126 | #endif | 127 | #endif |
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index a1ec6a50a05a..097060e33bd6 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c | |||
@@ -38,14 +38,27 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) | |||
38 | EXPORT_SYMBOL(kvm_cpu_has_pending_timer); | 38 | EXPORT_SYMBOL(kvm_cpu_has_pending_timer); |
39 | 39 | ||
40 | /* | 40 | /* |
41 | * check if there is a pending userspace external interrupt | ||
42 | */ | ||
43 | static int pending_userspace_extint(struct kvm_vcpu *v) | ||
44 | { | ||
45 | return v->arch.pending_external_vector != -1; | ||
46 | } | ||
47 | |||
48 | /* | ||
41 | * check if there is pending interrupt from | 49 | * check if there is pending interrupt from |
42 | * non-APIC source without intack. | 50 | * non-APIC source without intack. |
43 | */ | 51 | */ |
44 | static int kvm_cpu_has_extint(struct kvm_vcpu *v) | 52 | static int kvm_cpu_has_extint(struct kvm_vcpu *v) |
45 | { | 53 | { |
46 | if (kvm_apic_accept_pic_intr(v)) | 54 | u8 accept = kvm_apic_accept_pic_intr(v); |
47 | return pic_irqchip(v->kvm)->output; /* PIC */ | 55 | |
48 | else | 56 | if (accept) { |
57 | if (irqchip_split(v->kvm)) | ||
58 | return pending_userspace_extint(v); | ||
59 | else | ||
60 | return pic_irqchip(v->kvm)->output; | ||
61 | } else | ||
49 | return 0; | 62 | return 0; |
50 | } | 63 | } |
51 | 64 | ||
@@ -57,13 +70,13 @@ static int kvm_cpu_has_extint(struct kvm_vcpu *v) | |||
57 | */ | 70 | */ |
58 | int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) | 71 | int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) |
59 | { | 72 | { |
60 | if (!irqchip_in_kernel(v->kvm)) | 73 | if (!lapic_in_kernel(v)) |
61 | return v->arch.interrupt.pending; | 74 | return v->arch.interrupt.pending; |
62 | 75 | ||
63 | if (kvm_cpu_has_extint(v)) | 76 | if (kvm_cpu_has_extint(v)) |
64 | return 1; | 77 | return 1; |
65 | 78 | ||
66 | if (kvm_apic_vid_enabled(v->kvm)) | 79 | if (kvm_vcpu_apic_vid_enabled(v)) |
67 | return 0; | 80 | return 0; |
68 | 81 | ||
69 | return kvm_apic_has_interrupt(v) != -1; /* LAPIC */ | 82 | return kvm_apic_has_interrupt(v) != -1; /* LAPIC */ |
@@ -75,7 +88,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) | |||
75 | */ | 88 | */ |
76 | int kvm_cpu_has_interrupt(struct kvm_vcpu *v) | 89 | int kvm_cpu_has_interrupt(struct kvm_vcpu *v) |
77 | { | 90 | { |
78 | if (!irqchip_in_kernel(v->kvm)) | 91 | if (!lapic_in_kernel(v)) |
79 | return v->arch.interrupt.pending; | 92 | return v->arch.interrupt.pending; |
80 | 93 | ||
81 | if (kvm_cpu_has_extint(v)) | 94 | if (kvm_cpu_has_extint(v)) |
@@ -91,9 +104,16 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt); | |||
91 | */ | 104 | */ |
92 | static int kvm_cpu_get_extint(struct kvm_vcpu *v) | 105 | static int kvm_cpu_get_extint(struct kvm_vcpu *v) |
93 | { | 106 | { |
94 | if (kvm_cpu_has_extint(v)) | 107 | if (kvm_cpu_has_extint(v)) { |
95 | return kvm_pic_read_irq(v->kvm); /* PIC */ | 108 | if (irqchip_split(v->kvm)) { |
96 | return -1; | 109 | int vector = v->arch.pending_external_vector; |
110 | |||
111 | v->arch.pending_external_vector = -1; | ||
112 | return vector; | ||
113 | } else | ||
114 | return kvm_pic_read_irq(v->kvm); /* PIC */ | ||
115 | } else | ||
116 | return -1; | ||
97 | } | 117 | } |
98 | 118 | ||
99 | /* | 119 | /* |
@@ -103,7 +123,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v) | |||
103 | { | 123 | { |
104 | int vector; | 124 | int vector; |
105 | 125 | ||
106 | if (!irqchip_in_kernel(v->kvm)) | 126 | if (!lapic_in_kernel(v)) |
107 | return v->arch.interrupt.nr; | 127 | return v->arch.interrupt.nr; |
108 | 128 | ||
109 | vector = kvm_cpu_get_extint(v); | 129 | vector = kvm_cpu_get_extint(v); |
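
With a split irqchip the PIC lives in userspace, so irq.c models an ExtINT hand-off as a one-slot mailbox: userspace deposits the vector in vcpu->arch.pending_external_vector (-1 meaning empty) and kvm_cpu_get_extint() consumes it at injection time. A minimal model of that protocol:

#include <stdio.h>

/* -1 means "no pending userspace extint", as in vcpu->arch */
static int pending_external_vector = -1;

static int kvm_cpu_get_extint_split(void)
{
        int vector = pending_external_vector;

        pending_external_vector = -1;   /* consume on read, like an INTA cycle */
        return vector;
}

int main(void)
{
        pending_external_vector = 0x20; /* userspace queued vector 0x20 */
        printf("%d\n", kvm_cpu_get_extint_split());     /* 32 */
        printf("%d\n", kvm_cpu_get_extint_split());     /* -1: slot is empty */
        return 0;
}

Reading destructively mirrors the acknowledge cycle of a real PIC: once consumed, the slot is free for the next interrupt.
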
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 3d782a2c336a..ae5c78f2337d 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h | |||
@@ -83,13 +83,38 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) | |||
83 | return kvm->arch.vpic; | 83 | return kvm->arch.vpic; |
84 | } | 84 | } |
85 | 85 | ||
86 | static inline int pic_in_kernel(struct kvm *kvm) | ||
87 | { | ||
88 | int ret; | ||
89 | |||
90 | ret = (pic_irqchip(kvm) != NULL); | ||
91 | return ret; | ||
92 | } | ||
93 | |||
94 | static inline int irqchip_split(struct kvm *kvm) | ||
95 | { | ||
96 | return kvm->arch.irqchip_split; | ||
97 | } | ||
98 | |||
86 | static inline int irqchip_in_kernel(struct kvm *kvm) | 99 | static inline int irqchip_in_kernel(struct kvm *kvm) |
87 | { | 100 | { |
88 | struct kvm_pic *vpic = pic_irqchip(kvm); | 101 | struct kvm_pic *vpic = pic_irqchip(kvm); |
102 | bool ret; | ||
103 | |||
104 | ret = (vpic != NULL); | ||
105 | ret |= irqchip_split(kvm); | ||
89 | 106 | ||
90 | /* Read vpic before kvm->irq_routing. */ | 107 | /* Read vpic before kvm->irq_routing. */ |
91 | smp_rmb(); | 108 | smp_rmb(); |
92 | return vpic != NULL; | 109 | return ret; |
110 | } | ||
111 | |||
112 | static inline int lapic_in_kernel(struct kvm_vcpu *vcpu) | ||
113 | { | ||
114 | /* Same as irqchip_in_kernel(vcpu->kvm), but with less | ||
115 | * pointer chasing and no unnecessary memory barriers. | ||
116 | */ | ||
117 | return vcpu->arch.apic != NULL; | ||
93 | } | 118 | } |
94 | 119 | ||
95 | void kvm_pic_reset(struct kvm_kpic_state *s); | 120 | void kvm_pic_reset(struct kvm_kpic_state *s); |
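
The irq.h helpers split the old single predicate three ways: pic_in_kernel() and ioapic_in_kernel() test the individual components, irqchip_split() flags the new mode, irqchip_in_kernel() stays true for both the full and the split configuration, and lapic_in_kernel() checks only vcpu->arch.apic without the barrier. A plain-C model of the three configurations (the struct and names here are illustrative):

#include <stdbool.h>
#include <stdio.h>

/* the three irqchip configurations; struct and names are illustrative */
struct mode { bool pic; bool split; bool lapic; };

static void show(const char *name, struct mode m)
{
        bool irqchip_in_kernel = m.pic || m.split;

        printf("%-10s pic=%d split=%d irqchip=%d lapic=%d\n",
               name, m.pic, m.split, irqchip_in_kernel, m.lapic);
}

int main(void)
{
        show("full",      (struct mode){ .pic = true,   .lapic = true });
        show("split",     (struct mode){ .split = true, .lapic = true });
        show("userspace", (struct mode){ 0 });
        return 0;
}
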
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 9efff9e5b58c..84b96d319909 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c | |||
@@ -91,8 +91,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | |||
91 | return r; | 91 | return r; |
92 | } | 92 | } |
93 | 93 | ||
94 | static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, | 94 | void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, |
95 | struct kvm_lapic_irq *irq) | 95 | struct kvm_lapic_irq *irq) |
96 | { | 96 | { |
97 | trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data); | 97 | trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data); |
98 | 98 | ||
@@ -108,6 +108,7 @@ static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, | |||
108 | irq->level = 1; | 108 | irq->level = 1; |
109 | irq->shorthand = 0; | 109 | irq->shorthand = 0; |
110 | } | 110 | } |
111 | EXPORT_SYMBOL_GPL(kvm_set_msi_irq); | ||
111 | 112 | ||
112 | int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, | 113 | int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, |
113 | struct kvm *kvm, int irq_source_id, int level, bool line_status) | 114 | struct kvm *kvm, int irq_source_id, int level, bool line_status) |
@@ -123,12 +124,16 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, | |||
123 | } | 124 | } |
124 | 125 | ||
125 | 126 | ||
126 | static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e, | 127 | int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, |
127 | struct kvm *kvm) | 128 | struct kvm *kvm, int irq_source_id, int level, |
129 | bool line_status) | ||
128 | { | 130 | { |
129 | struct kvm_lapic_irq irq; | 131 | struct kvm_lapic_irq irq; |
130 | int r; | 132 | int r; |
131 | 133 | ||
134 | if (unlikely(e->type != KVM_IRQ_ROUTING_MSI)) | ||
135 | return -EWOULDBLOCK; | ||
136 | |||
132 | kvm_set_msi_irq(e, &irq); | 137 | kvm_set_msi_irq(e, &irq); |
133 | 138 | ||
134 | if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL)) | 139 | if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL)) |
@@ -137,42 +142,6 @@ static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e, | |||
137 | return -EWOULDBLOCK; | 142 | return -EWOULDBLOCK; |
138 | } | 143 | } |
139 | 144 | ||
140 | /* | ||
141 | * Deliver an IRQ in an atomic context if we can, or return a failure, | ||
142 | * user can retry in a process context. | ||
143 | * Return value: | ||
144 | * -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context. | ||
145 | * Other values - No need to retry. | ||
146 | */ | ||
147 | int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) | ||
148 | { | ||
149 | struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; | ||
150 | struct kvm_kernel_irq_routing_entry *e; | ||
151 | int ret = -EINVAL; | ||
152 | int idx; | ||
153 | |||
154 | trace_kvm_set_irq(irq, level, irq_source_id); | ||
155 | |||
156 | /* | ||
157 | * Injection into either PIC or IOAPIC might need to scan all CPUs, | ||
158 | * which would need to be retried from thread context; when same GSI | ||
159 | * is connected to both PIC and IOAPIC, we'd have to report a | ||
160 | * partial failure here. | ||
161 | * Since there's no easy way to do this, we only support injecting MSI | ||
162 | * which is limited to 1:1 GSI mapping. | ||
163 | */ | ||
164 | idx = srcu_read_lock(&kvm->irq_srcu); | ||
165 | if (kvm_irq_map_gsi(kvm, entries, irq) > 0) { | ||
166 | e = &entries[0]; | ||
167 | if (likely(e->type == KVM_IRQ_ROUTING_MSI)) | ||
168 | ret = kvm_set_msi_inatomic(e, kvm); | ||
169 | else | ||
170 | ret = -EWOULDBLOCK; | ||
171 | } | ||
172 | srcu_read_unlock(&kvm->irq_srcu, idx); | ||
173 | return ret; | ||
174 | } | ||
175 | |||
176 | int kvm_request_irq_source_id(struct kvm *kvm) | 145 | int kvm_request_irq_source_id(struct kvm *kvm) |
177 | { | 146 | { |
178 | unsigned long *bitmap = &kvm->arch.irq_sources_bitmap; | 147 | unsigned long *bitmap = &kvm->arch.irq_sources_bitmap; |
@@ -208,7 +177,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) | |||
208 | goto unlock; | 177 | goto unlock; |
209 | } | 178 | } |
210 | clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap); | 179 | clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap); |
211 | if (!irqchip_in_kernel(kvm)) | 180 | if (!ioapic_in_kernel(kvm)) |
212 | goto unlock; | 181 | goto unlock; |
213 | 182 | ||
214 | kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id); | 183 | kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id); |
@@ -297,6 +266,33 @@ out: | |||
297 | return r; | 266 | return r; |
298 | } | 267 | } |
299 | 268 | ||
269 | bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, | ||
270 | struct kvm_vcpu **dest_vcpu) | ||
271 | { | ||
272 | int i, r = 0; | ||
273 | struct kvm_vcpu *vcpu; | ||
274 | |||
275 | if (kvm_intr_is_single_vcpu_fast(kvm, irq, dest_vcpu)) | ||
276 | return true; | ||
277 | |||
278 | kvm_for_each_vcpu(i, vcpu, kvm) { | ||
279 | if (!kvm_apic_present(vcpu)) | ||
280 | continue; | ||
281 | |||
282 | if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand, | ||
283 | irq->dest_id, irq->dest_mode)) | ||
284 | continue; | ||
285 | |||
286 | if (++r == 2) | ||
287 | return false; | ||
288 | |||
289 | *dest_vcpu = vcpu; | ||
290 | } | ||
291 | |||
292 | return r == 1; | ||
293 | } | ||
294 | EXPORT_SYMBOL_GPL(kvm_intr_is_single_vcpu); | ||
295 | |||
300 | #define IOAPIC_ROUTING_ENTRY(irq) \ | 296 | #define IOAPIC_ROUTING_ENTRY(irq) \ |
301 | { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ | 297 | { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ |
302 | .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } } | 298 | .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } } |
@@ -328,3 +324,54 @@ int kvm_setup_default_irq_routing(struct kvm *kvm) | |||
328 | return kvm_set_irq_routing(kvm, default_routing, | 324 | return kvm_set_irq_routing(kvm, default_routing, |
329 | ARRAY_SIZE(default_routing), 0); | 325 | ARRAY_SIZE(default_routing), 0); |
330 | } | 326 | } |
327 | |||
328 | static const struct kvm_irq_routing_entry empty_routing[] = {}; | ||
329 | |||
330 | int kvm_setup_empty_irq_routing(struct kvm *kvm) | ||
331 | { | ||
332 | return kvm_set_irq_routing(kvm, empty_routing, 0, 0); | ||
333 | } | ||
334 | |||
335 | void kvm_arch_irq_routing_update(struct kvm *kvm) | ||
336 | { | ||
337 | if (ioapic_in_kernel(kvm) || !irqchip_in_kernel(kvm)) | ||
338 | return; | ||
339 | kvm_make_scan_ioapic_request(kvm); | ||
340 | } | ||
341 | |||
342 | void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) | ||
343 | { | ||
344 | struct kvm *kvm = vcpu->kvm; | ||
345 | struct kvm_kernel_irq_routing_entry *entry; | ||
346 | struct kvm_irq_routing_table *table; | ||
347 | u32 i, nr_ioapic_pins; | ||
348 | int idx; | ||
349 | |||
350 | /* kvm->irq_routing must be read after clearing | ||
351 | * KVM_SCAN_IOAPIC. */ | ||
352 | smp_mb(); | ||
353 | idx = srcu_read_lock(&kvm->irq_srcu); | ||
354 | table = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); | ||
355 | nr_ioapic_pins = min_t(u32, table->nr_rt_entries, | ||
356 | kvm->arch.nr_reserved_ioapic_pins); | ||
357 | for (i = 0; i < nr_ioapic_pins; ++i) { | ||
358 | hlist_for_each_entry(entry, &table->map[i], link) { | ||
359 | u32 dest_id, dest_mode; | ||
360 | bool level; | ||
361 | |||
362 | if (entry->type != KVM_IRQ_ROUTING_MSI) | ||
363 | continue; | ||
364 | dest_id = (entry->msi.address_lo >> 12) & 0xff; | ||
365 | dest_mode = (entry->msi.address_lo >> 2) & 0x1; | ||
366 | level = entry->msi.data & MSI_DATA_TRIGGER_LEVEL; | ||
367 | if (level && kvm_apic_match_dest(vcpu, NULL, 0, | ||
368 | dest_id, dest_mode)) { | ||
369 | u32 vector = entry->msi.data & 0xff; | ||
370 | |||
371 | __set_bit(vector, | ||
372 | (unsigned long *) eoi_exit_bitmap); | ||
373 | } | ||
374 | } | ||
375 | } | ||
376 | srcu_read_unlock(&kvm->irq_srcu, idx); | ||
377 | } | ||
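
kvm_scan_ioapic_routes() decodes the routing fields straight out of the MSI address/data pair: destination id in bits 19:12 of address_lo, destination mode in bit 2, trigger mode in bit 15 of data, vector in its low byte. A quick decode of one sample message (layout per the standard MSI format; the values are made up):

#include <stdint.h>
#include <stdio.h>

#define MSI_DATA_TRIGGER_LEVEL  (1u << 15)

int main(void)
{
        /* sample message: dest_id 0x05, logical mode, level, vector 0x61 */
        uint32_t address_lo = 0xfee05004u;
        uint32_t data = MSI_DATA_TRIGGER_LEVEL | 0x61;

        printf("dest_id   %#x\n", (unsigned)((address_lo >> 12) & 0xff));
        printf("dest_mode %u\n",  (unsigned)((address_lo >> 2) & 0x1));
        printf("level     %u\n",  (unsigned)!!(data & MSI_DATA_TRIGGER_LEVEL));
        printf("vector    %#x\n", (unsigned)(data & 0xff));
        return 0;
}
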
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 8d9013c5e1ee..4d30b865be30 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -209,7 +209,7 @@ out: | |||
209 | if (old) | 209 | if (old) |
210 | kfree_rcu(old, rcu); | 210 | kfree_rcu(old, rcu); |
211 | 211 | ||
212 | kvm_vcpu_request_scan_ioapic(kvm); | 212 | kvm_make_scan_ioapic_request(kvm); |
213 | } | 213 | } |
214 | 214 | ||
215 | static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) | 215 | static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) |
@@ -348,6 +348,8 @@ void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir) | |||
348 | struct kvm_lapic *apic = vcpu->arch.apic; | 348 | struct kvm_lapic *apic = vcpu->arch.apic; |
349 | 349 | ||
350 | __kvm_apic_update_irr(pir, apic->regs); | 350 | __kvm_apic_update_irr(pir, apic->regs); |
351 | |||
352 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
351 | } | 353 | } |
352 | EXPORT_SYMBOL_GPL(kvm_apic_update_irr); | 354 | EXPORT_SYMBOL_GPL(kvm_apic_update_irr); |
353 | 355 | ||
@@ -390,7 +392,7 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) | |||
390 | 392 | ||
391 | vcpu = apic->vcpu; | 393 | vcpu = apic->vcpu; |
392 | 394 | ||
393 | if (unlikely(kvm_apic_vid_enabled(vcpu->kvm))) { | 395 | if (unlikely(kvm_vcpu_apic_vid_enabled(vcpu))) { |
394 | /* try to update RVI */ | 396 | /* try to update RVI */ |
395 | apic_clear_vector(vec, apic->regs + APIC_IRR); | 397 | apic_clear_vector(vec, apic->regs + APIC_IRR); |
396 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 398 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
@@ -551,15 +553,6 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu) | |||
551 | __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); | 553 | __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); |
552 | } | 554 | } |
553 | 555 | ||
554 | void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr) | ||
555 | { | ||
556 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
557 | int i; | ||
558 | |||
559 | for (i = 0; i < 8; i++) | ||
560 | apic_set_reg(apic, APIC_TMR + 0x10 * i, tmr[i]); | ||
561 | } | ||
562 | |||
563 | static void apic_update_ppr(struct kvm_lapic *apic) | 556 | static void apic_update_ppr(struct kvm_lapic *apic) |
564 | { | 557 | { |
565 | u32 tpr, isrv, ppr, old_ppr; | 558 | u32 tpr, isrv, ppr, old_ppr; |
@@ -764,6 +757,65 @@ out: | |||
764 | return ret; | 757 | return ret; |
765 | } | 758 | } |
766 | 759 | ||
760 | bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, | ||
761 | struct kvm_vcpu **dest_vcpu) | ||
762 | { | ||
763 | struct kvm_apic_map *map; | ||
764 | bool ret = false; | ||
765 | struct kvm_lapic *dst = NULL; | ||
766 | |||
767 | if (irq->shorthand) | ||
768 | return false; | ||
769 | |||
770 | rcu_read_lock(); | ||
771 | map = rcu_dereference(kvm->arch.apic_map); | ||
772 | |||
773 | if (!map) | ||
774 | goto out; | ||
775 | |||
776 | if (irq->dest_mode == APIC_DEST_PHYSICAL) { | ||
777 | if (irq->dest_id == 0xFF) | ||
778 | goto out; | ||
779 | |||
780 | if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) | ||
781 | goto out; | ||
782 | |||
783 | dst = map->phys_map[irq->dest_id]; | ||
784 | if (dst && kvm_apic_present(dst->vcpu)) | ||
785 | *dest_vcpu = dst->vcpu; | ||
786 | else | ||
787 | goto out; | ||
788 | } else { | ||
789 | u16 cid; | ||
790 | unsigned long bitmap = 1; | ||
791 | int i, r = 0; | ||
792 | |||
793 | if (!kvm_apic_logical_map_valid(map)) | ||
794 | goto out; | ||
795 | |||
796 | apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap); | ||
797 | |||
798 | if (cid >= ARRAY_SIZE(map->logical_map)) | ||
799 | goto out; | ||
800 | |||
801 | for_each_set_bit(i, &bitmap, 16) { | ||
802 | dst = map->logical_map[cid][i]; | ||
803 | if (++r == 2) | ||
804 | goto out; | ||
805 | } | ||
806 | |||
807 | if (dst && kvm_apic_present(dst->vcpu)) | ||
808 | *dest_vcpu = dst->vcpu; | ||
809 | else | ||
810 | goto out; | ||
811 | } | ||
812 | |||
813 | ret = true; | ||
814 | out: | ||
815 | rcu_read_unlock(); | ||
816 | return ret; | ||
817 | } | ||
818 | |||
767 | /* | 819 | /* |
768 | * Add a pending IRQ into lapic. | 820 | * Add a pending IRQ into lapic. |
769 | * Return 1 if successfully added and 0 if discarded. | 821 | * Return 1 if successfully added and 0 if discarded. |
@@ -781,6 +833,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
781 | case APIC_DM_LOWEST: | 833 | case APIC_DM_LOWEST: |
782 | vcpu->arch.apic_arb_prio++; | 834 | vcpu->arch.apic_arb_prio++; |
783 | case APIC_DM_FIXED: | 835 | case APIC_DM_FIXED: |
836 | if (unlikely(trig_mode && !level)) | ||
837 | break; | ||
838 | |||
784 | /* FIXME add logic for vcpu on reset */ | 839 | /* FIXME add logic for vcpu on reset */ |
785 | if (unlikely(!apic_enabled(apic))) | 840 | if (unlikely(!apic_enabled(apic))) |
786 | break; | 841 | break; |
@@ -790,6 +845,13 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
790 | if (dest_map) | 845 | if (dest_map) |
791 | __set_bit(vcpu->vcpu_id, dest_map); | 846 | __set_bit(vcpu->vcpu_id, dest_map); |
792 | 847 | ||
848 | if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) { | ||
849 | if (trig_mode) | ||
850 | apic_set_vector(vector, apic->regs + APIC_TMR); | ||
851 | else | ||
852 | apic_clear_vector(vector, apic->regs + APIC_TMR); | ||
853 | } | ||
854 | |||
793 | if (kvm_x86_ops->deliver_posted_interrupt) | 855 | if (kvm_x86_ops->deliver_posted_interrupt) |
794 | kvm_x86_ops->deliver_posted_interrupt(vcpu, vector); | 856 | kvm_x86_ops->deliver_posted_interrupt(vcpu, vector); |
795 | else { | 857 | else { |
@@ -868,16 +930,32 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) | |||
868 | return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; | 930 | return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; |
869 | } | 931 | } |
870 | 932 | ||
933 | static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector) | ||
934 | { | ||
935 | return test_bit(vector, (ulong *)apic->vcpu->arch.eoi_exit_bitmap); | ||
936 | } | ||
937 | |||
871 | static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector) | 938 | static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector) |
872 | { | 939 | { |
873 | if (kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) { | 940 | int trigger_mode; |
874 | int trigger_mode; | 941 | |
875 | if (apic_test_vector(vector, apic->regs + APIC_TMR)) | 942 | /* Eoi the ioapic only if the ioapic doesn't own the vector. */ |
876 | trigger_mode = IOAPIC_LEVEL_TRIG; | 943 | if (!kvm_ioapic_handles_vector(apic, vector)) |
877 | else | 944 | return; |
878 | trigger_mode = IOAPIC_EDGE_TRIG; | 945 | |
879 | kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode); | 946 | /* Request a KVM exit to inform the userspace IOAPIC. */ |
947 | if (irqchip_split(apic->vcpu->kvm)) { | ||
948 | apic->vcpu->arch.pending_ioapic_eoi = vector; | ||
949 | kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu); | ||
950 | return; | ||
880 | } | 951 | } |
952 | |||
953 | if (apic_test_vector(vector, apic->regs + APIC_TMR)) | ||
954 | trigger_mode = IOAPIC_LEVEL_TRIG; | ||
955 | else | ||
956 | trigger_mode = IOAPIC_EDGE_TRIG; | ||
957 | |||
958 | kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode); | ||
881 | } | 959 | } |
882 | 960 | ||
883 | static int apic_set_eoi(struct kvm_lapic *apic) | 961 | static int apic_set_eoi(struct kvm_lapic *apic) |
@@ -1172,7 +1250,7 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu) | |||
1172 | 1250 | ||
1173 | tsc_deadline = apic->lapic_timer.expired_tscdeadline; | 1251 | tsc_deadline = apic->lapic_timer.expired_tscdeadline; |
1174 | apic->lapic_timer.expired_tscdeadline = 0; | 1252 | apic->lapic_timer.expired_tscdeadline = 0; |
1175 | guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, rdtsc()); | 1253 | guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); |
1176 | trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline); | 1254 | trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline); |
1177 | 1255 | ||
1178 | /* __delay is delay_tsc whenever the hardware has TSC, thus always. */ | 1256 | /* __delay is delay_tsc whenever the hardware has TSC, thus always. */ |
@@ -1240,7 +1318,7 @@ static void start_apic_timer(struct kvm_lapic *apic) | |||
1240 | local_irq_save(flags); | 1318 | local_irq_save(flags); |
1241 | 1319 | ||
1242 | now = apic->lapic_timer.timer.base->get_time(); | 1320 | now = apic->lapic_timer.timer.base->get_time(); |
1243 | guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, rdtsc()); | 1321 | guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); |
1244 | if (likely(tscdeadline > guest_tsc)) { | 1322 | if (likely(tscdeadline > guest_tsc)) { |
1245 | ns = (tscdeadline - guest_tsc) * 1000000ULL; | 1323 | ns = (tscdeadline - guest_tsc) * 1000000ULL; |
1246 | do_div(ns, this_tsc_khz); | 1324 | do_div(ns, this_tsc_khz); |
@@ -1615,7 +1693,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) | |||
1615 | apic_set_reg(apic, APIC_ISR + 0x10 * i, 0); | 1693 | apic_set_reg(apic, APIC_ISR + 0x10 * i, 0); |
1616 | apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); | 1694 | apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); |
1617 | } | 1695 | } |
1618 | apic->irr_pending = kvm_apic_vid_enabled(vcpu->kvm); | 1696 | apic->irr_pending = kvm_vcpu_apic_vid_enabled(vcpu); |
1619 | apic->isr_count = kvm_x86_ops->hwapic_isr_update ? 1 : 0; | 1697 | apic->isr_count = kvm_x86_ops->hwapic_isr_update ? 1 : 0; |
1620 | apic->highest_isr_cache = -1; | 1698 | apic->highest_isr_cache = -1; |
1621 | update_divide_count(apic); | 1699 | update_divide_count(apic); |
@@ -1838,7 +1916,10 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, | |||
1838 | kvm_x86_ops->hwapic_isr_update(vcpu->kvm, | 1916 | kvm_x86_ops->hwapic_isr_update(vcpu->kvm, |
1839 | apic_find_highest_isr(apic)); | 1917 | apic_find_highest_isr(apic)); |
1840 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 1918 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
1841 | kvm_rtc_eoi_tracking_restore_one(vcpu); | 1919 | if (ioapic_in_kernel(vcpu->kvm)) |
1920 | kvm_rtc_eoi_tracking_restore_one(vcpu); | ||
1921 | |||
1922 | vcpu->arch.apic_arb_prio = 0; | ||
1842 | } | 1923 | } |
1843 | 1924 | ||
1844 | void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) | 1925 | void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) |
@@ -1922,7 +2003,7 @@ static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu, | |||
1922 | /* Cache not set: could be safe but we don't bother. */ | 2003 | /* Cache not set: could be safe but we don't bother. */ |
1923 | apic->highest_isr_cache == -1 || | 2004 | apic->highest_isr_cache == -1 || |
1924 | /* Need EOI to update ioapic. */ | 2005 | /* Need EOI to update ioapic. */ |
1925 | kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache)) { | 2006 | kvm_ioapic_handles_vector(apic, apic->highest_isr_cache)) { |
1926 | /* | 2007 | /* |
1927 | * PV EOI was disabled by apic_sync_pv_eoi_from_guest | 2008 | * PV EOI was disabled by apic_sync_pv_eoi_from_guest |
1928 | * so we need not do anything here. | 2009 | * so we need not do anything here. |
@@ -1978,7 +2059,7 @@ int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1978 | struct kvm_lapic *apic = vcpu->arch.apic; | 2059 | struct kvm_lapic *apic = vcpu->arch.apic; |
1979 | u32 reg = (msr - APIC_BASE_MSR) << 4; | 2060 | u32 reg = (msr - APIC_BASE_MSR) << 4; |
1980 | 2061 | ||
1981 | if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic)) | 2062 | if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic)) |
1982 | return 1; | 2063 | return 1; |
1983 | 2064 | ||
1984 | if (reg == APIC_ICR2) | 2065 | if (reg == APIC_ICR2) |
@@ -1995,7 +2076,7 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data) | |||
1995 | struct kvm_lapic *apic = vcpu->arch.apic; | 2076 | struct kvm_lapic *apic = vcpu->arch.apic; |
1996 | u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0; | 2077 | u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0; |
1997 | 2078 | ||
1998 | if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic)) | 2079 | if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic)) |
1999 | return 1; | 2080 | return 1; |
2000 | 2081 | ||
2001 | if (reg == APIC_DFR || reg == APIC_ICR2) { | 2082 | if (reg == APIC_DFR || reg == APIC_ICR2) { |
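
Beyond moving TMR maintenance into __apic_accept_irq() and turning EOIs for userspace-owned vectors into KVM_REQ_IOAPIC_EOI_EXIT requests, the lapic.c hunks switch the deadline-timer paths from the raw kvm_x86_ops->read_l1_tsc hook to kvm_read_l1_tsc(). The arithmetic that turns the remaining TSC ticks into an hrtimer delay is unchanged; a worked version:

#include <stdint.h>
#include <stdio.h>

/* start_apic_timer()'s ticks-to-ns step: khz * 1000 ticks per second,
 * so ns = ticks * 1000000 / khz (the kernel uses do_div for the divide) */
static uint64_t ticks_to_ns(uint64_t ticks, uint32_t tsc_khz)
{
        return ticks * 1000000ULL / tsc_khz;
}

int main(void)
{
        uint64_t guest_tsc = 1000000, tscdeadline = 6200000;
        uint32_t this_tsc_khz = 2600000;        /* a 2.6 GHz guest TSC */

        if (tscdeadline > guest_tsc)
                printf("%llu ns\n", (unsigned long long)
                       ticks_to_ns(tscdeadline - guest_tsc, this_tsc_khz));
        /* 5200000 ticks at 2.6 GHz: 2000000 ns, i.e. a 2 ms hrtimer */
        return 0;
}
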
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 764037991d26..fde8e35d5850 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
@@ -57,7 +57,6 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value); | |||
57 | u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu); | 57 | u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu); |
58 | void kvm_apic_set_version(struct kvm_vcpu *vcpu); | 58 | void kvm_apic_set_version(struct kvm_vcpu *vcpu); |
59 | 59 | ||
60 | void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr); | ||
61 | void __kvm_apic_update_irr(u32 *pir, void *regs); | 60 | void __kvm_apic_update_irr(u32 *pir, void *regs); |
62 | void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir); | 61 | void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir); |
63 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, | 62 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, |
@@ -144,9 +143,9 @@ static inline int apic_x2apic_mode(struct kvm_lapic *apic) | |||
144 | return apic->vcpu->arch.apic_base & X2APIC_ENABLE; | 143 | return apic->vcpu->arch.apic_base & X2APIC_ENABLE; |
145 | } | 144 | } |
146 | 145 | ||
147 | static inline bool kvm_apic_vid_enabled(struct kvm *kvm) | 146 | static inline bool kvm_vcpu_apic_vid_enabled(struct kvm_vcpu *vcpu) |
148 | { | 147 | { |
149 | return kvm_x86_ops->vm_has_apicv(kvm); | 148 | return kvm_x86_ops->cpu_uses_apicv(vcpu); |
150 | } | 149 | } |
151 | 150 | ||
152 | static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) | 151 | static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) |
@@ -169,4 +168,6 @@ bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); | |||
169 | 168 | ||
170 | void wait_lapic_expire(struct kvm_vcpu *vcpu); | 169 | void wait_lapic_expire(struct kvm_vcpu *vcpu); |
171 | 170 | ||
171 | bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, | ||
172 | struct kvm_vcpu **dest_vcpu); | ||
172 | #endif | 173 | #endif |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index ff606f507913..e7c2c1428a69 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -818,14 +818,11 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
818 | kvm->arch.indirect_shadow_pages--; | 818 | kvm->arch.indirect_shadow_pages--; |
819 | } | 819 | } |
820 | 820 | ||
821 | static int has_wrprotected_page(struct kvm_vcpu *vcpu, | 821 | static int __has_wrprotected_page(gfn_t gfn, int level, |
822 | gfn_t gfn, | 822 | struct kvm_memory_slot *slot) |
823 | int level) | ||
824 | { | 823 | { |
825 | struct kvm_memory_slot *slot; | ||
826 | struct kvm_lpage_info *linfo; | 824 | struct kvm_lpage_info *linfo; |
827 | 825 | ||
828 | slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); | ||
829 | if (slot) { | 826 | if (slot) { |
830 | linfo = lpage_info_slot(gfn, slot, level); | 827 | linfo = lpage_info_slot(gfn, slot, level); |
831 | return linfo->write_count; | 828 | return linfo->write_count; |
@@ -834,6 +831,14 @@ static int has_wrprotected_page(struct kvm_vcpu *vcpu, | |||
834 | return 1; | 831 | return 1; |
835 | } | 832 | } |
836 | 833 | ||
834 | static int has_wrprotected_page(struct kvm_vcpu *vcpu, gfn_t gfn, int level) | ||
835 | { | ||
836 | struct kvm_memory_slot *slot; | ||
837 | |||
838 | slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); | ||
839 | return __has_wrprotected_page(gfn, level, slot); | ||
840 | } | ||
841 | |||
837 | static int host_mapping_level(struct kvm *kvm, gfn_t gfn) | 842 | static int host_mapping_level(struct kvm *kvm, gfn_t gfn) |
838 | { | 843 | { |
839 | unsigned long page_size; | 844 | unsigned long page_size; |
@@ -851,6 +856,17 @@ static int host_mapping_level(struct kvm *kvm, gfn_t gfn) | |||
851 | return ret; | 856 | return ret; |
852 | } | 857 | } |
853 | 858 | ||
859 | static inline bool memslot_valid_for_gpte(struct kvm_memory_slot *slot, | ||
860 | bool no_dirty_log) | ||
861 | { | ||
862 | if (!slot || slot->flags & KVM_MEMSLOT_INVALID) | ||
863 | return false; | ||
864 | if (no_dirty_log && slot->dirty_bitmap) | ||
865 | return false; | ||
866 | |||
867 | return true; | ||
868 | } | ||
869 | |||
854 | static struct kvm_memory_slot * | 870 | static struct kvm_memory_slot * |
855 | gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn, | 871 | gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn, |
856 | bool no_dirty_log) | 872 | bool no_dirty_log) |
@@ -858,21 +874,25 @@ gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn, | |||
858 | struct kvm_memory_slot *slot; | 874 | struct kvm_memory_slot *slot; |
859 | 875 | ||
860 | slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); | 876 | slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); |
861 | if (!slot || slot->flags & KVM_MEMSLOT_INVALID || | 877 | if (!memslot_valid_for_gpte(slot, no_dirty_log)) |
862 | (no_dirty_log && slot->dirty_bitmap)) | ||
863 | slot = NULL; | 878 | slot = NULL; |
864 | 879 | ||
865 | return slot; | 880 | return slot; |
866 | } | 881 | } |
867 | 882 | ||
868 | static bool mapping_level_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t large_gfn) | 883 | static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn, |
869 | { | 884 | bool *force_pt_level) |
870 | return !gfn_to_memslot_dirty_bitmap(vcpu, large_gfn, true); | ||
871 | } | ||
872 | |||
873 | static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) | ||
874 | { | 885 | { |
875 | int host_level, level, max_level; | 886 | int host_level, level, max_level; |
887 | struct kvm_memory_slot *slot; | ||
888 | |||
889 | if (unlikely(*force_pt_level)) | ||
890 | return PT_PAGE_TABLE_LEVEL; | ||
891 | |||
892 | slot = kvm_vcpu_gfn_to_memslot(vcpu, large_gfn); | ||
893 | *force_pt_level = !memslot_valid_for_gpte(slot, true); | ||
894 | if (unlikely(*force_pt_level)) | ||
895 | return PT_PAGE_TABLE_LEVEL; | ||
876 | 896 | ||
877 | host_level = host_mapping_level(vcpu->kvm, large_gfn); | 897 | host_level = host_mapping_level(vcpu->kvm, large_gfn); |
878 | 898 | ||
@@ -882,7 +902,7 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) | |||
882 | max_level = min(kvm_x86_ops->get_lpage_level(), host_level); | 902 | max_level = min(kvm_x86_ops->get_lpage_level(), host_level); |
883 | 903 | ||
884 | for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level) | 904 | for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level) |
885 | if (has_wrprotected_page(vcpu, large_gfn, level)) | 905 | if (__has_wrprotected_page(large_gfn, level, slot)) |
886 | break; | 906 | break; |
887 | 907 | ||
888 | return level - 1; | 908 | return level - 1; |
@@ -2962,14 +2982,13 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, | |||
2962 | { | 2982 | { |
2963 | int r; | 2983 | int r; |
2964 | int level; | 2984 | int level; |
2965 | int force_pt_level; | 2985 | bool force_pt_level = false; |
2966 | pfn_t pfn; | 2986 | pfn_t pfn; |
2967 | unsigned long mmu_seq; | 2987 | unsigned long mmu_seq; |
2968 | bool map_writable, write = error_code & PFERR_WRITE_MASK; | 2988 | bool map_writable, write = error_code & PFERR_WRITE_MASK; |
2969 | 2989 | ||
2970 | force_pt_level = mapping_level_dirty_bitmap(vcpu, gfn); | 2990 | level = mapping_level(vcpu, gfn, &force_pt_level); |
2971 | if (likely(!force_pt_level)) { | 2991 | if (likely(!force_pt_level)) { |
2972 | level = mapping_level(vcpu, gfn); | ||
2973 | /* | 2992 | /* |
2974 | * This path builds a PAE pagetable - so we can map | 2993 | * This path builds a PAE pagetable - so we can map |
2975 | * 2mb pages at maximum. Therefore check if the level | 2994 | * 2mb pages at maximum. Therefore check if the level |
@@ -2979,8 +2998,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, | |||
2979 | level = PT_DIRECTORY_LEVEL; | 2998 | level = PT_DIRECTORY_LEVEL; |
2980 | 2999 | ||
2981 | gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); | 3000 | gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); |
2982 | } else | 3001 | } |
2983 | level = PT_PAGE_TABLE_LEVEL; | ||
2984 | 3002 | ||
2985 | if (fast_page_fault(vcpu, v, level, error_code)) | 3003 | if (fast_page_fault(vcpu, v, level, error_code)) |
2986 | return 0; | 3004 | return 0; |
@@ -3341,7 +3359,7 @@ exit: | |||
3341 | return reserved; | 3359 | return reserved; |
3342 | } | 3360 | } |
3343 | 3361 | ||
3344 | int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct) | 3362 | int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct) |
3345 | { | 3363 | { |
3346 | u64 spte; | 3364 | u64 spte; |
3347 | bool reserved; | 3365 | bool reserved; |
@@ -3350,7 +3368,7 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct) | |||
3350 | return RET_MMIO_PF_EMULATE; | 3368 | return RET_MMIO_PF_EMULATE; |
3351 | 3369 | ||
3352 | reserved = walk_shadow_page_get_mmio_spte(vcpu, addr, &spte); | 3370 | reserved = walk_shadow_page_get_mmio_spte(vcpu, addr, &spte); |
3353 | if (unlikely(reserved)) | 3371 | if (WARN_ON(reserved)) |
3354 | return RET_MMIO_PF_BUG; | 3372 | return RET_MMIO_PF_BUG; |
3355 | 3373 | ||
3356 | if (is_mmio_spte(spte)) { | 3374 | if (is_mmio_spte(spte)) { |
@@ -3374,17 +3392,7 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct) | |||
3374 | */ | 3392 | */ |
3375 | return RET_MMIO_PF_RETRY; | 3393 | return RET_MMIO_PF_RETRY; |
3376 | } | 3394 | } |
3377 | EXPORT_SYMBOL_GPL(handle_mmio_page_fault_common); | 3395 | EXPORT_SYMBOL_GPL(handle_mmio_page_fault); |
3378 | |||
3379 | static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, | ||
3380 | u32 error_code, bool direct) | ||
3381 | { | ||
3382 | int ret; | ||
3383 | |||
3384 | ret = handle_mmio_page_fault_common(vcpu, addr, direct); | ||
3385 | WARN_ON(ret == RET_MMIO_PF_BUG); | ||
3386 | return ret; | ||
3387 | } | ||
3388 | 3396 | ||
3389 | static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, | 3397 | static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, |
3390 | u32 error_code, bool prefault) | 3398 | u32 error_code, bool prefault) |
@@ -3395,7 +3403,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, | |||
3395 | pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code); | 3403 | pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code); |
3396 | 3404 | ||
3397 | if (unlikely(error_code & PFERR_RSVD_MASK)) { | 3405 | if (unlikely(error_code & PFERR_RSVD_MASK)) { |
3398 | r = handle_mmio_page_fault(vcpu, gva, error_code, true); | 3406 | r = handle_mmio_page_fault(vcpu, gva, true); |
3399 | 3407 | ||
3400 | if (likely(r != RET_MMIO_PF_INVALID)) | 3408 | if (likely(r != RET_MMIO_PF_INVALID)) |
3401 | return r; | 3409 | return r; |
@@ -3427,7 +3435,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) | |||
3427 | 3435 | ||
3428 | static bool can_do_async_pf(struct kvm_vcpu *vcpu) | 3436 | static bool can_do_async_pf(struct kvm_vcpu *vcpu) |
3429 | { | 3437 | { |
3430 | if (unlikely(!irqchip_in_kernel(vcpu->kvm) || | 3438 | if (unlikely(!lapic_in_kernel(vcpu) || |
3431 | kvm_event_needs_reinjection(vcpu))) | 3439 | kvm_event_needs_reinjection(vcpu))) |
3432 | return false; | 3440 | return false; |
3433 | 3441 | ||
@@ -3476,7 +3484,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, | |||
3476 | pfn_t pfn; | 3484 | pfn_t pfn; |
3477 | int r; | 3485 | int r; |
3478 | int level; | 3486 | int level; |
3479 | int force_pt_level; | 3487 | bool force_pt_level; |
3480 | gfn_t gfn = gpa >> PAGE_SHIFT; | 3488 | gfn_t gfn = gpa >> PAGE_SHIFT; |
3481 | unsigned long mmu_seq; | 3489 | unsigned long mmu_seq; |
3482 | int write = error_code & PFERR_WRITE_MASK; | 3490 | int write = error_code & PFERR_WRITE_MASK; |
@@ -3485,7 +3493,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, | |||
3485 | MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); | 3493 | MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); |
3486 | 3494 | ||
3487 | if (unlikely(error_code & PFERR_RSVD_MASK)) { | 3495 | if (unlikely(error_code & PFERR_RSVD_MASK)) { |
3488 | r = handle_mmio_page_fault(vcpu, gpa, error_code, true); | 3496 | r = handle_mmio_page_fault(vcpu, gpa, true); |
3489 | 3497 | ||
3490 | if (likely(r != RET_MMIO_PF_INVALID)) | 3498 | if (likely(r != RET_MMIO_PF_INVALID)) |
3491 | return r; | 3499 | return r; |
@@ -3495,20 +3503,15 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, | |||
3495 | if (r) | 3503 | if (r) |
3496 | return r; | 3504 | return r; |
3497 | 3505 | ||
3498 | if (mapping_level_dirty_bitmap(vcpu, gfn) || | 3506 | force_pt_level = !check_hugepage_cache_consistency(vcpu, gfn, |
3499 | !check_hugepage_cache_consistency(vcpu, gfn, PT_DIRECTORY_LEVEL)) | 3507 | PT_DIRECTORY_LEVEL); |
3500 | force_pt_level = 1; | 3508 | level = mapping_level(vcpu, gfn, &force_pt_level); |
3501 | else | ||
3502 | force_pt_level = 0; | ||
3503 | |||
3504 | if (likely(!force_pt_level)) { | 3509 | if (likely(!force_pt_level)) { |
3505 | level = mapping_level(vcpu, gfn); | ||
3506 | if (level > PT_DIRECTORY_LEVEL && | 3510 | if (level > PT_DIRECTORY_LEVEL && |
3507 | !check_hugepage_cache_consistency(vcpu, gfn, level)) | 3511 | !check_hugepage_cache_consistency(vcpu, gfn, level)) |
3508 | level = PT_DIRECTORY_LEVEL; | 3512 | level = PT_DIRECTORY_LEVEL; |
3509 | gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); | 3513 | gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); |
3510 | } else | 3514 | } |
3511 | level = PT_PAGE_TABLE_LEVEL; | ||
3512 | 3515 | ||
3513 | if (fast_page_fault(vcpu, gpa, level, error_code)) | 3516 | if (fast_page_fault(vcpu, gpa, level, error_code)) |
3514 | return 0; | 3517 | return 0; |
@@ -3706,7 +3709,7 @@ static void | |||
3706 | __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check, | 3709 | __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check, |
3707 | int maxphyaddr, bool execonly) | 3710 | int maxphyaddr, bool execonly) |
3708 | { | 3711 | { |
3709 | int pte; | 3712 | u64 bad_mt_xwr; |
3710 | 3713 | ||
3711 | rsvd_check->rsvd_bits_mask[0][3] = | 3714 | rsvd_check->rsvd_bits_mask[0][3] = |
3712 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7); | 3715 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7); |
@@ -3724,14 +3727,16 @@ __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check, | |||
3724 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 20); | 3727 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 20); |
3725 | rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0]; | 3728 | rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0]; |
3726 | 3729 | ||
3727 | for (pte = 0; pte < 64; pte++) { | 3730 | bad_mt_xwr = 0xFFull << (2 * 8); /* bits 3..5 must not be 2 */ |
3728 | int rwx_bits = pte & 7; | 3731 | bad_mt_xwr |= 0xFFull << (3 * 8); /* bits 3..5 must not be 3 */ |
3729 | int mt = pte >> 3; | 3732 | bad_mt_xwr |= 0xFFull << (7 * 8); /* bits 3..5 must not be 7 */ |
3730 | if (mt == 0x2 || mt == 0x3 || mt == 0x7 || | 3733 | bad_mt_xwr |= REPEAT_BYTE(1ull << 2); /* bits 0..2 must not be 010 */ |
3731 | rwx_bits == 0x2 || rwx_bits == 0x6 || | 3734 | bad_mt_xwr |= REPEAT_BYTE(1ull << 6); /* bits 0..2 must not be 110 */ |
3732 | (rwx_bits == 0x4 && !execonly)) | 3735 | if (!execonly) { |
3733 | rsvd_check->bad_mt_xwr |= (1ull << pte); | 3736 | /* bits 0..2 must not be 100 unless VMX capabilities allow it */ |
3737 | bad_mt_xwr |= REPEAT_BYTE(1ull << 4); | ||
3734 | } | 3738 | } |
3739 | rsvd_check->bad_mt_xwr = bad_mt_xwr; | ||
3735 | } | 3740 | } |
3736 | 3741 | ||
3737 | static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, | 3742 | static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, |
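
The rewritten __reset_rsvds_bits_mask_ept() computes bad_mt_xwr as a 64-entry lookup bitmap, indexed by an EPT pte's low six bits (memory type in bits 5:3, XWR in bits 2:0), instead of looping over all 64 combinations. A standalone check that the closed-form mask matches the loop it replaces:

#include <stdint.h>
#include <stdio.h>

#define REPEAT_BYTE(x)  (0x0101010101010101ULL * (x))

static uint64_t build_bad_mt_xwr(int execonly)
{
        uint64_t bad = 0;

        bad |= 0xFFull << (2 * 8);      /* memory type 2 is always reserved */
        bad |= 0xFFull << (3 * 8);      /* memory type 3 is always reserved */
        bad |= 0xFFull << (7 * 8);      /* memory type 7 is always reserved */
        bad |= REPEAT_BYTE(1ull << 2);  /* XWR == 010: write-only           */
        bad |= REPEAT_BYTE(1ull << 6);  /* XWR == 110: write + execute      */
        if (!execonly)
                bad |= REPEAT_BYTE(1ull << 4);  /* XWR == 100: execute-only */
        return bad;
}

int main(void)
{
        for (int execonly = 0; execonly <= 1; execonly++) {
                uint64_t old = 0;

                /* the per-pte loop this commit replaces */
                for (int pte = 0; pte < 64; pte++) {
                        int xwr = pte & 7, mt = pte >> 3;

                        if (mt == 2 || mt == 3 || mt == 7 || xwr == 2 ||
                            xwr == 6 || (xwr == 4 && !execonly))
                                old |= 1ull << pte;
                }
                printf("execonly=%d: %s\n", execonly,
                       old == build_bad_mt_xwr(execonly) ? "match" : "MISMATCH");
        }
        return 0;
}

The REPEAT_BYTE trick is what makes the closed form work: one byte of the mask per memory type, one bit within each byte per XWR pattern.
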
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index e4202e41d535..55ffb7b0f95e 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h | |||
@@ -56,13 +56,13 @@ void | |||
56 | reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context); | 56 | reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context); |
57 | 57 | ||
58 | /* | 58 | /* |
59 | * Return values of handle_mmio_page_fault_common: | 59 | * Return values of handle_mmio_page_fault: |
60 | * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction | 60 | * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction |
61 | * directly. | 61 | * directly. |
62 | * RET_MMIO_PF_INVALID: invalid spte is detected then let the real page | 62 | * RET_MMIO_PF_INVALID: invalid spte is detected then let the real page |
63 | * fault path update the mmio spte. | 63 | * fault path update the mmio spte. |
64 | * RET_MMIO_PF_RETRY: let CPU fault again on the address. | 64 | * RET_MMIO_PF_RETRY: let CPU fault again on the address. |
65 | * RET_MMIO_PF_BUG: bug is detected. | 65 | * RET_MMIO_PF_BUG: a bug was detected (and a WARN was printed). |
66 | */ | 66 | */ |
67 | enum { | 67 | enum { |
68 | RET_MMIO_PF_EMULATE = 1, | 68 | RET_MMIO_PF_EMULATE = 1, |
@@ -71,7 +71,7 @@ enum { | |||
71 | RET_MMIO_PF_BUG = -1 | 71 | RET_MMIO_PF_BUG = -1 |
72 | }; | 72 | }; |
73 | 73 | ||
74 | int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct); | 74 | int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct); |
75 | void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu); | 75 | void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu); |
76 | void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly); | 76 | void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly); |
77 | 77 | ||
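
With the WARN folded into handle_mmio_page_fault() itself, every caller dispatches on the same four codes. The two values not visible in this hunk, RET_MMIO_PF_INVALID and RET_MMIO_PF_RETRY, are assumed below; a tiny standalone dispatcher makes the contract concrete:

#include <stdio.h>

enum {  /* _EMULATE and _BUG as in the hunk; _INVALID and _RETRY assumed */
        RET_MMIO_PF_EMULATE = 1,
        RET_MMIO_PF_INVALID = 2,
        RET_MMIO_PF_RETRY = 0,
        RET_MMIO_PF_BUG = -1,
};

static const char *action(int r)
{
        switch (r) {
        case RET_MMIO_PF_EMULATE: return "emulate the instruction";
        case RET_MMIO_PF_INVALID: return "take the slow page-fault path";
        case RET_MMIO_PF_RETRY:   return "let the CPU fault again";
        case RET_MMIO_PF_BUG:     return "bug; the WARN already fired";
        }
        return "?";
}

int main(void)
{
        for (int r = RET_MMIO_PF_BUG; r <= RET_MMIO_PF_INVALID; r++)
                printf("%2d: %s\n", r, action(r));
        return 0;
}
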
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 736e6ab8784d..3058a22a658d 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -698,15 +698,14 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
698 | int r; | 698 | int r; |
699 | pfn_t pfn; | 699 | pfn_t pfn; |
700 | int level = PT_PAGE_TABLE_LEVEL; | 700 | int level = PT_PAGE_TABLE_LEVEL; |
701 | int force_pt_level; | 701 | bool force_pt_level = false; |
702 | unsigned long mmu_seq; | 702 | unsigned long mmu_seq; |
703 | bool map_writable, is_self_change_mapping; | 703 | bool map_writable, is_self_change_mapping; |
704 | 704 | ||
705 | pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); | 705 | pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); |
706 | 706 | ||
707 | if (unlikely(error_code & PFERR_RSVD_MASK)) { | 707 | if (unlikely(error_code & PFERR_RSVD_MASK)) { |
708 | r = handle_mmio_page_fault(vcpu, addr, error_code, | 708 | r = handle_mmio_page_fault(vcpu, addr, mmu_is_nested(vcpu)); |
709 | mmu_is_nested(vcpu)); | ||
710 | if (likely(r != RET_MMIO_PF_INVALID)) | 709 | if (likely(r != RET_MMIO_PF_INVALID)) |
711 | return r; | 710 | return r; |
712 | 711 | ||
@@ -743,15 +742,14 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
743 | is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu, | 742 | is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu, |
744 | &walker, user_fault, &vcpu->arch.write_fault_to_shadow_pgtable); | 743 | &walker, user_fault, &vcpu->arch.write_fault_to_shadow_pgtable); |
745 | 744 | ||
746 | if (walker.level >= PT_DIRECTORY_LEVEL) | 745 | if (walker.level >= PT_DIRECTORY_LEVEL && !is_self_change_mapping) { |
747 | force_pt_level = mapping_level_dirty_bitmap(vcpu, walker.gfn) | 746 | level = mapping_level(vcpu, walker.gfn, &force_pt_level); |
748 | || is_self_change_mapping; | 747 | if (likely(!force_pt_level)) { |
749 | else | 748 | level = min(walker.level, level); |
750 | force_pt_level = 1; | 749 | walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1); |
751 | if (!force_pt_level) { | 750 | } |
752 | level = min(walker.level, mapping_level(vcpu, walker.gfn)); | 751 | } else |
753 | walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1); | 752 | force_pt_level = true; |
754 | } | ||
755 | 753 | ||
756 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | 754 | mmu_seq = vcpu->kvm->mmu_notifier_seq; |
757 | smp_rmb(); | 755 | smp_rmb(); |
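
The guest-paging path now gets both answers from a single mapping_level() call: the level itself, and through *force_pt_level whether 4K must be forced (invalid memslot, dirty logging, or a self-modifying page-table walk). When a large level survives, the gfn is still rounded down to the hugepage boundary before mapping. A sketch of that rounding, with the pages-per-hugepage macro reconstructed for 4K pages (an assumption, not the kernel's KVM_PAGES_PER_HPAGE):

#include <stdint.h>
#include <stdio.h>

/* 4K pages: 1 << ((level - 1) * 9) pages per mapping at 'level' */
#define PAGES_PER_HPAGE(level)  (1ULL << (((level) - 1) * 9))

int main(void)
{
        uint64_t gfn = 0x12345;
        int level = 2;          /* a 2MB mapping */

        gfn &= ~(PAGES_PER_HPAGE(level) - 1);   /* the alignment step kept above */
        printf("%#llx\n", (unsigned long long)gfn);     /* 0x12200 */
        return 0;
}
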
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 2f9ed1ff0632..83a1c643f9a5 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -158,7 +158,8 @@ struct vcpu_svm { | |||
158 | unsigned long int3_rip; | 158 | unsigned long int3_rip; |
159 | u32 apf_reason; | 159 | u32 apf_reason; |
160 | 160 | ||
161 | u64 tsc_ratio; | 161 | /* cached guest cpuid flags for faster access */ |
162 | bool nrips_enabled : 1; | ||
162 | }; | 163 | }; |
163 | 164 | ||
164 | static DEFINE_PER_CPU(u64, current_tsc_ratio); | 165 | static DEFINE_PER_CPU(u64, current_tsc_ratio); |
@@ -211,7 +212,6 @@ static int nested_svm_intercept(struct vcpu_svm *svm); | |||
211 | static int nested_svm_vmexit(struct vcpu_svm *svm); | 212 | static int nested_svm_vmexit(struct vcpu_svm *svm); |
212 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, | 213 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, |
213 | bool has_error_code, u32 error_code); | 214 | bool has_error_code, u32 error_code); |
214 | static u64 __scale_tsc(u64 ratio, u64 tsc); | ||
215 | 215 | ||
216 | enum { | 216 | enum { |
217 | VMCB_INTERCEPTS, /* Intercept vectors, TSC offset, | 217 | VMCB_INTERCEPTS, /* Intercept vectors, TSC offset, |
@@ -891,20 +891,9 @@ static __init int svm_hardware_setup(void) | |||
891 | kvm_enable_efer_bits(EFER_FFXSR); | 891 | kvm_enable_efer_bits(EFER_FFXSR); |
892 | 892 | ||
893 | if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) { | 893 | if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) { |
894 | u64 max; | ||
895 | |||
896 | kvm_has_tsc_control = true; | 894 | kvm_has_tsc_control = true; |
897 | 895 | kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX; | |
898 | /* | 896 | kvm_tsc_scaling_ratio_frac_bits = 32; |
899 | * Make sure the user can only configure tsc_khz values that | ||
900 | * fit into a signed integer. | ||
901 | * A min value is not calculated needed because it will always | ||
902 | * be 1 on all machines and a value of 0 is used to disable | ||
903 | * tsc-scaling for the vcpu. | ||
904 | */ | ||
905 | max = min(0x7fffffffULL, __scale_tsc(tsc_khz, TSC_RATIO_MAX)); | ||
906 | |||
907 | kvm_max_guest_tsc_khz = max; | ||
908 | } | 897 | } |
909 | 898 | ||
910 | if (nested) { | 899 | if (nested) { |
@@ -968,68 +957,6 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type) | |||
968 | seg->base = 0; | 957 | seg->base = 0; |
969 | } | 958 | } |
970 | 959 | ||
971 | static u64 __scale_tsc(u64 ratio, u64 tsc) | ||
972 | { | ||
973 | u64 mult, frac, _tsc; | ||
974 | |||
975 | mult = ratio >> 32; | ||
976 | frac = ratio & ((1ULL << 32) - 1); | ||
977 | |||
978 | _tsc = tsc; | ||
979 | _tsc *= mult; | ||
980 | _tsc += (tsc >> 32) * frac; | ||
981 | _tsc += ((tsc & ((1ULL << 32) - 1)) * frac) >> 32; | ||
982 | |||
983 | return _tsc; | ||
984 | } | ||
985 | |||
986 | static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc) | ||
987 | { | ||
988 | struct vcpu_svm *svm = to_svm(vcpu); | ||
989 | u64 _tsc = tsc; | ||
990 | |||
991 | if (svm->tsc_ratio != TSC_RATIO_DEFAULT) | ||
992 | _tsc = __scale_tsc(svm->tsc_ratio, tsc); | ||
993 | |||
994 | return _tsc; | ||
995 | } | ||
996 | |||
997 | static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale) | ||
998 | { | ||
999 | struct vcpu_svm *svm = to_svm(vcpu); | ||
1000 | u64 ratio; | ||
1001 | u64 khz; | ||
1002 | |||
1003 | /* Guest TSC same frequency as host TSC? */ | ||
1004 | if (!scale) { | ||
1005 | svm->tsc_ratio = TSC_RATIO_DEFAULT; | ||
1006 | return; | ||
1007 | } | ||
1008 | |||
1009 | /* TSC scaling supported? */ | ||
1010 | if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) { | ||
1011 | if (user_tsc_khz > tsc_khz) { | ||
1012 | vcpu->arch.tsc_catchup = 1; | ||
1013 | vcpu->arch.tsc_always_catchup = 1; | ||
1014 | } else | ||
1015 | WARN(1, "user requested TSC rate below hardware speed\n"); | ||
1016 | return; | ||
1017 | } | ||
1018 | |||
1019 | khz = user_tsc_khz; | ||
1020 | |||
1021 | /* TSC scaling required - calculate ratio */ | ||
1022 | ratio = khz << 32; | ||
1023 | do_div(ratio, tsc_khz); | ||
1024 | |||
1025 | if (ratio == 0 || ratio & TSC_RATIO_RSVD) { | ||
1026 | WARN_ONCE(1, "Invalid TSC ratio - virtual-tsc-khz=%u\n", | ||
1027 | user_tsc_khz); | ||
1028 | return; | ||
1029 | } | ||
1030 | svm->tsc_ratio = ratio; | ||
1031 | } | ||
1032 | |||
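The removed __scale_tsc()/svm_set_tsc_khz() pair is superseded by common code (kvm_scale_tsc() plus the kvm_max_tsc_scaling_ratio and kvm_tsc_scaling_ratio_frac_bits set in svm_hardware_setup() above). What the SVM-local helper did was a 32.32 fixed-point multiply, split into partial products so every intermediate value fits in 64 bits. A standalone sketch of that arithmetic, assuming the 32 fractional bits SVM uses:

    #include <stdint.h>
    #include <stdio.h>

    /* 32.32 fixed-point multiply, as in the removed __scale_tsc(). */
    static uint64_t scale_tsc_32_32(uint64_t ratio, uint64_t tsc)
    {
        uint64_t mult = ratio >> 32;              /* integer part */
        uint64_t frac = ratio & 0xffffffffULL;    /* fractional part */

        /* Split the multiply so nothing overflows 64 bits. */
        return tsc * mult
             + (tsc >> 32) * frac
             + (((tsc & 0xffffffffULL) * frac) >> 32);
    }

    int main(void)
    {
        /* Guest at 1.5x the host rate: ratio = 1.5 in 32.32 format. */
        uint64_t ratio = (3ULL << 32) / 2;

        printf("scaled: %llu\n",    /* expect 1500000 */
               (unsigned long long)scale_tsc_32_32(ratio, 1000000ULL));
        return 0;
    }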
1033 | static u64 svm_read_tsc_offset(struct kvm_vcpu *vcpu) | 960 | static u64 svm_read_tsc_offset(struct kvm_vcpu *vcpu) |
1034 | { | 961 | { |
1035 | struct vcpu_svm *svm = to_svm(vcpu); | 962 | struct vcpu_svm *svm = to_svm(vcpu); |
@@ -1056,16 +983,10 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) | |||
1056 | mark_dirty(svm->vmcb, VMCB_INTERCEPTS); | 983 | mark_dirty(svm->vmcb, VMCB_INTERCEPTS); |
1057 | } | 984 | } |
1058 | 985 | ||
1059 | static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host) | 986 | static void svm_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment) |
1060 | { | 987 | { |
1061 | struct vcpu_svm *svm = to_svm(vcpu); | 988 | struct vcpu_svm *svm = to_svm(vcpu); |
1062 | 989 | ||
1063 | if (host) { | ||
1064 | if (svm->tsc_ratio != TSC_RATIO_DEFAULT) | ||
1065 | WARN_ON(adjustment < 0); | ||
1066 | adjustment = svm_scale_tsc(vcpu, (u64)adjustment); | ||
1067 | } | ||
1068 | |||
1069 | svm->vmcb->control.tsc_offset += adjustment; | 990 | svm->vmcb->control.tsc_offset += adjustment; |
1070 | if (is_guest_mode(vcpu)) | 991 | if (is_guest_mode(vcpu)) |
1071 | svm->nested.hsave->control.tsc_offset += adjustment; | 992 | svm->nested.hsave->control.tsc_offset += adjustment; |
@@ -1077,16 +998,7 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool ho | |||
1077 | mark_dirty(svm->vmcb, VMCB_INTERCEPTS); | 998 | mark_dirty(svm->vmcb, VMCB_INTERCEPTS); |
1078 | } | 999 | } |
1079 | 1000 | ||
1080 | static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) | 1001 | static void init_vmcb(struct vcpu_svm *svm) |
1081 | { | ||
1082 | u64 tsc; | ||
1083 | |||
1084 | tsc = svm_scale_tsc(vcpu, rdtsc()); | ||
1085 | |||
1086 | return target_tsc - tsc; | ||
1087 | } | ||
1088 | |||
1089 | static void init_vmcb(struct vcpu_svm *svm, bool init_event) | ||
1090 | { | 1002 | { |
1091 | struct vmcb_control_area *control = &svm->vmcb->control; | 1003 | struct vmcb_control_area *control = &svm->vmcb->control; |
1092 | struct vmcb_save_area *save = &svm->vmcb->save; | 1004 | struct vmcb_save_area *save = &svm->vmcb->save; |
@@ -1107,6 +1019,8 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event) | |||
1107 | set_exception_intercept(svm, PF_VECTOR); | 1019 | set_exception_intercept(svm, PF_VECTOR); |
1108 | set_exception_intercept(svm, UD_VECTOR); | 1020 | set_exception_intercept(svm, UD_VECTOR); |
1109 | set_exception_intercept(svm, MC_VECTOR); | 1021 | set_exception_intercept(svm, MC_VECTOR); |
1022 | set_exception_intercept(svm, AC_VECTOR); | ||
1023 | set_exception_intercept(svm, DB_VECTOR); | ||
1110 | 1024 | ||
1111 | set_intercept(svm, INTERCEPT_INTR); | 1025 | set_intercept(svm, INTERCEPT_INTR); |
1112 | set_intercept(svm, INTERCEPT_NMI); | 1026 | set_intercept(svm, INTERCEPT_NMI); |
@@ -1157,8 +1071,7 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event) | |||
1157 | init_sys_seg(&save->ldtr, SEG_TYPE_LDT); | 1071 | init_sys_seg(&save->ldtr, SEG_TYPE_LDT); |
1158 | init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16); | 1072 | init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16); |
1159 | 1073 | ||
1160 | if (!init_event) | 1074 | svm_set_efer(&svm->vcpu, 0); |
1161 | svm_set_efer(&svm->vcpu, 0); | ||
1162 | save->dr6 = 0xffff0ff0; | 1075 | save->dr6 = 0xffff0ff0; |
1163 | kvm_set_rflags(&svm->vcpu, 2); | 1076 | kvm_set_rflags(&svm->vcpu, 2); |
1164 | save->rip = 0x0000fff0; | 1077 | save->rip = 0x0000fff0; |
@@ -1212,7 +1125,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) | |||
1212 | if (kvm_vcpu_is_reset_bsp(&svm->vcpu)) | 1125 | if (kvm_vcpu_is_reset_bsp(&svm->vcpu)) |
1213 | svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; | 1126 | svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; |
1214 | } | 1127 | } |
1215 | init_vmcb(svm, init_event); | 1128 | init_vmcb(svm); |
1216 | 1129 | ||
1217 | kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy); | 1130 | kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy); |
1218 | kvm_register_write(vcpu, VCPU_REGS_RDX, eax); | 1131 | kvm_register_write(vcpu, VCPU_REGS_RDX, eax); |
@@ -1233,8 +1146,6 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | |||
1233 | goto out; | 1146 | goto out; |
1234 | } | 1147 | } |
1235 | 1148 | ||
1236 | svm->tsc_ratio = TSC_RATIO_DEFAULT; | ||
1237 | |||
1238 | err = kvm_vcpu_init(&svm->vcpu, kvm, id); | 1149 | err = kvm_vcpu_init(&svm->vcpu, kvm, id); |
1239 | if (err) | 1150 | if (err) |
1240 | goto free_svm; | 1151 | goto free_svm; |
@@ -1268,7 +1179,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | |||
1268 | clear_page(svm->vmcb); | 1179 | clear_page(svm->vmcb); |
1269 | svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; | 1180 | svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; |
1270 | svm->asid_generation = 0; | 1181 | svm->asid_generation = 0; |
1271 | init_vmcb(svm, false); | 1182 | init_vmcb(svm); |
1272 | 1183 | ||
1273 | svm_init_osvw(&svm->vcpu); | 1184 | svm_init_osvw(&svm->vcpu); |
1274 | 1185 | ||
@@ -1320,10 +1231,12 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
1320 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) | 1231 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) |
1321 | rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); | 1232 | rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); |
1322 | 1233 | ||
1323 | if (static_cpu_has(X86_FEATURE_TSCRATEMSR) && | 1234 | if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) { |
1324 | svm->tsc_ratio != __this_cpu_read(current_tsc_ratio)) { | 1235 | u64 tsc_ratio = vcpu->arch.tsc_scaling_ratio; |
1325 | __this_cpu_write(current_tsc_ratio, svm->tsc_ratio); | 1236 | if (tsc_ratio != __this_cpu_read(current_tsc_ratio)) { |
1326 | wrmsrl(MSR_AMD64_TSC_RATIO, svm->tsc_ratio); | 1237 | __this_cpu_write(current_tsc_ratio, tsc_ratio); |
1238 | wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio); | ||
1239 | } | ||
1327 | } | 1240 | } |
1328 | } | 1241 | } |
1329 | 1242 | ||
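The reworked svm_vcpu_load() hunk above keeps the MSR_AMD64_TSC_RATIO write conditional on a per-CPU shadow value, now sourced from the common vcpu->arch.tsc_scaling_ratio instead of the deleted svm->tsc_ratio. A sketch of the general write-avoidance pattern, with a plain global and printf standing in for the kernel's per-CPU accessors and wrmsrl():

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t shadow_tsc_ratio;   /* one copy per CPU in the kernel */

    static void wrmsr_tsc_ratio(uint64_t v)   /* stand-in for wrmsrl() */
    {
        printf("MSR write: 0x%llx\n", (unsigned long long)v);
    }

    static void load_tsc_ratio(uint64_t want)
    {
        /* Skip the expensive MSR write when the value is unchanged. */
        if (want != shadow_tsc_ratio) {
            shadow_tsc_ratio = want;
            wrmsr_tsc_ratio(want);
        }
    }

    int main(void)
    {
        load_tsc_ratio(1ULL << 32);   /* writes */
        load_tsc_ratio(1ULL << 32);   /* skipped: same vCPU ratio again */
        return 0;
    }

The shadow makes back-to-back loads of vCPUs with the same ratio free, which matters because MSR writes are serializing.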
@@ -1642,20 +1555,13 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, | |||
1642 | mark_dirty(svm->vmcb, VMCB_SEG); | 1555 | mark_dirty(svm->vmcb, VMCB_SEG); |
1643 | } | 1556 | } |
1644 | 1557 | ||
1645 | static void update_db_bp_intercept(struct kvm_vcpu *vcpu) | 1558 | static void update_bp_intercept(struct kvm_vcpu *vcpu) |
1646 | { | 1559 | { |
1647 | struct vcpu_svm *svm = to_svm(vcpu); | 1560 | struct vcpu_svm *svm = to_svm(vcpu); |
1648 | 1561 | ||
1649 | clr_exception_intercept(svm, DB_VECTOR); | ||
1650 | clr_exception_intercept(svm, BP_VECTOR); | 1562 | clr_exception_intercept(svm, BP_VECTOR); |
1651 | 1563 | ||
1652 | if (svm->nmi_singlestep) | ||
1653 | set_exception_intercept(svm, DB_VECTOR); | ||
1654 | |||
1655 | if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { | 1564 | if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { |
1656 | if (vcpu->guest_debug & | ||
1657 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) | ||
1658 | set_exception_intercept(svm, DB_VECTOR); | ||
1659 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) | 1565 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) |
1660 | set_exception_intercept(svm, BP_VECTOR); | 1566 | set_exception_intercept(svm, BP_VECTOR); |
1661 | } else | 1567 | } else |
@@ -1761,7 +1667,6 @@ static int db_interception(struct vcpu_svm *svm) | |||
1761 | if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) | 1667 | if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) |
1762 | svm->vmcb->save.rflags &= | 1668 | svm->vmcb->save.rflags &= |
1763 | ~(X86_EFLAGS_TF | X86_EFLAGS_RF); | 1669 | ~(X86_EFLAGS_TF | X86_EFLAGS_RF); |
1764 | update_db_bp_intercept(&svm->vcpu); | ||
1765 | } | 1670 | } |
1766 | 1671 | ||
1767 | if (svm->vcpu.guest_debug & | 1672 | if (svm->vcpu.guest_debug & |
@@ -1796,6 +1701,12 @@ static int ud_interception(struct vcpu_svm *svm) | |||
1796 | return 1; | 1701 | return 1; |
1797 | } | 1702 | } |
1798 | 1703 | ||
1704 | static int ac_interception(struct vcpu_svm *svm) | ||
1705 | { | ||
1706 | kvm_queue_exception_e(&svm->vcpu, AC_VECTOR, 0); | ||
1707 | return 1; | ||
1708 | } | ||
1709 | |||
1799 | static void svm_fpu_activate(struct kvm_vcpu *vcpu) | 1710 | static void svm_fpu_activate(struct kvm_vcpu *vcpu) |
1800 | { | 1711 | { |
1801 | struct vcpu_svm *svm = to_svm(vcpu); | 1712 | struct vcpu_svm *svm = to_svm(vcpu); |
@@ -1890,7 +1801,7 @@ static int shutdown_interception(struct vcpu_svm *svm) | |||
1890 | * so reinitialize it. | 1801 | * so reinitialize it. |
1891 | */ | 1802 | */ |
1892 | clear_page(svm->vmcb); | 1803 | clear_page(svm->vmcb); |
1893 | init_vmcb(svm, false); | 1804 | init_vmcb(svm); |
1894 | 1805 | ||
1895 | kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; | 1806 | kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; |
1896 | return 0; | 1807 | return 0; |
@@ -2365,7 +2276,9 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
2365 | nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2; | 2276 | nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2; |
2366 | nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info; | 2277 | nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info; |
2367 | nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err; | 2278 | nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err; |
2368 | nested_vmcb->control.next_rip = vmcb->control.next_rip; | 2279 | |
2280 | if (svm->nrips_enabled) | ||
2281 | nested_vmcb->control.next_rip = vmcb->control.next_rip; | ||
2369 | 2282 | ||
2370 | /* | 2283 | /* |
2371 | * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have | 2284 | * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have |
@@ -3060,7 +2973,7 @@ static int cr8_write_interception(struct vcpu_svm *svm) | |||
3060 | u8 cr8_prev = kvm_get_cr8(&svm->vcpu); | 2973 | u8 cr8_prev = kvm_get_cr8(&svm->vcpu); |
3061 | /* instruction emulation calls kvm_set_cr8() */ | 2974 | /* instruction emulation calls kvm_set_cr8() */ |
3062 | r = cr_interception(svm); | 2975 | r = cr_interception(svm); |
3063 | if (irqchip_in_kernel(svm->vcpu.kvm)) | 2976 | if (lapic_in_kernel(&svm->vcpu)) |
3064 | return r; | 2977 | return r; |
3065 | if (cr8_prev <= kvm_get_cr8(&svm->vcpu)) | 2978 | if (cr8_prev <= kvm_get_cr8(&svm->vcpu)) |
3066 | return r; | 2979 | return r; |
@@ -3071,8 +2984,7 @@ static int cr8_write_interception(struct vcpu_svm *svm) | |||
3071 | static u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) | 2984 | static u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) |
3072 | { | 2985 | { |
3073 | struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu)); | 2986 | struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu)); |
3074 | return vmcb->control.tsc_offset + | 2987 | return vmcb->control.tsc_offset + host_tsc; |
3075 | svm_scale_tsc(vcpu, host_tsc); | ||
3076 | } | 2988 | } |
3077 | 2989 | ||
3078 | static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | 2990 | static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) |
@@ -3082,7 +2994,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
3082 | switch (msr_info->index) { | 2994 | switch (msr_info->index) { |
3083 | case MSR_IA32_TSC: { | 2995 | case MSR_IA32_TSC: { |
3084 | msr_info->data = svm->vmcb->control.tsc_offset + | 2996 | msr_info->data = svm->vmcb->control.tsc_offset + |
3085 | svm_scale_tsc(vcpu, rdtsc()); | 2997 | kvm_scale_tsc(vcpu, rdtsc()); |
3086 | 2998 | ||
3087 | break; | 2999 | break; |
3088 | } | 3000 | } |
@@ -3294,24 +3206,11 @@ static int msr_interception(struct vcpu_svm *svm) | |||
3294 | 3206 | ||
3295 | static int interrupt_window_interception(struct vcpu_svm *svm) | 3207 | static int interrupt_window_interception(struct vcpu_svm *svm) |
3296 | { | 3208 | { |
3297 | struct kvm_run *kvm_run = svm->vcpu.run; | ||
3298 | |||
3299 | kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); | 3209 | kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); |
3300 | svm_clear_vintr(svm); | 3210 | svm_clear_vintr(svm); |
3301 | svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; | 3211 | svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; |
3302 | mark_dirty(svm->vmcb, VMCB_INTR); | 3212 | mark_dirty(svm->vmcb, VMCB_INTR); |
3303 | ++svm->vcpu.stat.irq_window_exits; | 3213 | ++svm->vcpu.stat.irq_window_exits; |
3304 | /* | ||
3305 | * If the user space waits to inject interrupts, exit as soon as | ||
3306 | * possible | ||
3307 | */ | ||
3308 | if (!irqchip_in_kernel(svm->vcpu.kvm) && | ||
3309 | kvm_run->request_interrupt_window && | ||
3310 | !kvm_cpu_has_interrupt(&svm->vcpu)) { | ||
3311 | kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; | ||
3312 | return 0; | ||
3313 | } | ||
3314 | |||
3315 | return 1; | 3214 | return 1; |
3316 | } | 3215 | } |
3317 | 3216 | ||
@@ -3371,6 +3270,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
3371 | [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, | 3270 | [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, |
3372 | [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, | 3271 | [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, |
3373 | [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, | 3272 | [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, |
3273 | [SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception, | ||
3374 | [SVM_EXIT_INTR] = intr_interception, | 3274 | [SVM_EXIT_INTR] = intr_interception, |
3375 | [SVM_EXIT_NMI] = nmi_interception, | 3275 | [SVM_EXIT_NMI] = nmi_interception, |
3376 | [SVM_EXIT_SMI] = nop_on_interception, | 3276 | [SVM_EXIT_SMI] = nop_on_interception, |
@@ -3659,12 +3559,12 @@ static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) | |||
3659 | return; | 3559 | return; |
3660 | } | 3560 | } |
3661 | 3561 | ||
3662 | static int svm_vm_has_apicv(struct kvm *kvm) | 3562 | static int svm_cpu_uses_apicv(struct kvm_vcpu *vcpu) |
3663 | { | 3563 | { |
3664 | return 0; | 3564 | return 0; |
3665 | } | 3565 | } |
3666 | 3566 | ||
3667 | static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) | 3567 | static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu) |
3668 | { | 3568 | { |
3669 | return; | 3569 | return; |
3670 | } | 3570 | } |
@@ -3754,7 +3654,6 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) | |||
3754 | */ | 3654 | */ |
3755 | svm->nmi_singlestep = true; | 3655 | svm->nmi_singlestep = true; |
3756 | svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); | 3656 | svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); |
3757 | update_db_bp_intercept(vcpu); | ||
3758 | } | 3657 | } |
3759 | 3658 | ||
3760 | static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) | 3659 | static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) |
@@ -4098,6 +3997,10 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) | |||
4098 | 3997 | ||
4099 | static void svm_cpuid_update(struct kvm_vcpu *vcpu) | 3998 | static void svm_cpuid_update(struct kvm_vcpu *vcpu) |
4100 | { | 3999 | { |
4000 | struct vcpu_svm *svm = to_svm(vcpu); | ||
4001 | |||
4002 | /* Update nrips enabled cache */ | ||
4003 | svm->nrips_enabled = !!guest_cpuid_has_nrips(&svm->vcpu); | ||
4101 | } | 4004 | } |
4102 | 4005 | ||
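svm_cpuid_update() above now caches guest_cpuid_has_nrips() in the nrips_enabled bitfield added to struct vcpu_svm, so the nested #VMEXIT path tests one bit instead of re-walking the guest CPUID table. A standalone sketch of the caching idea; the CPUID leaf and bit position follow the SVM convention (leaf 0x8000000A EDX) but are included here for illustration only:

    #include <stdbool.h>
    #include <stdio.h>

    struct vcpu {
        unsigned int cpuid_8000000a_edx;   /* stand-in for the CPUID table */
        bool nrips_enabled : 1;            /* cached copy, one-bit test */
    };

    #define SVM_FEATURE_NRIP (1u << 3)     /* NRIP-save, per the SVM spec */

    static void cpuid_update(struct vcpu *v)
    {
        /* Refresh the cache whenever userspace rewrites guest CPUID. */
        v->nrips_enabled = !!(v->cpuid_8000000a_edx & SVM_FEATURE_NRIP);
    }

    int main(void)
    {
        struct vcpu v = { .cpuid_8000000a_edx = SVM_FEATURE_NRIP };

        cpuid_update(&v);
        printf("nrips_enabled=%d\n", v.nrips_enabled);
        return 0;
    }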
4103 | static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) | 4006 | static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) |
@@ -4376,7 +4279,7 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
4376 | .vcpu_load = svm_vcpu_load, | 4279 | .vcpu_load = svm_vcpu_load, |
4377 | .vcpu_put = svm_vcpu_put, | 4280 | .vcpu_put = svm_vcpu_put, |
4378 | 4281 | ||
4379 | .update_db_bp_intercept = update_db_bp_intercept, | 4282 | .update_bp_intercept = update_bp_intercept, |
4380 | .get_msr = svm_get_msr, | 4283 | .get_msr = svm_get_msr, |
4381 | .set_msr = svm_set_msr, | 4284 | .set_msr = svm_set_msr, |
4382 | .get_segment_base = svm_get_segment_base, | 4285 | .get_segment_base = svm_get_segment_base, |
@@ -4425,7 +4328,7 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
4425 | .enable_irq_window = enable_irq_window, | 4328 | .enable_irq_window = enable_irq_window, |
4426 | .update_cr8_intercept = update_cr8_intercept, | 4329 | .update_cr8_intercept = update_cr8_intercept, |
4427 | .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode, | 4330 | .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode, |
4428 | .vm_has_apicv = svm_vm_has_apicv, | 4331 | .cpu_uses_apicv = svm_cpu_uses_apicv, |
4429 | .load_eoi_exitmap = svm_load_eoi_exitmap, | 4332 | .load_eoi_exitmap = svm_load_eoi_exitmap, |
4430 | .sync_pir_to_irr = svm_sync_pir_to_irr, | 4333 | .sync_pir_to_irr = svm_sync_pir_to_irr, |
4431 | 4334 | ||
@@ -4448,11 +4351,9 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
4448 | 4351 | ||
4449 | .has_wbinvd_exit = svm_has_wbinvd_exit, | 4352 | .has_wbinvd_exit = svm_has_wbinvd_exit, |
4450 | 4353 | ||
4451 | .set_tsc_khz = svm_set_tsc_khz, | ||
4452 | .read_tsc_offset = svm_read_tsc_offset, | 4354 | .read_tsc_offset = svm_read_tsc_offset, |
4453 | .write_tsc_offset = svm_write_tsc_offset, | 4355 | .write_tsc_offset = svm_write_tsc_offset, |
4454 | .adjust_tsc_offset = svm_adjust_tsc_offset, | 4356 | .adjust_tsc_offset_guest = svm_adjust_tsc_offset_guest, |
4455 | .compute_tsc_offset = svm_compute_tsc_offset, | ||
4456 | .read_l1_tsc = svm_read_l1_tsc, | 4357 | .read_l1_tsc = svm_read_l1_tsc, |
4457 | 4358 | ||
4458 | .set_tdp_cr3 = set_tdp_cr3, | 4359 | .set_tdp_cr3 = set_tdp_cr3, |
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 4eae7c35ddf5..120302511802 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h | |||
@@ -129,6 +129,24 @@ TRACE_EVENT(kvm_pio, | |||
129 | ); | 129 | ); |
130 | 130 | ||
131 | /* | 131 | /* |
132 | * Tracepoint for fast mmio. | ||
133 | */ | ||
134 | TRACE_EVENT(kvm_fast_mmio, | ||
135 | TP_PROTO(u64 gpa), | ||
136 | TP_ARGS(gpa), | ||
137 | |||
138 | TP_STRUCT__entry( | ||
139 | __field(u64, gpa) | ||
140 | ), | ||
141 | |||
142 | TP_fast_assign( | ||
143 | __entry->gpa = gpa; | ||
144 | ), | ||
145 | |||
146 | TP_printk("fast mmio at gpa 0x%llx", __entry->gpa) | ||
147 | ); | ||
148 | |||
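A TRACE_EVENT(kvm_fast_mmio, ...) definition like the one above makes the kernel generate a trace_kvm_fast_mmio() hook for call sites to invoke. A userspace sketch of that shape, with the generated hook stubbed out by printf so the flow is runnable; the call site shown is assumed for illustration, not taken from this patch:

    #include <stdint.h>
    #include <stdio.h>

    /* Stub for the hook TRACE_EVENT() would generate in the kernel. */
    static void trace_kvm_fast_mmio(uint64_t gpa)
    {
        /* Mirrors the TP_printk format string above. */
        printf("fast mmio at gpa 0x%llx\n", (unsigned long long)gpa);
    }

    int main(void)
    {
        /* e.g. on an exit that is handled via the fast-MMIO path */
        trace_kvm_fast_mmio(0xfee00000ULL);
        return 0;
    }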
149 | /* | ||
132 | * Tracepoint for cpuid. | 150 | * Tracepoint for cpuid. |
133 | */ | 151 | */ |
134 | TRACE_EVENT(kvm_cpuid, | 152 | TRACE_EVENT(kvm_cpuid, |
@@ -974,6 +992,39 @@ TRACE_EVENT(kvm_enter_smm, | |||
974 | __entry->smbase) | 992 | __entry->smbase) |
975 | ); | 993 | ); |
976 | 994 | ||
995 | /* | ||
996 | * Tracepoint for VT-d posted-interrupts. | ||
997 | */ | ||
998 | TRACE_EVENT(kvm_pi_irte_update, | ||
999 | TP_PROTO(unsigned int vcpu_id, unsigned int gsi, | ||
1000 | unsigned int gvec, u64 pi_desc_addr, bool set), | ||
1001 | TP_ARGS(vcpu_id, gsi, gvec, pi_desc_addr, set), | ||
1002 | |||
1003 | TP_STRUCT__entry( | ||
1004 | __field( unsigned int, vcpu_id ) | ||
1005 | __field( unsigned int, gsi ) | ||
1006 | __field( unsigned int, gvec ) | ||
1007 | __field( u64, pi_desc_addr ) | ||
1008 | __field( bool, set ) | ||
1009 | ), | ||
1010 | |||
1011 | TP_fast_assign( | ||
1012 | __entry->vcpu_id = vcpu_id; | ||
1013 | __entry->gsi = gsi; | ||
1014 | __entry->gvec = gvec; | ||
1015 | __entry->pi_desc_addr = pi_desc_addr; | ||
1016 | __entry->set = set; | ||
1017 | ), | ||
1018 | |||
1019 | TP_printk("VT-d PI is %s for this irq, vcpu %u, gsi: 0x%x, " | ||
1020 | "gvec: 0x%x, pi_desc_addr: 0x%llx", | ||
1021 | __entry->set ? "enabled and being updated" : "disabled", | ||
1022 | __entry->vcpu_id, | ||
1023 | __entry->gsi, | ||
1024 | __entry->gvec, | ||
1025 | __entry->pi_desc_addr) | ||
1026 | ); | ||
1027 | |||
977 | #endif /* _TRACE_KVM_H */ | 1028 | #endif /* _TRACE_KVM_H */ |
978 | 1029 | ||
979 | #undef TRACE_INCLUDE_PATH | 1030 | #undef TRACE_INCLUDE_PATH |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 6a8bc64566ab..af823a388c19 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -35,6 +35,7 @@ | |||
35 | #include "kvm_cache_regs.h" | 35 | #include "kvm_cache_regs.h" |
36 | #include "x86.h" | 36 | #include "x86.h" |
37 | 37 | ||
38 | #include <asm/cpu.h> | ||
38 | #include <asm/io.h> | 39 | #include <asm/io.h> |
39 | #include <asm/desc.h> | 40 | #include <asm/desc.h> |
40 | #include <asm/vmx.h> | 41 | #include <asm/vmx.h> |
@@ -45,6 +46,7 @@ | |||
45 | #include <asm/debugreg.h> | 46 | #include <asm/debugreg.h> |
46 | #include <asm/kexec.h> | 47 | #include <asm/kexec.h> |
47 | #include <asm/apic.h> | 48 | #include <asm/apic.h> |
49 | #include <asm/irq_remapping.h> | ||
48 | 50 | ||
49 | #include "trace.h" | 51 | #include "trace.h" |
50 | #include "pmu.h" | 52 | #include "pmu.h" |
@@ -105,6 +107,8 @@ static u64 __read_mostly host_xss; | |||
105 | static bool __read_mostly enable_pml = 1; | 107 | static bool __read_mostly enable_pml = 1; |
106 | module_param_named(pml, enable_pml, bool, S_IRUGO); | 108 | module_param_named(pml, enable_pml, bool, S_IRUGO); |
107 | 109 | ||
110 | #define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL | ||
111 | |||
108 | #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD) | 112 | #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD) |
109 | #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE) | 113 | #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE) |
110 | #define KVM_VM_CR0_ALWAYS_ON \ | 114 | #define KVM_VM_CR0_ALWAYS_ON \ |
@@ -424,6 +428,9 @@ struct nested_vmx { | |||
424 | /* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */ | 428 | /* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */ |
425 | u64 vmcs01_debugctl; | 429 | u64 vmcs01_debugctl; |
426 | 430 | ||
431 | u16 vpid02; | ||
432 | u16 last_vpid; | ||
433 | |||
427 | u32 nested_vmx_procbased_ctls_low; | 434 | u32 nested_vmx_procbased_ctls_low; |
428 | u32 nested_vmx_procbased_ctls_high; | 435 | u32 nested_vmx_procbased_ctls_high; |
429 | u32 nested_vmx_true_procbased_ctls_low; | 436 | u32 nested_vmx_true_procbased_ctls_low; |
@@ -440,14 +447,33 @@ struct nested_vmx { | |||
440 | u32 nested_vmx_misc_low; | 447 | u32 nested_vmx_misc_low; |
441 | u32 nested_vmx_misc_high; | 448 | u32 nested_vmx_misc_high; |
442 | u32 nested_vmx_ept_caps; | 449 | u32 nested_vmx_ept_caps; |
450 | u32 nested_vmx_vpid_caps; | ||
443 | }; | 451 | }; |
444 | 452 | ||
445 | #define POSTED_INTR_ON 0 | 453 | #define POSTED_INTR_ON 0 |
454 | #define POSTED_INTR_SN 1 | ||
455 | |||
446 | /* Posted-Interrupt Descriptor */ | 456 | /* Posted-Interrupt Descriptor */ |
447 | struct pi_desc { | 457 | struct pi_desc { |
448 | u32 pir[8]; /* Posted interrupt requested */ | 458 | u32 pir[8]; /* Posted interrupt requested */ |
449 | u32 control; /* bit 0 of control is outstanding notification bit */ | 459 | union { |
450 | u32 rsvd[7]; | 460 | struct { |
461 | /* bit 256 - Outstanding Notification */ | ||
462 | u16 on : 1, | ||
463 | /* bit 257 - Suppress Notification */ | ||
464 | sn : 1, | ||
465 | /* bit 271:258 - Reserved */ | ||
466 | rsvd_1 : 14; | ||
467 | /* bit 279:272 - Notification Vector */ | ||
468 | u8 nv; | ||
469 | /* bit 287:280 - Reserved */ | ||
470 | u8 rsvd_2; | ||
471 | /* bit 319:288 - Notification Destination */ | ||
472 | u32 ndst; | ||
473 | }; | ||
474 | u64 control; | ||
475 | }; | ||
476 | u32 rsvd[6]; | ||
451 | } __aligned(64); | 477 | } __aligned(64); |
452 | 478 | ||
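The reworked pi_desc above overlays named bitfields (ON, SN, NV, NDST) on a single 64-bit control word, so individual fields stay readable while the whole word can be swapped atomically. A standalone sketch of the union, assuming the little-endian bitfield layout used on x86; field widths mirror the structure above, values are illustrative:

    #include <stdint.h>
    #include <stdio.h>

    union pi_control {
        struct {
            uint16_t on : 1;       /* outstanding notification */
            uint16_t sn : 1;       /* suppress notification */
            uint16_t rsvd_1 : 14;
            uint8_t  nv;           /* notification vector */
            uint8_t  rsvd_2;
            uint32_t ndst;         /* notification destination */
        };
        uint64_t control;          /* same 64 bits, viewed flat */
    };

    int main(void)
    {
        union pi_control c = { .control = 0 };

        c.nv = 0xf2;               /* some vector */
        c.ndst = 3;                /* target APIC id 3 */
        c.sn = 1;                  /* suppress while scheduled out */

        printf("raw control word: 0x%016llx\n",
               (unsigned long long)c.control);
        return 0;
    }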
453 | static bool pi_test_and_set_on(struct pi_desc *pi_desc) | 479 | static bool pi_test_and_set_on(struct pi_desc *pi_desc) |
@@ -467,6 +493,30 @@ static int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc) | |||
467 | return test_and_set_bit(vector, (unsigned long *)pi_desc->pir); | 493 | return test_and_set_bit(vector, (unsigned long *)pi_desc->pir); |
468 | } | 494 | } |
469 | 495 | ||
496 | static inline void pi_clear_sn(struct pi_desc *pi_desc) | ||
497 | { | ||
498 | return clear_bit(POSTED_INTR_SN, | ||
499 | (unsigned long *)&pi_desc->control); | ||
500 | } | ||
501 | |||
502 | static inline void pi_set_sn(struct pi_desc *pi_desc) | ||
503 | { | ||
504 | return set_bit(POSTED_INTR_SN, | ||
505 | (unsigned long *)&pi_desc->control); | ||
506 | } | ||
507 | |||
508 | static inline int pi_test_on(struct pi_desc *pi_desc) | ||
509 | { | ||
510 | return test_bit(POSTED_INTR_ON, | ||
511 | (unsigned long *)&pi_desc->control); | ||
512 | } | ||
513 | |||
514 | static inline int pi_test_sn(struct pi_desc *pi_desc) | ||
515 | { | ||
516 | return test_bit(POSTED_INTR_SN, | ||
517 | (unsigned long *)&pi_desc->control); | ||
518 | } | ||
519 | |||
470 | struct vcpu_vmx { | 520 | struct vcpu_vmx { |
471 | struct kvm_vcpu vcpu; | 521 | struct kvm_vcpu vcpu; |
472 | unsigned long host_rsp; | 522 | unsigned long host_rsp; |
@@ -532,8 +582,6 @@ struct vcpu_vmx { | |||
532 | s64 vnmi_blocked_time; | 582 | s64 vnmi_blocked_time; |
533 | u32 exit_reason; | 583 | u32 exit_reason; |
534 | 584 | ||
535 | bool rdtscp_enabled; | ||
536 | |||
537 | /* Posted interrupt descriptor */ | 585 | /* Posted interrupt descriptor */ |
538 | struct pi_desc pi_desc; | 586 | struct pi_desc pi_desc; |
539 | 587 | ||
@@ -563,6 +611,11 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) | |||
563 | return container_of(vcpu, struct vcpu_vmx, vcpu); | 611 | return container_of(vcpu, struct vcpu_vmx, vcpu); |
564 | } | 612 | } |
565 | 613 | ||
614 | static struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu) | ||
615 | { | ||
616 | return &(to_vmx(vcpu)->pi_desc); | ||
617 | } | ||
618 | |||
566 | #define VMCS12_OFFSET(x) offsetof(struct vmcs12, x) | 619 | #define VMCS12_OFFSET(x) offsetof(struct vmcs12, x) |
567 | #define FIELD(number, name) [number] = VMCS12_OFFSET(name) | 620 | #define FIELD(number, name) [number] = VMCS12_OFFSET(name) |
568 | #define FIELD64(number, name) [number] = VMCS12_OFFSET(name), \ | 621 | #define FIELD64(number, name) [number] = VMCS12_OFFSET(name), \ |
@@ -809,7 +862,7 @@ static void kvm_cpu_vmxon(u64 addr); | |||
809 | static void kvm_cpu_vmxoff(void); | 862 | static void kvm_cpu_vmxoff(void); |
810 | static bool vmx_mpx_supported(void); | 863 | static bool vmx_mpx_supported(void); |
811 | static bool vmx_xsaves_supported(void); | 864 | static bool vmx_xsaves_supported(void); |
812 | static int vmx_vm_has_apicv(struct kvm *kvm); | 865 | static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu); |
813 | static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); | 866 | static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); |
814 | static void vmx_set_segment(struct kvm_vcpu *vcpu, | 867 | static void vmx_set_segment(struct kvm_vcpu *vcpu, |
815 | struct kvm_segment *var, int seg); | 868 | struct kvm_segment *var, int seg); |
@@ -831,6 +884,13 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs); | |||
831 | static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu); | 884 | static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu); |
832 | static DEFINE_PER_CPU(struct desc_ptr, host_gdt); | 885 | static DEFINE_PER_CPU(struct desc_ptr, host_gdt); |
833 | 886 | ||
887 | /* | ||
888 | * We maintain a per-CPU linked list of vCPUs, so in wakeup_handler() we | ||
889 | * can find which vCPU should be woken up. | ||
890 | */ | ||
891 | static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu); | ||
892 | static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock); | ||
893 | |||
834 | static unsigned long *vmx_io_bitmap_a; | 894 | static unsigned long *vmx_io_bitmap_a; |
835 | static unsigned long *vmx_io_bitmap_b; | 895 | static unsigned long *vmx_io_bitmap_b; |
836 | static unsigned long *vmx_msr_bitmap_legacy; | 896 | static unsigned long *vmx_msr_bitmap_legacy; |
@@ -946,9 +1006,9 @@ static inline bool cpu_has_vmx_tpr_shadow(void) | |||
946 | return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW; | 1006 | return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW; |
947 | } | 1007 | } |
948 | 1008 | ||
949 | static inline bool vm_need_tpr_shadow(struct kvm *kvm) | 1009 | static inline bool cpu_need_tpr_shadow(struct kvm_vcpu *vcpu) |
950 | { | 1010 | { |
951 | return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm)); | 1011 | return cpu_has_vmx_tpr_shadow() && lapic_in_kernel(vcpu); |
952 | } | 1012 | } |
953 | 1013 | ||
954 | static inline bool cpu_has_secondary_exec_ctrls(void) | 1014 | static inline bool cpu_has_secondary_exec_ctrls(void) |
@@ -983,7 +1043,8 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void) | |||
983 | 1043 | ||
984 | static inline bool cpu_has_vmx_posted_intr(void) | 1044 | static inline bool cpu_has_vmx_posted_intr(void) |
985 | { | 1045 | { |
986 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR; | 1046 | return IS_ENABLED(CONFIG_X86_LOCAL_APIC) && |
1047 | vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR; | ||
987 | } | 1048 | } |
988 | 1049 | ||
989 | static inline bool cpu_has_vmx_apicv(void) | 1050 | static inline bool cpu_has_vmx_apicv(void) |
@@ -1062,9 +1123,9 @@ static inline bool cpu_has_vmx_ple(void) | |||
1062 | SECONDARY_EXEC_PAUSE_LOOP_EXITING; | 1123 | SECONDARY_EXEC_PAUSE_LOOP_EXITING; |
1063 | } | 1124 | } |
1064 | 1125 | ||
1065 | static inline bool vm_need_virtualize_apic_accesses(struct kvm *kvm) | 1126 | static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu) |
1066 | { | 1127 | { |
1067 | return flexpriority_enabled && irqchip_in_kernel(kvm); | 1128 | return flexpriority_enabled && lapic_in_kernel(vcpu); |
1068 | } | 1129 | } |
1069 | 1130 | ||
1070 | static inline bool cpu_has_vmx_vpid(void) | 1131 | static inline bool cpu_has_vmx_vpid(void) |
@@ -1113,6 +1174,12 @@ static inline bool cpu_has_vmx_pml(void) | |||
1113 | return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_ENABLE_PML; | 1174 | return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_ENABLE_PML; |
1114 | } | 1175 | } |
1115 | 1176 | ||
1177 | static inline bool cpu_has_vmx_tsc_scaling(void) | ||
1178 | { | ||
1179 | return vmcs_config.cpu_based_2nd_exec_ctrl & | ||
1180 | SECONDARY_EXEC_TSC_SCALING; | ||
1181 | } | ||
1182 | |||
1116 | static inline bool report_flexpriority(void) | 1183 | static inline bool report_flexpriority(void) |
1117 | { | 1184 | { |
1118 | return flexpriority_enabled; | 1185 | return flexpriority_enabled; |
@@ -1157,6 +1224,11 @@ static inline bool nested_cpu_has_virt_x2apic_mode(struct vmcs12 *vmcs12) | |||
1157 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE); | 1224 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE); |
1158 | } | 1225 | } |
1159 | 1226 | ||
1227 | static inline bool nested_cpu_has_vpid(struct vmcs12 *vmcs12) | ||
1228 | { | ||
1229 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VPID); | ||
1230 | } | ||
1231 | |||
1160 | static inline bool nested_cpu_has_apic_reg_virt(struct vmcs12 *vmcs12) | 1232 | static inline bool nested_cpu_has_apic_reg_virt(struct vmcs12 *vmcs12) |
1161 | { | 1233 | { |
1162 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_APIC_REGISTER_VIRT); | 1234 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_APIC_REGISTER_VIRT); |
@@ -1337,13 +1409,13 @@ static void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs) | |||
1337 | __loaded_vmcs_clear, loaded_vmcs, 1); | 1409 | __loaded_vmcs_clear, loaded_vmcs, 1); |
1338 | } | 1410 | } |
1339 | 1411 | ||
1340 | static inline void vpid_sync_vcpu_single(struct vcpu_vmx *vmx) | 1412 | static inline void vpid_sync_vcpu_single(int vpid) |
1341 | { | 1413 | { |
1342 | if (vmx->vpid == 0) | 1414 | if (vpid == 0) |
1343 | return; | 1415 | return; |
1344 | 1416 | ||
1345 | if (cpu_has_vmx_invvpid_single()) | 1417 | if (cpu_has_vmx_invvpid_single()) |
1346 | __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0); | 1418 | __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vpid, 0); |
1347 | } | 1419 | } |
1348 | 1420 | ||
1349 | static inline void vpid_sync_vcpu_global(void) | 1421 | static inline void vpid_sync_vcpu_global(void) |
@@ -1352,10 +1424,10 @@ static inline void vpid_sync_vcpu_global(void) | |||
1352 | __invvpid(VMX_VPID_EXTENT_ALL_CONTEXT, 0, 0); | 1424 | __invvpid(VMX_VPID_EXTENT_ALL_CONTEXT, 0, 0); |
1353 | } | 1425 | } |
1354 | 1426 | ||
1355 | static inline void vpid_sync_context(struct vcpu_vmx *vmx) | 1427 | static inline void vpid_sync_context(int vpid) |
1356 | { | 1428 | { |
1357 | if (cpu_has_vmx_invvpid_single()) | 1429 | if (cpu_has_vmx_invvpid_single()) |
1358 | vpid_sync_vcpu_single(vmx); | 1430 | vpid_sync_vcpu_single(vpid); |
1359 | else | 1431 | else |
1360 | vpid_sync_vcpu_global(); | 1432 | vpid_sync_vcpu_global(); |
1361 | } | 1433 | } |
@@ -1567,7 +1639,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) | |||
1567 | u32 eb; | 1639 | u32 eb; |
1568 | 1640 | ||
1569 | eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | | 1641 | eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | |
1570 | (1u << NM_VECTOR) | (1u << DB_VECTOR); | 1642 | (1u << NM_VECTOR) | (1u << DB_VECTOR) | (1u << AC_VECTOR); |
1571 | if ((vcpu->guest_debug & | 1643 | if ((vcpu->guest_debug & |
1572 | (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == | 1644 | (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == |
1573 | (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) | 1645 | (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) |
@@ -1895,6 +1967,52 @@ static void vmx_load_host_state(struct vcpu_vmx *vmx) | |||
1895 | preempt_enable(); | 1967 | preempt_enable(); |
1896 | } | 1968 | } |
1897 | 1969 | ||
1970 | static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) | ||
1971 | { | ||
1972 | struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); | ||
1973 | struct pi_desc old, new; | ||
1974 | unsigned int dest; | ||
1975 | |||
1976 | if (!kvm_arch_has_assigned_device(vcpu->kvm) || | ||
1977 | !irq_remapping_cap(IRQ_POSTING_CAP)) | ||
1978 | return; | ||
1979 | |||
1980 | do { | ||
1981 | old.control = new.control = pi_desc->control; | ||
1982 | |||
1983 | /* | ||
1984 | * If 'nv' field is POSTED_INTR_WAKEUP_VECTOR, there | ||
1985 | * are two possible cases: | ||
1986 | * 1. After running 'pre_block', context switch | ||
1987 | * happened. For this case, 'sn' was set in | ||
1988 | * vmx_vcpu_put(), so we need to clear it here. | ||
1989 | * 2. After running 'pre_block', we were blocked, | ||
1990 | * and woken up by another task. For this case, | ||
1991 | * we don't need to do anything, 'pi_post_block' | ||
1992 | * will do everything for us. However, we cannot | ||
1993 | * check whether it is case #1 or case #2 here | ||
1994 | * (maybe, not needed), so we also clear sn here, | ||
1995 | * I think it is not a big deal. | ||
1996 | */ | ||
1997 | if (pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR) { | ||
1998 | if (vcpu->cpu != cpu) { | ||
1999 | dest = cpu_physical_id(cpu); | ||
2000 | |||
2001 | if (x2apic_enabled()) | ||
2002 | new.ndst = dest; | ||
2003 | else | ||
2004 | new.ndst = (dest << 8) & 0xFF00; | ||
2005 | } | ||
2006 | |||
2007 | /* set 'NV' to 'notification vector' */ | ||
2008 | new.nv = POSTED_INTR_VECTOR; | ||
2009 | } | ||
2010 | |||
2011 | /* Allow posting non-urgent interrupts */ | ||
2012 | new.sn = 0; | ||
2013 | } while (cmpxchg(&pi_desc->control, old.control, | ||
2014 | new.control) != old.control); | ||
2015 | } | ||
1898 | /* | 2016 | /* |
1899 | * Switches to specified vcpu, until a matching vcpu_put(), but assumes | 2017 | * Switches to specified vcpu, until a matching vcpu_put(), but assumes |
1900 | * vcpu mutex is already taken. | 2018 | * vcpu mutex is already taken. |
@@ -1943,12 +2061,35 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
1943 | 2061 | ||
1944 | rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); | 2062 | rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); |
1945 | vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ | 2063 | vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ |
2064 | |||
2065 | /* Setup TSC multiplier */ | ||
2066 | if (cpu_has_vmx_tsc_scaling()) | ||
2067 | vmcs_write64(TSC_MULTIPLIER, | ||
2068 | vcpu->arch.tsc_scaling_ratio); | ||
2069 | |||
1946 | vmx->loaded_vmcs->cpu = cpu; | 2070 | vmx->loaded_vmcs->cpu = cpu; |
1947 | } | 2071 | } |
2072 | |||
2073 | vmx_vcpu_pi_load(vcpu, cpu); | ||
2074 | } | ||
2075 | |||
2076 | static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu) | ||
2077 | { | ||
2078 | struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); | ||
2079 | |||
2080 | if (!kvm_arch_has_assigned_device(vcpu->kvm) || | ||
2081 | !irq_remapping_cap(IRQ_POSTING_CAP)) | ||
2082 | return; | ||
2083 | |||
2084 | /* Set SN when the vCPU is preempted */ | ||
2085 | if (vcpu->preempted) | ||
2086 | pi_set_sn(pi_desc); | ||
1948 | } | 2087 | } |
1949 | 2088 | ||
1950 | static void vmx_vcpu_put(struct kvm_vcpu *vcpu) | 2089 | static void vmx_vcpu_put(struct kvm_vcpu *vcpu) |
1951 | { | 2090 | { |
2091 | vmx_vcpu_pi_put(vcpu); | ||
2092 | |||
1952 | __vmx_load_host_state(to_vmx(vcpu)); | 2093 | __vmx_load_host_state(to_vmx(vcpu)); |
1953 | if (!vmm_exclusive) { | 2094 | if (!vmm_exclusive) { |
1954 | __loaded_vmcs_clear(to_vmx(vcpu)->loaded_vmcs); | 2095 | __loaded_vmcs_clear(to_vmx(vcpu)->loaded_vmcs); |
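vmx_vcpu_pi_load() above updates that shared control word without a lock: read it, compute the desired NV/NDST/SN fields, and retry the compare-and-swap until no concurrent writer intervened. A standalone sketch of the loop, using compiler atomics as stand-ins for the kernel's cmpxchg() and the bit positions from the union sketch earlier:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t pi_control;   /* shared posted-interrupt control word */

    static void update_ndst(uint32_t dest)
    {
        uint64_t old, new;

        do {
            old = __atomic_load_n(&pi_control, __ATOMIC_ACQUIRE);

            /* NDST occupies bits 63:32 of the control word. */
            new = (old & 0xffffffffULL) | ((uint64_t)dest << 32);

            /* Clear SN (bit 1) so non-urgent interrupts can be posted. */
            new &= ~(1ULL << 1);
        } while (!__atomic_compare_exchange_n(&pi_control, &old, new,
                                              0, __ATOMIC_ACQ_REL,
                                              __ATOMIC_ACQUIRE));
    }

    int main(void)
    {
        update_ndst(3);
        printf("control: 0x%016llx\n", (unsigned long long)pi_control);
        return 0;
    }

The retry loop is needed because the notification path may flip ON concurrently; swapping the whole 64-bit word keeps the field updates consistent.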
@@ -2207,7 +2348,7 @@ static void setup_msrs(struct vcpu_vmx *vmx) | |||
2207 | if (index >= 0) | 2348 | if (index >= 0) |
2208 | move_msr_up(vmx, index, save_nmsrs++); | 2349 | move_msr_up(vmx, index, save_nmsrs++); |
2209 | index = __find_msr_index(vmx, MSR_TSC_AUX); | 2350 | index = __find_msr_index(vmx, MSR_TSC_AUX); |
2210 | if (index >= 0 && vmx->rdtscp_enabled) | 2351 | if (index >= 0 && guest_cpuid_has_rdtscp(&vmx->vcpu)) |
2211 | move_msr_up(vmx, index, save_nmsrs++); | 2352 | move_msr_up(vmx, index, save_nmsrs++); |
2212 | /* | 2353 | /* |
2213 | * MSR_STAR is only needed on long mode guests, and only | 2354 | * MSR_STAR is only needed on long mode guests, and only |
@@ -2230,15 +2371,16 @@ static void setup_msrs(struct vcpu_vmx *vmx) | |||
2230 | 2371 | ||
2231 | /* | 2372 | /* |
2232 | * reads and returns guest's timestamp counter "register" | 2373 | * reads and returns guest's timestamp counter "register" |
2233 | * guest_tsc = host_tsc + tsc_offset -- 21.3 | 2374 | * guest_tsc = (host_tsc * tsc multiplier) >> 48 + tsc_offset |
2375 | * -- Intel TSC Scaling for Virtualization White Paper, sec 1.3 | ||
2234 | */ | 2376 | */ |
2235 | static u64 guest_read_tsc(void) | 2377 | static u64 guest_read_tsc(struct kvm_vcpu *vcpu) |
2236 | { | 2378 | { |
2237 | u64 host_tsc, tsc_offset; | 2379 | u64 host_tsc, tsc_offset; |
2238 | 2380 | ||
2239 | host_tsc = rdtsc(); | 2381 | host_tsc = rdtsc(); |
2240 | tsc_offset = vmcs_read64(TSC_OFFSET); | 2382 | tsc_offset = vmcs_read64(TSC_OFFSET); |
2241 | return host_tsc + tsc_offset; | 2383 | return kvm_scale_tsc(vcpu, host_tsc) + tsc_offset; |
2242 | } | 2384 | } |
2243 | 2385 | ||
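The new guest_read_tsc() comment gives guest_tsc = (host_tsc * multiplier) >> 48 + tsc_offset. A worked standalone example of that fixed-point arithmetic; the ratio derivation from kHz rates matches what a common kvm_scale_tsc() path would plausibly do, and the rates here are made up:

    #include <stdint.h>
    #include <stdio.h>

    #define FRAC_BITS 48   /* VMX uses 48 fractional bits; SVM uses 32 */

    int main(void)
    {
        uint64_t host_khz  = 2000000;   /* 2.0 GHz host */
        uint64_t guest_khz = 3000000;   /* guest configured for 3.0 GHz */

        /* ratio = guest_rate / host_rate, in 16.48 fixed point
         * (unsigned __int128 is a GCC/Clang extension). */
        uint64_t ratio =
            (uint64_t)(((unsigned __int128)guest_khz << FRAC_BITS)
                       / host_khz);

        uint64_t host_tsc = 1000000, tsc_offset = 42;
        uint64_t guest_tsc =
            (uint64_t)(((unsigned __int128)host_tsc * ratio) >> FRAC_BITS)
            + tsc_offset;

        printf("ratio=0x%llx guest_tsc=%llu\n",   /* expect 1500042 */
               (unsigned long long)ratio, (unsigned long long)guest_tsc);
        return 0;
    }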
2244 | /* | 2386 | /* |
@@ -2255,22 +2397,6 @@ static u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) | |||
2255 | return host_tsc + tsc_offset; | 2397 | return host_tsc + tsc_offset; |
2256 | } | 2398 | } |
2257 | 2399 | ||
2258 | /* | ||
2259 | * Engage any workarounds for mis-matched TSC rates. Currently limited to | ||
2260 | * software catchup for faster rates on slower CPUs. | ||
2261 | */ | ||
2262 | static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale) | ||
2263 | { | ||
2264 | if (!scale) | ||
2265 | return; | ||
2266 | |||
2267 | if (user_tsc_khz > tsc_khz) { | ||
2268 | vcpu->arch.tsc_catchup = 1; | ||
2269 | vcpu->arch.tsc_always_catchup = 1; | ||
2270 | } else | ||
2271 | WARN(1, "user requested TSC rate below hardware speed\n"); | ||
2272 | } | ||
2273 | |||
2274 | static u64 vmx_read_tsc_offset(struct kvm_vcpu *vcpu) | 2400 | static u64 vmx_read_tsc_offset(struct kvm_vcpu *vcpu) |
2275 | { | 2401 | { |
2276 | return vmcs_read64(TSC_OFFSET); | 2402 | return vmcs_read64(TSC_OFFSET); |
@@ -2302,7 +2428,7 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) | |||
2302 | } | 2428 | } |
2303 | } | 2429 | } |
2304 | 2430 | ||
2305 | static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host) | 2431 | static void vmx_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment) |
2306 | { | 2432 | { |
2307 | u64 offset = vmcs_read64(TSC_OFFSET); | 2433 | u64 offset = vmcs_read64(TSC_OFFSET); |
2308 | 2434 | ||
@@ -2315,11 +2441,6 @@ static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool ho | |||
2315 | offset + adjustment); | 2441 | offset + adjustment); |
2316 | } | 2442 | } |
2317 | 2443 | ||
2318 | static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) | ||
2319 | { | ||
2320 | return target_tsc - rdtsc(); | ||
2321 | } | ||
2322 | |||
2323 | static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu) | 2444 | static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu) |
2324 | { | 2445 | { |
2325 | struct kvm_cpuid_entry2 *best = kvm_find_cpuid_entry(vcpu, 1, 0); | 2446 | struct kvm_cpuid_entry2 *best = kvm_find_cpuid_entry(vcpu, 1, 0); |
@@ -2377,7 +2498,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) | |||
2377 | vmx->nested.nested_vmx_pinbased_ctls_high |= | 2498 | vmx->nested.nested_vmx_pinbased_ctls_high |= |
2378 | PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | | 2499 | PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | |
2379 | PIN_BASED_VMX_PREEMPTION_TIMER; | 2500 | PIN_BASED_VMX_PREEMPTION_TIMER; |
2380 | if (vmx_vm_has_apicv(vmx->vcpu.kvm)) | 2501 | if (vmx_cpu_uses_apicv(&vmx->vcpu)) |
2381 | vmx->nested.nested_vmx_pinbased_ctls_high |= | 2502 | vmx->nested.nested_vmx_pinbased_ctls_high |= |
2382 | PIN_BASED_POSTED_INTR; | 2503 | PIN_BASED_POSTED_INTR; |
2383 | 2504 | ||
@@ -2471,10 +2592,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) | |||
2471 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | | 2592 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | |
2472 | SECONDARY_EXEC_RDTSCP | | 2593 | SECONDARY_EXEC_RDTSCP | |
2473 | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | | 2594 | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | |
2595 | SECONDARY_EXEC_ENABLE_VPID | | ||
2474 | SECONDARY_EXEC_APIC_REGISTER_VIRT | | 2596 | SECONDARY_EXEC_APIC_REGISTER_VIRT | |
2475 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | | 2597 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | |
2476 | SECONDARY_EXEC_WBINVD_EXITING | | 2598 | SECONDARY_EXEC_WBINVD_EXITING | |
2477 | SECONDARY_EXEC_XSAVES; | 2599 | SECONDARY_EXEC_XSAVES | |
2600 | SECONDARY_EXEC_PCOMMIT; | ||
2478 | 2601 | ||
2479 | if (enable_ept) { | 2602 | if (enable_ept) { |
2480 | /* nested EPT: emulate EPT also to L1 */ | 2603 | /* nested EPT: emulate EPT also to L1 */ |
@@ -2493,6 +2616,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) | |||
2493 | } else | 2616 | } else |
2494 | vmx->nested.nested_vmx_ept_caps = 0; | 2617 | vmx->nested.nested_vmx_ept_caps = 0; |
2495 | 2618 | ||
2619 | if (enable_vpid) | ||
2620 | vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT | | ||
2621 | VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT; | ||
2622 | else | ||
2623 | vmx->nested.nested_vmx_vpid_caps = 0; | ||
2624 | |||
2496 | if (enable_unrestricted_guest) | 2625 | if (enable_unrestricted_guest) |
2497 | vmx->nested.nested_vmx_secondary_ctls_high |= | 2626 | vmx->nested.nested_vmx_secondary_ctls_high |= |
2498 | SECONDARY_EXEC_UNRESTRICTED_GUEST; | 2627 | SECONDARY_EXEC_UNRESTRICTED_GUEST; |
@@ -2608,7 +2737,8 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
2608 | break; | 2737 | break; |
2609 | case MSR_IA32_VMX_EPT_VPID_CAP: | 2738 | case MSR_IA32_VMX_EPT_VPID_CAP: |
2610 | /* Currently, no nested vpid support */ | 2739 | /* Currently, no nested vpid support */ |
2611 | *pdata = vmx->nested.nested_vmx_ept_caps; | 2740 | *pdata = vmx->nested.nested_vmx_ept_caps | |
2741 | ((u64)vmx->nested.nested_vmx_vpid_caps << 32); | ||
2612 | break; | 2742 | break; |
2613 | default: | 2743 | default: |
2614 | return 1; | 2744 | return 1; |
@@ -2642,7 +2772,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2642 | case MSR_EFER: | 2772 | case MSR_EFER: |
2643 | return kvm_get_msr_common(vcpu, msr_info); | 2773 | return kvm_get_msr_common(vcpu, msr_info); |
2644 | case MSR_IA32_TSC: | 2774 | case MSR_IA32_TSC: |
2645 | msr_info->data = guest_read_tsc(); | 2775 | msr_info->data = guest_read_tsc(vcpu); |
2646 | break; | 2776 | break; |
2647 | case MSR_IA32_SYSENTER_CS: | 2777 | case MSR_IA32_SYSENTER_CS: |
2648 | msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); | 2778 | msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); |
@@ -2673,7 +2803,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2673 | msr_info->data = vcpu->arch.ia32_xss; | 2803 | msr_info->data = vcpu->arch.ia32_xss; |
2674 | break; | 2804 | break; |
2675 | case MSR_TSC_AUX: | 2805 | case MSR_TSC_AUX: |
2676 | if (!to_vmx(vcpu)->rdtscp_enabled) | 2806 | if (!guest_cpuid_has_rdtscp(vcpu)) |
2677 | return 1; | 2807 | return 1; |
2678 | /* Otherwise falls through */ | 2808 | /* Otherwise falls through */ |
2679 | default: | 2809 | default: |
@@ -2779,7 +2909,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2779 | clear_atomic_switch_msr(vmx, MSR_IA32_XSS); | 2909 | clear_atomic_switch_msr(vmx, MSR_IA32_XSS); |
2780 | break; | 2910 | break; |
2781 | case MSR_TSC_AUX: | 2911 | case MSR_TSC_AUX: |
2782 | if (!vmx->rdtscp_enabled) | 2912 | if (!guest_cpuid_has_rdtscp(vcpu)) |
2783 | return 1; | 2913 | return 1; |
2784 | /* Check reserved bit, higher 32 bits should be zero */ | 2914 | /* Check reserved bit, higher 32 bits should be zero */ |
2785 | if ((data >> 32) != 0) | 2915 | if ((data >> 32) != 0) |
@@ -2874,6 +3004,8 @@ static int hardware_enable(void) | |||
2874 | return -EBUSY; | 3004 | return -EBUSY; |
2875 | 3005 | ||
2876 | INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); | 3006 | INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); |
3007 | INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu)); | ||
3008 | spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); | ||
2877 | 3009 | ||
2878 | /* | 3010 | /* |
2879 | * Now we can enable the vmclear operation in kdump | 3011 | * Now we can enable the vmclear operation in kdump |
@@ -3015,7 +3147,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
3015 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | | 3147 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | |
3016 | SECONDARY_EXEC_SHADOW_VMCS | | 3148 | SECONDARY_EXEC_SHADOW_VMCS | |
3017 | SECONDARY_EXEC_XSAVES | | 3149 | SECONDARY_EXEC_XSAVES | |
3018 | SECONDARY_EXEC_ENABLE_PML; | 3150 | SECONDARY_EXEC_ENABLE_PML | |
3151 | SECONDARY_EXEC_PCOMMIT | | ||
3152 | SECONDARY_EXEC_TSC_SCALING; | ||
3019 | if (adjust_vmx_controls(min2, opt2, | 3153 | if (adjust_vmx_controls(min2, opt2, |
3020 | MSR_IA32_VMX_PROCBASED_CTLS2, | 3154 | MSR_IA32_VMX_PROCBASED_CTLS2, |
3021 | &_cpu_based_2nd_exec_control) < 0) | 3155 | &_cpu_based_2nd_exec_control) < 0) |
@@ -3441,9 +3575,9 @@ static void exit_lmode(struct kvm_vcpu *vcpu) | |||
3441 | 3575 | ||
3442 | #endif | 3576 | #endif |
3443 | 3577 | ||
3444 | static void vmx_flush_tlb(struct kvm_vcpu *vcpu) | 3578 | static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid) |
3445 | { | 3579 | { |
3446 | vpid_sync_context(to_vmx(vcpu)); | 3580 | vpid_sync_context(vpid); |
3447 | if (enable_ept) { | 3581 | if (enable_ept) { |
3448 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | 3582 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) |
3449 | return; | 3583 | return; |
@@ -3451,6 +3585,11 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu) | |||
3451 | } | 3585 | } |
3452 | } | 3586 | } |
3453 | 3587 | ||
3588 | static void vmx_flush_tlb(struct kvm_vcpu *vcpu) | ||
3589 | { | ||
3590 | __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid); | ||
3591 | } | ||
3592 | |||
3454 | static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) | 3593 | static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) |
3455 | { | 3594 | { |
3456 | ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits; | 3595 | ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits; |
@@ -3644,20 +3783,21 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
3644 | if (!is_paging(vcpu)) { | 3783 | if (!is_paging(vcpu)) { |
3645 | hw_cr4 &= ~X86_CR4_PAE; | 3784 | hw_cr4 &= ~X86_CR4_PAE; |
3646 | hw_cr4 |= X86_CR4_PSE; | 3785 | hw_cr4 |= X86_CR4_PSE; |
3647 | /* | ||
3648 | * SMEP/SMAP is disabled if CPU is in non-paging mode | ||
3649 | * in hardware. However KVM always uses paging mode to | ||
3650 | * emulate guest non-paging mode with TDP. | ||
3651 | * To emulate this behavior, SMEP/SMAP needs to be | ||
3652 | * manually disabled when guest switches to non-paging | ||
3653 | * mode. | ||
3654 | */ | ||
3655 | hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP); | ||
3656 | } else if (!(cr4 & X86_CR4_PAE)) { | 3786 | } else if (!(cr4 & X86_CR4_PAE)) { |
3657 | hw_cr4 &= ~X86_CR4_PAE; | 3787 | hw_cr4 &= ~X86_CR4_PAE; |
3658 | } | 3788 | } |
3659 | } | 3789 | } |
3660 | 3790 | ||
3791 | if (!enable_unrestricted_guest && !is_paging(vcpu)) | ||
3792 | /* | ||
3793 | * SMEP/SMAP is disabled if CPU is in non-paging mode in | ||
3794 | * hardware. However KVM always uses paging mode without | ||
3795 | * unrestricted guest. | ||
3796 | * To emulate this behavior, SMEP/SMAP needs to be manually | ||
3797 | * disabled when guest switches to non-paging mode. | ||
3798 | */ | ||
3799 | hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP); | ||
3800 | |||
3661 | vmcs_writel(CR4_READ_SHADOW, cr4); | 3801 | vmcs_writel(CR4_READ_SHADOW, cr4); |
3662 | vmcs_writel(GUEST_CR4, hw_cr4); | 3802 | vmcs_writel(GUEST_CR4, hw_cr4); |
3663 | return 0; | 3803 | return 0; |
@@ -4146,29 +4286,28 @@ static int alloc_identity_pagetable(struct kvm *kvm) | |||
4146 | return r; | 4286 | return r; |
4147 | } | 4287 | } |
4148 | 4288 | ||
4149 | static void allocate_vpid(struct vcpu_vmx *vmx) | 4289 | static int allocate_vpid(void) |
4150 | { | 4290 | { |
4151 | int vpid; | 4291 | int vpid; |
4152 | 4292 | ||
4153 | vmx->vpid = 0; | ||
4154 | if (!enable_vpid) | 4293 | if (!enable_vpid) |
4155 | return; | 4294 | return 0; |
4156 | spin_lock(&vmx_vpid_lock); | 4295 | spin_lock(&vmx_vpid_lock); |
4157 | vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS); | 4296 | vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS); |
4158 | if (vpid < VMX_NR_VPIDS) { | 4297 | if (vpid < VMX_NR_VPIDS) |
4159 | vmx->vpid = vpid; | ||
4160 | __set_bit(vpid, vmx_vpid_bitmap); | 4298 | __set_bit(vpid, vmx_vpid_bitmap); |
4161 | } | 4299 | else |
4300 | vpid = 0; | ||
4162 | spin_unlock(&vmx_vpid_lock); | 4301 | spin_unlock(&vmx_vpid_lock); |
4302 | return vpid; | ||
4163 | } | 4303 | } |
4164 | 4304 | ||
4165 | static void free_vpid(struct vcpu_vmx *vmx) | 4305 | static void free_vpid(int vpid) |
4166 | { | 4306 | { |
4167 | if (!enable_vpid) | 4307 | if (!enable_vpid || vpid == 0) |
4168 | return; | 4308 | return; |
4169 | spin_lock(&vmx_vpid_lock); | 4309 | spin_lock(&vmx_vpid_lock); |
4170 | if (vmx->vpid != 0) | 4310 | __clear_bit(vpid, vmx_vpid_bitmap); |
4171 | __clear_bit(vmx->vpid, vmx_vpid_bitmap); | ||
4172 | spin_unlock(&vmx_vpid_lock); | 4311 | spin_unlock(&vmx_vpid_lock); |
4173 | } | 4312 | } |
4174 | 4313 | ||
@@ -4323,9 +4462,9 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr) | |||
4323 | msr, MSR_TYPE_W); | 4462 | msr, MSR_TYPE_W); |
4324 | } | 4463 | } |
4325 | 4464 | ||
4326 | static int vmx_vm_has_apicv(struct kvm *kvm) | 4465 | static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu) |
4327 | { | 4466 | { |
4328 | return enable_apicv && irqchip_in_kernel(kvm); | 4467 | return enable_apicv && lapic_in_kernel(vcpu); |
4329 | } | 4468 | } |
4330 | 4469 | ||
4331 | static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) | 4470 | static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) |
@@ -4369,6 +4508,22 @@ static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu) | |||
4369 | { | 4508 | { |
4370 | #ifdef CONFIG_SMP | 4509 | #ifdef CONFIG_SMP |
4371 | if (vcpu->mode == IN_GUEST_MODE) { | 4510 | if (vcpu->mode == IN_GUEST_MODE) { |
4511 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
4512 | |||
4513 | /* | ||
4514 | * Currently, we don't support urgent interrupts; | ||
4515 | * all interrupts are recognized as non-urgent, | ||
4516 | * so we cannot post interrupts when | ||
4517 | * 'SN' is set. | ||
4518 | * | ||
4519 | * If the vcpu is in guest mode, it means it is | ||
4520 | * running instead of being scheduled out and | ||
4521 | * waiting in the run queue, and that is currently | ||
4522 | * the only case in which 'SN' is set, so warn | ||
4523 | * if 'SN' is set. | ||
4524 | */ | ||
4525 | WARN_ON_ONCE(pi_test_sn(&vmx->pi_desc)); | ||
4526 | |||
4372 | apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), | 4527 | apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), |
4373 | POSTED_INTR_VECTOR); | 4528 | POSTED_INTR_VECTOR); |
4374 | return true; | 4529 | return true; |
@@ -4505,7 +4660,7 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) | |||
4505 | { | 4660 | { |
4506 | u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl; | 4661 | u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl; |
4507 | 4662 | ||
4508 | if (!vmx_vm_has_apicv(vmx->vcpu.kvm)) | 4663 | if (!vmx_cpu_uses_apicv(&vmx->vcpu)) |
4509 | pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; | 4664 | pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; |
4510 | return pin_based_exec_ctrl; | 4665 | return pin_based_exec_ctrl; |
4511 | } | 4666 | } |
@@ -4517,7 +4672,7 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx) | |||
4517 | if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT) | 4672 | if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT) |
4518 | exec_control &= ~CPU_BASED_MOV_DR_EXITING; | 4673 | exec_control &= ~CPU_BASED_MOV_DR_EXITING; |
4519 | 4674 | ||
4520 | if (!vm_need_tpr_shadow(vmx->vcpu.kvm)) { | 4675 | if (!cpu_need_tpr_shadow(&vmx->vcpu)) { |
4521 | exec_control &= ~CPU_BASED_TPR_SHADOW; | 4676 | exec_control &= ~CPU_BASED_TPR_SHADOW; |
4522 | #ifdef CONFIG_X86_64 | 4677 | #ifdef CONFIG_X86_64 |
4523 | exec_control |= CPU_BASED_CR8_STORE_EXITING | | 4678 | exec_control |= CPU_BASED_CR8_STORE_EXITING | |
@@ -4534,7 +4689,7 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx) | |||
4534 | static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) | 4689 | static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) |
4535 | { | 4690 | { |
4536 | u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; | 4691 | u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; |
4537 | if (!vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) | 4692 | if (!cpu_need_virtualize_apic_accesses(&vmx->vcpu)) |
4538 | exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | 4693 | exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; |
4539 | if (vmx->vpid == 0) | 4694 | if (vmx->vpid == 0) |
4540 | exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; | 4695 | exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; |
@@ -4548,7 +4703,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) | |||
4548 | exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; | 4703 | exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; |
4549 | if (!ple_gap) | 4704 | if (!ple_gap) |
4550 | exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; | 4705 | exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; |
4551 | if (!vmx_vm_has_apicv(vmx->vcpu.kvm)) | 4706 | if (!vmx_cpu_uses_apicv(&vmx->vcpu)) |
4552 | exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | | 4707 | exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | |
4553 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); | 4708 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); |
4554 | exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; | 4709 | exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; |
@@ -4558,8 +4713,12 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) | |||
4558 | a current VMCS12 | 4713 | a current VMCS12 |
4559 | */ | 4714 | */ |
4560 | exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; | 4715 | exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; |
4561 | /* PML is enabled/disabled in creating/destorying vcpu */ | 4716 | |
4562 | exec_control &= ~SECONDARY_EXEC_ENABLE_PML; | 4717 | if (!enable_pml) |
4718 | exec_control &= ~SECONDARY_EXEC_ENABLE_PML; | ||
4719 | |||
4720 | /* Currently, we allow the L1 guest to run the pcommit instruction directly. */ | ||
4721 | exec_control &= ~SECONDARY_EXEC_PCOMMIT; | ||
4563 | 4722 | ||
4564 | return exec_control; | 4723 | return exec_control; |
4565 | } | 4724 | } |
@@ -4604,12 +4763,11 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
4604 | 4763 | ||
4605 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx)); | 4764 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx)); |
4606 | 4765 | ||
4607 | if (cpu_has_secondary_exec_ctrls()) { | 4766 | if (cpu_has_secondary_exec_ctrls()) |
4608 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, | 4767 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, |
4609 | vmx_secondary_exec_control(vmx)); | 4768 | vmx_secondary_exec_control(vmx)); |
4610 | } | ||
4611 | 4769 | ||
4612 | if (vmx_vm_has_apicv(vmx->vcpu.kvm)) { | 4770 | if (vmx_cpu_uses_apicv(&vmx->vcpu)) { |
4613 | vmcs_write64(EOI_EXIT_BITMAP0, 0); | 4771 | vmcs_write64(EOI_EXIT_BITMAP0, 0); |
4614 | vmcs_write64(EOI_EXIT_BITMAP1, 0); | 4772 | vmcs_write64(EOI_EXIT_BITMAP1, 0); |
4615 | vmcs_write64(EOI_EXIT_BITMAP2, 0); | 4773 | vmcs_write64(EOI_EXIT_BITMAP2, 0); |
@@ -4753,7 +4911,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) | |||
4753 | 4911 | ||
4754 | if (cpu_has_vmx_tpr_shadow() && !init_event) { | 4912 | if (cpu_has_vmx_tpr_shadow() && !init_event) { |
4755 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0); | 4913 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0); |
4756 | if (vm_need_tpr_shadow(vcpu->kvm)) | 4914 | if (cpu_need_tpr_shadow(vcpu)) |
4757 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, | 4915 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, |
4758 | __pa(vcpu->arch.apic->regs)); | 4916 | __pa(vcpu->arch.apic->regs)); |
4759 | vmcs_write32(TPR_THRESHOLD, 0); | 4917 | vmcs_write32(TPR_THRESHOLD, 0); |
@@ -4761,7 +4919,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) | |||
4761 | 4919 | ||
4762 | kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); | 4920 | kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); |
4763 | 4921 | ||
4764 | if (vmx_vm_has_apicv(vcpu->kvm)) | 4922 | if (vmx_cpu_uses_apicv(vcpu)) |
4765 | memset(&vmx->pi_desc, 0, sizeof(struct pi_desc)); | 4923 | memset(&vmx->pi_desc, 0, sizeof(struct pi_desc)); |
4766 | 4924 | ||
4767 | if (vmx->vpid != 0) | 4925 | if (vmx->vpid != 0) |
@@ -4771,12 +4929,11 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) | |||
4771 | vmx_set_cr0(vcpu, cr0); /* enter rmode */ | 4929 | vmx_set_cr0(vcpu, cr0); /* enter rmode */ |
4772 | vmx->vcpu.arch.cr0 = cr0; | 4930 | vmx->vcpu.arch.cr0 = cr0; |
4773 | vmx_set_cr4(vcpu, 0); | 4931 | vmx_set_cr4(vcpu, 0); |
4774 | if (!init_event) | 4932 | vmx_set_efer(vcpu, 0); |
4775 | vmx_set_efer(vcpu, 0); | ||
4776 | vmx_fpu_activate(vcpu); | 4933 | vmx_fpu_activate(vcpu); |
4777 | update_exception_bitmap(vcpu); | 4934 | update_exception_bitmap(vcpu); |
4778 | 4935 | ||
4779 | vpid_sync_context(vmx); | 4936 | vpid_sync_context(vmx->vpid); |
4780 | } | 4937 | } |
4781 | 4938 | ||
4782 | /* | 4939 | /* |
@@ -5104,6 +5261,9 @@ static int handle_exception(struct kvm_vcpu *vcpu) | |||
5104 | return handle_rmode_exception(vcpu, ex_no, error_code); | 5261 | return handle_rmode_exception(vcpu, ex_no, error_code); |
5105 | 5262 | ||
5106 | switch (ex_no) { | 5263 | switch (ex_no) { |
5264 | case AC_VECTOR: | ||
5265 | kvm_queue_exception_e(vcpu, AC_VECTOR, error_code); | ||
5266 | return 1; | ||
5107 | case DB_VECTOR: | 5267 | case DB_VECTOR: |
5108 | dr6 = vmcs_readl(EXIT_QUALIFICATION); | 5268 | dr6 = vmcs_readl(EXIT_QUALIFICATION); |
5109 | if (!(vcpu->guest_debug & | 5269 | if (!(vcpu->guest_debug & |
@@ -5296,7 +5456,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) | |||
5296 | u8 cr8 = (u8)val; | 5456 | u8 cr8 = (u8)val; |
5297 | err = kvm_set_cr8(vcpu, cr8); | 5457 | err = kvm_set_cr8(vcpu, cr8); |
5298 | kvm_complete_insn_gp(vcpu, err); | 5458 | kvm_complete_insn_gp(vcpu, err); |
5299 | if (irqchip_in_kernel(vcpu->kvm)) | 5459 | if (lapic_in_kernel(vcpu)) |
5300 | return 1; | 5460 | return 1; |
5301 | if (cr8_prev <= cr8) | 5461 | if (cr8_prev <= cr8) |
5302 | return 1; | 5462 | return 1; |
@@ -5510,17 +5670,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu) | |||
5510 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 5670 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
5511 | 5671 | ||
5512 | ++vcpu->stat.irq_window_exits; | 5672 | ++vcpu->stat.irq_window_exits; |
5513 | |||
5514 | /* | ||
5515 | * If the user space waits to inject interrupts, exit as soon as | ||
5516 | * possible | ||
5517 | */ | ||
5518 | if (!irqchip_in_kernel(vcpu->kvm) && | ||
5519 | vcpu->run->request_interrupt_window && | ||
5520 | !kvm_cpu_has_interrupt(vcpu)) { | ||
5521 | vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; | ||
5522 | return 0; | ||
5523 | } | ||
5524 | return 1; | 5673 | return 1; |
5525 | } | 5674 | } |
5526 | 5675 | ||
@@ -5753,10 +5902,11 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) | |||
5753 | gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); | 5902 | gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); |
5754 | if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { | 5903 | if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { |
5755 | skip_emulated_instruction(vcpu); | 5904 | skip_emulated_instruction(vcpu); |
5905 | trace_kvm_fast_mmio(gpa); | ||
5756 | return 1; | 5906 | return 1; |
5757 | } | 5907 | } |
5758 | 5908 | ||
5759 | ret = handle_mmio_page_fault_common(vcpu, gpa, true); | 5909 | ret = handle_mmio_page_fault(vcpu, gpa, true); |
5760 | if (likely(ret == RET_MMIO_PF_EMULATE)) | 5910 | if (likely(ret == RET_MMIO_PF_EMULATE)) |
5761 | return x86_emulate_instruction(vcpu, gpa, 0, NULL, 0) == | 5911 | return x86_emulate_instruction(vcpu, gpa, 0, NULL, 0) == |
5762 | EMULATE_DONE; | 5912 | EMULATE_DONE; |
@@ -5910,6 +6060,25 @@ static void update_ple_window_actual_max(void) | |||
5910 | ple_window_grow, INT_MIN); | 6060 | ple_window_grow, INT_MIN); |
5911 | } | 6061 | } |
5912 | 6062 | ||
6063 | /* | ||
6064 | * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR. | ||
6065 | */ | ||
6066 | static void wakeup_handler(void) | ||
6067 | { | ||
6068 | struct kvm_vcpu *vcpu; | ||
6069 | int cpu = smp_processor_id(); | ||
6070 | |||
6071 | spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); | ||
6072 | list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu), | ||
6073 | blocked_vcpu_list) { | ||
6074 | struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); | ||
6075 | |||
6076 | if (pi_test_on(pi_desc) == 1) | ||
6077 | kvm_vcpu_kick(vcpu); | ||
6078 | } | ||
6079 | spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); | ||
6080 | } | ||
6081 | |||
5913 | static __init int hardware_setup(void) | 6082 | static __init int hardware_setup(void) |
5914 | { | 6083 | { |
5915 | int r = -ENOMEM, i, msr; | 6084 | int r = -ENOMEM, i, msr; |
@@ -6028,6 +6197,12 @@ static __init int hardware_setup(void) | |||
6028 | if (!cpu_has_vmx_apicv()) | 6197 | if (!cpu_has_vmx_apicv()) |
6029 | enable_apicv = 0; | 6198 | enable_apicv = 0; |
6030 | 6199 | ||
6200 | if (cpu_has_vmx_tsc_scaling()) { | ||
6201 | kvm_has_tsc_control = true; | ||
6202 | kvm_max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX; | ||
6203 | kvm_tsc_scaling_ratio_frac_bits = 48; | ||
6204 | } | ||
6205 | |||
6031 | if (enable_apicv) | 6206 | if (enable_apicv) |
6032 | kvm_x86_ops->update_cr8_intercept = NULL; | 6207 | kvm_x86_ops->update_cr8_intercept = NULL; |
6033 | else { | 6208 | else { |
@@ -6096,6 +6271,8 @@ static __init int hardware_setup(void) | |||
6096 | kvm_x86_ops->enable_log_dirty_pt_masked = NULL; | 6271 | kvm_x86_ops->enable_log_dirty_pt_masked = NULL; |
6097 | } | 6272 | } |
6098 | 6273 | ||
6274 | kvm_set_posted_intr_wakeup_handler(wakeup_handler); | ||
6275 | |||
6099 | return alloc_kvm_area(); | 6276 | return alloc_kvm_area(); |
6100 | 6277 | ||
6101 | out8: | 6278 | out8: |
@@ -6627,7 +6804,6 @@ static int nested_vmx_check_permission(struct kvm_vcpu *vcpu) | |||
6627 | 6804 | ||
6628 | static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) | 6805 | static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) |
6629 | { | 6806 | { |
6630 | u32 exec_control; | ||
6631 | if (vmx->nested.current_vmptr == -1ull) | 6807 | if (vmx->nested.current_vmptr == -1ull) |
6632 | return; | 6808 | return; |
6633 | 6809 | ||
@@ -6640,9 +6816,8 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) | |||
6640 | they were modified */ | 6816 | they were modified */ |
6641 | copy_shadow_to_vmcs12(vmx); | 6817 | copy_shadow_to_vmcs12(vmx); |
6642 | vmx->nested.sync_shadow_vmcs = false; | 6818 | vmx->nested.sync_shadow_vmcs = false; |
6643 | exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | 6819 | vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, |
6644 | exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; | 6820 | SECONDARY_EXEC_SHADOW_VMCS); |
6645 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | ||
6646 | vmcs_write64(VMCS_LINK_POINTER, -1ull); | 6821 | vmcs_write64(VMCS_LINK_POINTER, -1ull); |
6647 | } | 6822 | } |
6648 | vmx->nested.posted_intr_nv = -1; | 6823 | vmx->nested.posted_intr_nv = -1; |
@@ -6662,6 +6837,7 @@ static void free_nested(struct vcpu_vmx *vmx) | |||
6662 | return; | 6837 | return; |
6663 | 6838 | ||
6664 | vmx->nested.vmxon = false; | 6839 | vmx->nested.vmxon = false; |
6840 | free_vpid(vmx->nested.vpid02); | ||
6665 | nested_release_vmcs12(vmx); | 6841 | nested_release_vmcs12(vmx); |
6666 | if (enable_shadow_vmcs) | 6842 | if (enable_shadow_vmcs) |
6667 | free_vmcs(vmx->nested.current_shadow_vmcs); | 6843 | free_vmcs(vmx->nested.current_shadow_vmcs); |
@@ -7038,7 +7214,6 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) | |||
7038 | { | 7214 | { |
7039 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 7215 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
7040 | gpa_t vmptr; | 7216 | gpa_t vmptr; |
7041 | u32 exec_control; | ||
7042 | 7217 | ||
7043 | if (!nested_vmx_check_permission(vcpu)) | 7218 | if (!nested_vmx_check_permission(vcpu)) |
7044 | return 1; | 7219 | return 1; |
@@ -7070,9 +7245,8 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) | |||
7070 | vmx->nested.current_vmcs12 = new_vmcs12; | 7245 | vmx->nested.current_vmcs12 = new_vmcs12; |
7071 | vmx->nested.current_vmcs12_page = page; | 7246 | vmx->nested.current_vmcs12_page = page; |
7072 | if (enable_shadow_vmcs) { | 7247 | if (enable_shadow_vmcs) { |
7073 | exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | 7248 | vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, |
7074 | exec_control |= SECONDARY_EXEC_SHADOW_VMCS; | 7249 | SECONDARY_EXEC_SHADOW_VMCS); |
7075 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | ||
7076 | vmcs_write64(VMCS_LINK_POINTER, | 7250 | vmcs_write64(VMCS_LINK_POINTER, |
7077 | __pa(vmx->nested.current_shadow_vmcs)); | 7251 | __pa(vmx->nested.current_shadow_vmcs)); |
7078 | vmx->nested.sync_shadow_vmcs = true; | 7252 | vmx->nested.sync_shadow_vmcs = true; |
@@ -7178,7 +7352,58 @@ static int handle_invept(struct kvm_vcpu *vcpu) | |||
7178 | 7352 | ||
7179 | static int handle_invvpid(struct kvm_vcpu *vcpu) | 7353 | static int handle_invvpid(struct kvm_vcpu *vcpu) |
7180 | { | 7354 | { |
7181 | kvm_queue_exception(vcpu, UD_VECTOR); | 7355 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
7356 | u32 vmx_instruction_info; | ||
7357 | unsigned long type, types; | ||
7358 | gva_t gva; | ||
7359 | struct x86_exception e; | ||
7360 | int vpid; | ||
7361 | |||
7362 | if (!(vmx->nested.nested_vmx_secondary_ctls_high & | ||
7363 | SECONDARY_EXEC_ENABLE_VPID) || | ||
7364 | !(vmx->nested.nested_vmx_vpid_caps & VMX_VPID_INVVPID_BIT)) { | ||
7365 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
7366 | return 1; | ||
7367 | } | ||
7368 | |||
7369 | if (!nested_vmx_check_permission(vcpu)) | ||
7370 | return 1; | ||
7371 | |||
7372 | vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); | ||
7373 | type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf); | ||
7374 | |||
7375 | types = (vmx->nested.nested_vmx_vpid_caps >> 8) & 0x7; | ||
7376 | |||
7377 | if (!(types & (1UL << type))) { | ||
7378 | nested_vmx_failValid(vcpu, | ||
7379 | VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); | ||
7380 | return 1; | ||
7381 | } | ||
7382 | |||
7383 | /* According to the Intel VMX instruction reference, the memory | ||
7384 | * operand is read even if it isn't needed (e.g., for type==global). | ||
7385 | */ | ||
7386 | if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), | ||
7387 | vmx_instruction_info, false, &gva)) | ||
7388 | return 1; | ||
7389 | if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vpid, | ||
7390 | sizeof(u32), &e)) { | ||
7391 | kvm_inject_page_fault(vcpu, &e); | ||
7392 | return 1; | ||
7393 | } | ||
7394 | |||
7395 | switch (type) { | ||
7396 | case VMX_VPID_EXTENT_ALL_CONTEXT: | ||
7397 | __vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02); | ||
7398 | nested_vmx_succeed(vcpu); | ||
7399 | break; | ||
7400 | default: | ||
7401 | /* Trap single context invalidation invvpid calls */ | ||
7402 | BUG_ON(1); | ||
7403 | break; | ||
7404 | } | ||
7405 | |||
7406 | skip_emulated_instruction(vcpu); | ||
7182 | return 1; | 7407 | return 1; |
7183 | } | 7408 | } |
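The supported-types check in handle_invvpid() treats three bits of the stored VPID capability word (as this patch keeps it in nested_vmx_vpid_caps, shifted so bit 0 corresponds to type 0) as a bitmap indexed by the INVVPID type: 0 = individual address, 1 = single context, 2 = all context. A small user-space sketch of that test; the caps value is illustrative, not a real MSR reading:

	#include <assert.h>

	/* INVVPID type encodings, matching the bitmap test above. */
	enum {
		VPID_EXTENT_INDIVIDUAL_ADDR = 0,
		VPID_EXTENT_SINGLE_CONTEXT = 1,
		VPID_EXTENT_ALL_CONTEXT = 2,
	};

	int main(void)
	{
		unsigned long vpid_caps = 0x7UL << 8;	/* illustrative: types 0-2 advertised */
		unsigned long types = (vpid_caps >> 8) & 0x7;

		/* The all-context type used by the handler is advertised here. */
		assert(types & (1UL << VPID_EXTENT_ALL_CONTEXT));
		assert(types & (1UL << VPID_EXTENT_SINGLE_CONTEXT));
		return 0;
	}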
7184 | 7409 | ||
@@ -7207,6 +7432,13 @@ static int handle_pml_full(struct kvm_vcpu *vcpu) | |||
7207 | return 1; | 7432 | return 1; |
7208 | } | 7433 | } |
7209 | 7434 | ||
7435 | static int handle_pcommit(struct kvm_vcpu *vcpu) | ||
7436 | { | ||
7437 | /* We never intercept the pcommit instruction for the L1 guest. */ | ||
7438 | WARN_ON(1); | ||
7439 | return 1; | ||
7440 | } | ||
7441 | |||
7210 | /* | 7442 | /* |
7211 | * The exit handlers return 1 if the exit was handled fully and guest execution | 7443 | * The exit handlers return 1 if the exit was handled fully and guest execution |
7212 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs | 7444 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs |
@@ -7257,6 +7489,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { | |||
7257 | [EXIT_REASON_XSAVES] = handle_xsaves, | 7489 | [EXIT_REASON_XSAVES] = handle_xsaves, |
7258 | [EXIT_REASON_XRSTORS] = handle_xrstors, | 7490 | [EXIT_REASON_XRSTORS] = handle_xrstors, |
7259 | [EXIT_REASON_PML_FULL] = handle_pml_full, | 7491 | [EXIT_REASON_PML_FULL] = handle_pml_full, |
7492 | [EXIT_REASON_PCOMMIT] = handle_pcommit, | ||
7260 | }; | 7493 | }; |
7261 | 7494 | ||
7262 | static const int kvm_vmx_max_exit_handlers = | 7495 | static const int kvm_vmx_max_exit_handlers = |
@@ -7558,6 +7791,8 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
7558 | * the XSS exit bitmap in vmcs12. | 7791 | * the XSS exit bitmap in vmcs12. |
7559 | */ | 7792 | */ |
7560 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES); | 7793 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES); |
7794 | case EXIT_REASON_PCOMMIT: | ||
7795 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_PCOMMIT); | ||
7561 | default: | 7796 | default: |
7562 | return true; | 7797 | return true; |
7563 | } | 7798 | } |
@@ -7569,10 +7804,9 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) | |||
7569 | *info2 = vmcs_read32(VM_EXIT_INTR_INFO); | 7804 | *info2 = vmcs_read32(VM_EXIT_INTR_INFO); |
7570 | } | 7805 | } |
7571 | 7806 | ||
7572 | static int vmx_enable_pml(struct vcpu_vmx *vmx) | 7807 | static int vmx_create_pml_buffer(struct vcpu_vmx *vmx) |
7573 | { | 7808 | { |
7574 | struct page *pml_pg; | 7809 | struct page *pml_pg; |
7575 | u32 exec_control; | ||
7576 | 7810 | ||
7577 | pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO); | 7811 | pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO); |
7578 | if (!pml_pg) | 7812 | if (!pml_pg) |
@@ -7583,24 +7817,15 @@ static int vmx_enable_pml(struct vcpu_vmx *vmx) | |||
7583 | vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg)); | 7817 | vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg)); |
7584 | vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); | 7818 | vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); |
7585 | 7819 | ||
7586 | exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | ||
7587 | exec_control |= SECONDARY_EXEC_ENABLE_PML; | ||
7588 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | ||
7589 | |||
7590 | return 0; | 7820 | return 0; |
7591 | } | 7821 | } |
7592 | 7822 | ||
7593 | static void vmx_disable_pml(struct vcpu_vmx *vmx) | 7823 | static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx) |
7594 | { | 7824 | { |
7595 | u32 exec_control; | 7825 | if (vmx->pml_pg) { |
7596 | 7826 | __free_page(vmx->pml_pg); | |
7597 | ASSERT(vmx->pml_pg); | 7827 | vmx->pml_pg = NULL; |
7598 | __free_page(vmx->pml_pg); | 7828 | } |
7599 | vmx->pml_pg = NULL; | ||
7600 | |||
7601 | exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | ||
7602 | exec_control &= ~SECONDARY_EXEC_ENABLE_PML; | ||
7603 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | ||
7604 | } | 7829 | } |
7605 | 7830 | ||
7606 | static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu) | 7831 | static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu) |
@@ -7782,6 +8007,9 @@ static void dump_vmcs(void) | |||
7782 | vmcs_read32(IDT_VECTORING_INFO_FIELD), | 8007 | vmcs_read32(IDT_VECTORING_INFO_FIELD), |
7783 | vmcs_read32(IDT_VECTORING_ERROR_CODE)); | 8008 | vmcs_read32(IDT_VECTORING_ERROR_CODE)); |
7784 | pr_err("TSC Offset = 0x%016lx\n", vmcs_readl(TSC_OFFSET)); | 8009 | pr_err("TSC Offset = 0x%016lx\n", vmcs_readl(TSC_OFFSET)); |
8010 | if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING) | ||
8011 | pr_err("TSC Multiplier = 0x%016lx\n", | ||
8012 | vmcs_readl(TSC_MULTIPLIER)); | ||
7785 | if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW) | 8013 | if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW) |
7786 | pr_err("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD)); | 8014 | pr_err("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD)); |
7787 | if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR) | 8015 | if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR) |
@@ -7924,10 +8152,10 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) | |||
7924 | * apicv | 8152 | * apicv |
7925 | */ | 8153 | */ |
7926 | if (!cpu_has_vmx_virtualize_x2apic_mode() || | 8154 | if (!cpu_has_vmx_virtualize_x2apic_mode() || |
7927 | !vmx_vm_has_apicv(vcpu->kvm)) | 8155 | !vmx_cpu_uses_apicv(vcpu)) |
7928 | return; | 8156 | return; |
7929 | 8157 | ||
7930 | if (!vm_need_tpr_shadow(vcpu->kvm)) | 8158 | if (!cpu_need_tpr_shadow(vcpu)) |
7931 | return; | 8159 | return; |
7932 | 8160 | ||
7933 | sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | 8161 | sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); |
@@ -8029,9 +8257,10 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) | |||
8029 | } | 8257 | } |
8030 | } | 8258 | } |
8031 | 8259 | ||
8032 | static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) | 8260 | static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu) |
8033 | { | 8261 | { |
8034 | if (!vmx_vm_has_apicv(vcpu->kvm)) | 8262 | u64 *eoi_exit_bitmap = vcpu->arch.eoi_exit_bitmap; |
8263 | if (!vmx_cpu_uses_apicv(vcpu)) | ||
8035 | return; | 8264 | return; |
8036 | 8265 | ||
8037 | vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]); | 8266 | vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]); |
@@ -8477,8 +8706,8 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) | |||
8477 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 8706 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
8478 | 8707 | ||
8479 | if (enable_pml) | 8708 | if (enable_pml) |
8480 | vmx_disable_pml(vmx); | 8709 | vmx_destroy_pml_buffer(vmx); |
8481 | free_vpid(vmx); | 8710 | free_vpid(vmx->vpid); |
8482 | leave_guest_mode(vcpu); | 8711 | leave_guest_mode(vcpu); |
8483 | vmx_load_vmcs01(vcpu); | 8712 | vmx_load_vmcs01(vcpu); |
8484 | free_nested(vmx); | 8713 | free_nested(vmx); |
@@ -8497,7 +8726,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
8497 | if (!vmx) | 8726 | if (!vmx) |
8498 | return ERR_PTR(-ENOMEM); | 8727 | return ERR_PTR(-ENOMEM); |
8499 | 8728 | ||
8500 | allocate_vpid(vmx); | 8729 | vmx->vpid = allocate_vpid(); |
8501 | 8730 | ||
8502 | err = kvm_vcpu_init(&vmx->vcpu, kvm, id); | 8731 | err = kvm_vcpu_init(&vmx->vcpu, kvm, id); |
8503 | if (err) | 8732 | if (err) |
@@ -8530,7 +8759,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
8530 | put_cpu(); | 8759 | put_cpu(); |
8531 | if (err) | 8760 | if (err) |
8532 | goto free_vmcs; | 8761 | goto free_vmcs; |
8533 | if (vm_need_virtualize_apic_accesses(kvm)) { | 8762 | if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) { |
8534 | err = alloc_apic_access_page(kvm); | 8763 | err = alloc_apic_access_page(kvm); |
8535 | if (err) | 8764 | if (err) |
8536 | goto free_vmcs; | 8765 | goto free_vmcs; |
@@ -8545,8 +8774,10 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
8545 | goto free_vmcs; | 8774 | goto free_vmcs; |
8546 | } | 8775 | } |
8547 | 8776 | ||
8548 | if (nested) | 8777 | if (nested) { |
8549 | nested_vmx_setup_ctls_msrs(vmx); | 8778 | nested_vmx_setup_ctls_msrs(vmx); |
8779 | vmx->nested.vpid02 = allocate_vpid(); | ||
8780 | } | ||
8550 | 8781 | ||
8551 | vmx->nested.posted_intr_nv = -1; | 8782 | vmx->nested.posted_intr_nv = -1; |
8552 | vmx->nested.current_vmptr = -1ull; | 8783 | vmx->nested.current_vmptr = -1ull; |
@@ -8559,7 +8790,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
8559 | * for the guest, etc. | 8790 | * for the guest, etc. |
8560 | */ | 8791 | */ |
8561 | if (enable_pml) { | 8792 | if (enable_pml) { |
8562 | err = vmx_enable_pml(vmx); | 8793 | err = vmx_create_pml_buffer(vmx); |
8563 | if (err) | 8794 | if (err) |
8564 | goto free_vmcs; | 8795 | goto free_vmcs; |
8565 | } | 8796 | } |
@@ -8567,13 +8798,14 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
8567 | return &vmx->vcpu; | 8798 | return &vmx->vcpu; |
8568 | 8799 | ||
8569 | free_vmcs: | 8800 | free_vmcs: |
8801 | free_vpid(vmx->nested.vpid02); | ||
8570 | free_loaded_vmcs(vmx->loaded_vmcs); | 8802 | free_loaded_vmcs(vmx->loaded_vmcs); |
8571 | free_msrs: | 8803 | free_msrs: |
8572 | kfree(vmx->guest_msrs); | 8804 | kfree(vmx->guest_msrs); |
8573 | uninit_vcpu: | 8805 | uninit_vcpu: |
8574 | kvm_vcpu_uninit(&vmx->vcpu); | 8806 | kvm_vcpu_uninit(&vmx->vcpu); |
8575 | free_vcpu: | 8807 | free_vcpu: |
8576 | free_vpid(vmx); | 8808 | free_vpid(vmx->vpid); |
8577 | kmem_cache_free(kvm_vcpu_cache, vmx); | 8809 | kmem_cache_free(kvm_vcpu_cache, vmx); |
8578 | return ERR_PTR(err); | 8810 | return ERR_PTR(err); |
8579 | } | 8811 | } |
@@ -8648,49 +8880,67 @@ static int vmx_get_lpage_level(void) | |||
8648 | return PT_PDPE_LEVEL; | 8880 | return PT_PDPE_LEVEL; |
8649 | } | 8881 | } |
8650 | 8882 | ||
8883 | static void vmcs_set_secondary_exec_control(u32 new_ctl) | ||
8884 | { | ||
8885 | /* | ||
8886 | * These bits in the secondary execution controls field | ||
8887 | * are dynamic; the others are mostly based on the hypervisor | ||
8888 | * architecture and the guest's CPUID. Do not touch the | ||
8889 | * dynamic bits. | ||
8890 | */ | ||
8891 | u32 mask = | ||
8892 | SECONDARY_EXEC_SHADOW_VMCS | | ||
8893 | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | | ||
8894 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | ||
8895 | |||
8896 | u32 cur_ctl = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | ||
8897 | |||
8898 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, | ||
8899 | (new_ctl & ~mask) | (cur_ctl & mask)); | ||
8900 | } | ||
8901 | |||
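The masked write in vmcs_set_secondary_exec_control() is a plain read-modify-write: bits in mask keep their current VMCS value, everything else follows new_ctl. A minimal user-space sketch of that bit arithmetic; the control-bit positions here are stand-ins for the SECONDARY_EXEC_* definitions, chosen only for illustration:

	#include <assert.h>
	#include <stdint.h>

	/* Stand-ins for the "dynamic" SECONDARY_EXEC_* bits. */
	#define DYN_A (UINT32_C(1) << 14)
	#define DYN_B (UINT32_C(1) << 4)

	int main(void)
	{
		uint32_t mask = DYN_A | DYN_B;
		uint32_t cur_ctl = DYN_A | (UINT32_C(1) << 0);	/* current VMCS value */
		uint32_t new_ctl = DYN_B | (UINT32_C(1) << 1);	/* caller's request */

		uint32_t merged = (new_ctl & ~mask) | (cur_ctl & mask);

		/* Dynamic bits keep their current value; static bits follow new_ctl. */
		assert(merged == (DYN_A | (UINT32_C(1) << 1)));
		return 0;
	}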
8651 | static void vmx_cpuid_update(struct kvm_vcpu *vcpu) | 8902 | static void vmx_cpuid_update(struct kvm_vcpu *vcpu) |
8652 | { | 8903 | { |
8653 | struct kvm_cpuid_entry2 *best; | 8904 | struct kvm_cpuid_entry2 *best; |
8654 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 8905 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
8655 | u32 exec_control; | 8906 | u32 secondary_exec_ctl = vmx_secondary_exec_control(vmx); |
8656 | 8907 | ||
8657 | vmx->rdtscp_enabled = false; | ||
8658 | if (vmx_rdtscp_supported()) { | 8908 | if (vmx_rdtscp_supported()) { |
8659 | exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | 8909 | bool rdtscp_enabled = guest_cpuid_has_rdtscp(vcpu); |
8660 | if (exec_control & SECONDARY_EXEC_RDTSCP) { | 8910 | if (!rdtscp_enabled) |
8661 | best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | 8911 | secondary_exec_ctl &= ~SECONDARY_EXEC_RDTSCP; |
8662 | if (best && (best->edx & bit(X86_FEATURE_RDTSCP))) | 8912 | |
8663 | vmx->rdtscp_enabled = true; | 8913 | if (nested) { |
8664 | else { | 8914 | if (rdtscp_enabled) |
8665 | exec_control &= ~SECONDARY_EXEC_RDTSCP; | 8915 | vmx->nested.nested_vmx_secondary_ctls_high |= |
8666 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, | 8916 | SECONDARY_EXEC_RDTSCP; |
8667 | exec_control); | 8917 | else |
8668 | } | 8918 | vmx->nested.nested_vmx_secondary_ctls_high &= |
8919 | ~SECONDARY_EXEC_RDTSCP; | ||
8669 | } | 8920 | } |
8670 | if (nested && !vmx->rdtscp_enabled) | ||
8671 | vmx->nested.nested_vmx_secondary_ctls_high &= | ||
8672 | ~SECONDARY_EXEC_RDTSCP; | ||
8673 | } | 8921 | } |
8674 | 8922 | ||
8675 | /* Exposing INVPCID only when PCID is exposed */ | 8923 | /* Exposing INVPCID only when PCID is exposed */ |
8676 | best = kvm_find_cpuid_entry(vcpu, 0x7, 0); | 8924 | best = kvm_find_cpuid_entry(vcpu, 0x7, 0); |
8677 | if (vmx_invpcid_supported() && | 8925 | if (vmx_invpcid_supported() && |
8678 | best && (best->ebx & bit(X86_FEATURE_INVPCID)) && | 8926 | (!best || !(best->ebx & bit(X86_FEATURE_INVPCID)) || |
8679 | guest_cpuid_has_pcid(vcpu)) { | 8927 | !guest_cpuid_has_pcid(vcpu))) { |
8680 | exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | 8928 | secondary_exec_ctl &= ~SECONDARY_EXEC_ENABLE_INVPCID; |
8681 | exec_control |= SECONDARY_EXEC_ENABLE_INVPCID; | 8929 | |
8682 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, | ||
8683 | exec_control); | ||
8684 | } else { | ||
8685 | if (cpu_has_secondary_exec_ctrls()) { | ||
8686 | exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | ||
8687 | exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID; | ||
8688 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, | ||
8689 | exec_control); | ||
8690 | } | ||
8691 | if (best) | 8930 | if (best) |
8692 | best->ebx &= ~bit(X86_FEATURE_INVPCID); | 8931 | best->ebx &= ~bit(X86_FEATURE_INVPCID); |
8693 | } | 8932 | } |
8933 | |||
8934 | vmcs_set_secondary_exec_control(secondary_exec_ctl); | ||
8935 | |||
8936 | if (static_cpu_has(X86_FEATURE_PCOMMIT) && nested) { | ||
8937 | if (guest_cpuid_has_pcommit(vcpu)) | ||
8938 | vmx->nested.nested_vmx_secondary_ctls_high |= | ||
8939 | SECONDARY_EXEC_PCOMMIT; | ||
8940 | else | ||
8941 | vmx->nested.nested_vmx_secondary_ctls_high &= | ||
8942 | ~SECONDARY_EXEC_PCOMMIT; | ||
8943 | } | ||
8694 | } | 8944 | } |
8695 | 8945 | ||
8696 | static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) | 8946 | static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) |
@@ -9298,13 +9548,13 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
9298 | 9548 | ||
9299 | if (cpu_has_secondary_exec_ctrls()) { | 9549 | if (cpu_has_secondary_exec_ctrls()) { |
9300 | exec_control = vmx_secondary_exec_control(vmx); | 9550 | exec_control = vmx_secondary_exec_control(vmx); |
9301 | if (!vmx->rdtscp_enabled) | 9551 | |
9302 | exec_control &= ~SECONDARY_EXEC_RDTSCP; | ||
9303 | /* Take the following fields only from vmcs12 */ | 9552 | /* Take the following fields only from vmcs12 */ |
9304 | exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | | 9553 | exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | |
9305 | SECONDARY_EXEC_RDTSCP | | 9554 | SECONDARY_EXEC_RDTSCP | |
9306 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | | 9555 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | |
9307 | SECONDARY_EXEC_APIC_REGISTER_VIRT); | 9556 | SECONDARY_EXEC_APIC_REGISTER_VIRT | |
9557 | SECONDARY_EXEC_PCOMMIT); | ||
9308 | if (nested_cpu_has(vmcs12, | 9558 | if (nested_cpu_has(vmcs12, |
9309 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) | 9559 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) |
9310 | exec_control |= vmcs12->secondary_vm_exec_control; | 9560 | exec_control |= vmcs12->secondary_vm_exec_control; |
@@ -9323,7 +9573,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
9323 | vmcs_write64(APIC_ACCESS_ADDR, | 9573 | vmcs_write64(APIC_ACCESS_ADDR, |
9324 | page_to_phys(vmx->nested.apic_access_page)); | 9574 | page_to_phys(vmx->nested.apic_access_page)); |
9325 | } else if (!(nested_cpu_has_virt_x2apic_mode(vmcs12)) && | 9575 | } else if (!(nested_cpu_has_virt_x2apic_mode(vmcs12)) && |
9326 | (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))) { | 9576 | cpu_need_virtualize_apic_accesses(&vmx->vcpu)) { |
9327 | exec_control |= | 9577 | exec_control |= |
9328 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | 9578 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; |
9329 | kvm_vcpu_reload_apic_access_page(vcpu); | 9579 | kvm_vcpu_reload_apic_access_page(vcpu); |
@@ -9433,12 +9683,24 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
9433 | 9683 | ||
9434 | if (enable_vpid) { | 9684 | if (enable_vpid) { |
9435 | /* | 9685 | /* |
9436 | * Trivially support vpid by letting L2s share their parent | 9686 | * There is no direct mapping between vpid02 and vpid12, the |
9437 | * L1's vpid. TODO: move to a more elaborate solution, giving | 9687 | * vpid02 is per-vCPU for L0 and reused while the value of |
9438 | * each L2 its own vpid and exposing the vpid feature to L1. | 9688 | * vpid12 is changed with one invvpid during nested vmentry. |
9689 | * The vpid12 is allocated by L1 for L2, so it will not | ||
9690 | * influence the global bitmap (for vpid01 and vpid02 allocation) | ||
9691 | * even if L1 spawns a lot of nested vCPUs. |
9439 | */ | 9692 | */ |
9440 | vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); | 9693 | if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) { |
9441 | vmx_flush_tlb(vcpu); | 9694 | vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02); |
9695 | if (vmcs12->virtual_processor_id != vmx->nested.last_vpid) { | ||
9696 | vmx->nested.last_vpid = vmcs12->virtual_processor_id; | ||
9697 | __vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02); | ||
9698 | } | ||
9699 | } else { | ||
9700 | vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); | ||
9701 | vmx_flush_tlb(vcpu); | ||
9702 | } | ||
9703 | |||
9442 | } | 9704 | } |
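A rough model of the reuse described above: one host-side tag (vpid02) serves every vpid12 that L1 programs, and a flush is only needed when the vpid12 value changes. This is a sketch with hypothetical names, not the kernel's structures:

	#include <stdint.h>
	#include <stdio.h>

	/* Reuse one host VPID (vpid02) across all of L1's VPIDs (vpid12),
	 * flushing only when L1 switches L2 to a different vpid12. */
	struct nested_vpid_state {
		uint16_t vpid02;	/* allocated once per vCPU by L0 */
		uint16_t last_vpid;	/* last vpid12 seen in vmcs12 */
	};

	static void nested_load_vpid(struct nested_vpid_state *s, uint16_t vpid12)
	{
		if (vpid12 != s->last_vpid) {
			s->last_vpid = vpid12;
			printf("invvpid: flush mappings tagged vpid02=%u\n", s->vpid02);
		}
	}

	int main(void)
	{
		struct nested_vpid_state s = { .vpid02 = 7, .last_vpid = 0 };

		nested_load_vpid(&s, 3);	/* flush: first use of vpid12=3 */
		nested_load_vpid(&s, 3);	/* no flush: same vpid12, TLB tags stay valid */
		nested_load_vpid(&s, 5);	/* flush: L1 switched L2's vpid */
		return 0;
	}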
9443 | 9705 | ||
9444 | if (nested_cpu_has_ept(vmcs12)) { | 9706 | if (nested_cpu_has_ept(vmcs12)) { |
@@ -10278,6 +10540,201 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm, | |||
10278 | kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask); | 10540 | kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask); |
10279 | } | 10541 | } |
10280 | 10542 | ||
10543 | /* | ||
10544 | * This routine does the following things for a vCPU which is going | ||
10545 | * to be blocked if VT-d PI is enabled. | ||
10546 | * - Store the vCPU in the wakeup list, so that when interrupts | ||
10547 | * happen we can find the right vCPU to wake up. | ||
10548 | * - Change the Posted-interrupt descriptor as below: | ||
10549 | * 'NDST' <-- vcpu->pre_pcpu | ||
10550 | * 'NV' <-- POSTED_INTR_WAKEUP_VECTOR | ||
10551 | * - If 'ON' is set during this process, meaning at least one | ||
10552 | * interrupt is posted for this vCPU, we cannot block it; in | ||
10553 | * this case, return 1. Otherwise, return 0. | ||
10554 | * | ||
10555 | */ | ||
10556 | static int vmx_pre_block(struct kvm_vcpu *vcpu) | ||
10557 | { | ||
10558 | unsigned long flags; | ||
10559 | unsigned int dest; | ||
10560 | struct pi_desc old, new; | ||
10561 | struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); | ||
10562 | |||
10563 | if (!kvm_arch_has_assigned_device(vcpu->kvm) || | ||
10564 | !irq_remapping_cap(IRQ_POSTING_CAP)) | ||
10565 | return 0; | ||
10566 | |||
10567 | vcpu->pre_pcpu = vcpu->cpu; | ||
10568 | spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock, | ||
10569 | vcpu->pre_pcpu), flags); | ||
10570 | list_add_tail(&vcpu->blocked_vcpu_list, | ||
10571 | &per_cpu(blocked_vcpu_on_cpu, | ||
10572 | vcpu->pre_pcpu)); | ||
10573 | spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock, | ||
10574 | vcpu->pre_pcpu), flags); | ||
10575 | |||
10576 | do { | ||
10577 | old.control = new.control = pi_desc->control; | ||
10578 | |||
10579 | /* | ||
10580 | * We should not block the vCPU if | ||
10581 | * an interrupt is posted for it. | ||
10582 | */ | ||
10583 | if (pi_test_on(pi_desc) == 1) { | ||
10584 | spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock, | ||
10585 | vcpu->pre_pcpu), flags); | ||
10586 | list_del(&vcpu->blocked_vcpu_list); | ||
10587 | spin_unlock_irqrestore( | ||
10588 | &per_cpu(blocked_vcpu_on_cpu_lock, | ||
10589 | vcpu->pre_pcpu), flags); | ||
10590 | vcpu->pre_pcpu = -1; | ||
10591 | |||
10592 | return 1; | ||
10593 | } | ||
10594 | |||
10595 | WARN((pi_desc->sn == 1), | ||
10596 | "Warning: SN field of posted-interrupts " | ||
10597 | "is set before blocking\n"); | ||
10598 | |||
10599 | /* | ||
10600 | * Since the vCPU can be preempted during this process, | ||
10601 | * vcpu->cpu could be different from pre_pcpu, so we | ||
10602 | * need to set pre_pcpu as the destination of the wakeup | ||
10603 | * notification event; then we can find the right vCPU | ||
10604 | * to wake up in the wakeup handler if interrupts happen | ||
10605 | * while the vCPU is in the blocked state. | ||
10606 | */ | ||
10607 | dest = cpu_physical_id(vcpu->pre_pcpu); | ||
10608 | |||
10609 | if (x2apic_enabled()) | ||
10610 | new.ndst = dest; | ||
10611 | else | ||
10612 | new.ndst = (dest << 8) & 0xFF00; | ||
10613 | |||
10614 | /* set 'NV' to 'wakeup vector' */ | ||
10615 | new.nv = POSTED_INTR_WAKEUP_VECTOR; | ||
10616 | } while (cmpxchg(&pi_desc->control, old.control, | ||
10617 | new.control) != old.control); | ||
10618 | |||
10619 | return 0; | ||
10620 | } | ||
10621 | |||
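The NDST update in the loop above encodes the destination differently per APIC mode: the full 32-bit APIC ID in x2APIC mode, or bits 15:8 in xAPIC mode, which is what the (dest << 8) & 0xFF00 expression produces. A small sketch of that encoding with illustrative values:

	#include <inttypes.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Encode a physical APIC ID into the posted-interrupt NDST field. */
	static uint32_t pi_encode_ndst(uint32_t apic_id, int x2apic)
	{
		return x2apic ? apic_id : (apic_id << 8) & 0xFF00;
	}

	int main(void)
	{
		printf("xAPIC  NDST for APIC ID 5: 0x%04" PRIx32 "\n",
		       pi_encode_ndst(5, 0));	/* 0x0500: ID in bits 15:8 */
		printf("x2APIC NDST for APIC ID 5: 0x%" PRIx32 "\n",
		       pi_encode_ndst(5, 1));	/* 0x5: full 32-bit ID */
		return 0;
	}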
10622 | static void vmx_post_block(struct kvm_vcpu *vcpu) | ||
10623 | { | ||
10624 | struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); | ||
10625 | struct pi_desc old, new; | ||
10626 | unsigned int dest; | ||
10627 | unsigned long flags; | ||
10628 | |||
10629 | if (!kvm_arch_has_assigned_device(vcpu->kvm) || | ||
10630 | !irq_remapping_cap(IRQ_POSTING_CAP)) | ||
10631 | return; | ||
10632 | |||
10633 | do { | ||
10634 | old.control = new.control = pi_desc->control; | ||
10635 | |||
10636 | dest = cpu_physical_id(vcpu->cpu); | ||
10637 | |||
10638 | if (x2apic_enabled()) | ||
10639 | new.ndst = dest; | ||
10640 | else | ||
10641 | new.ndst = (dest << 8) & 0xFF00; | ||
10642 | |||
10643 | /* Allow posting non-urgent interrupts */ | ||
10644 | new.sn = 0; | ||
10645 | |||
10646 | /* set 'NV' to 'notification vector' */ | ||
10647 | new.nv = POSTED_INTR_VECTOR; | ||
10648 | } while (cmpxchg(&pi_desc->control, old.control, | ||
10649 | new.control) != old.control); | ||
10650 | |||
10651 | if (vcpu->pre_pcpu != -1) { | ||
10652 | spin_lock_irqsave( | ||
10653 | &per_cpu(blocked_vcpu_on_cpu_lock, | ||
10654 | vcpu->pre_pcpu), flags); | ||
10655 | list_del(&vcpu->blocked_vcpu_list); | ||
10656 | spin_unlock_irqrestore( | ||
10657 | &per_cpu(blocked_vcpu_on_cpu_lock, | ||
10658 | vcpu->pre_pcpu), flags); | ||
10659 | vcpu->pre_pcpu = -1; | ||
10660 | } | ||
10661 | } | ||
10662 | |||
10663 | /* | ||
10664 | * vmx_update_pi_irte - set IRTE for Posted-Interrupts | ||
10665 | * | ||
10666 | * @kvm: kvm | ||
10667 | * @host_irq: host irq of the interrupt | ||
10668 | * @guest_irq: gsi of the interrupt | ||
10669 | * @set: set or unset PI | ||
10670 | * returns 0 on success, < 0 on failure | ||
10671 | */ | ||
10672 | static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq, | ||
10673 | uint32_t guest_irq, bool set) | ||
10674 | { | ||
10675 | struct kvm_kernel_irq_routing_entry *e; | ||
10676 | struct kvm_irq_routing_table *irq_rt; | ||
10677 | struct kvm_lapic_irq irq; | ||
10678 | struct kvm_vcpu *vcpu; | ||
10679 | struct vcpu_data vcpu_info; | ||
10680 | int idx, ret = -EINVAL; | ||
10681 | |||
10682 | if (!kvm_arch_has_assigned_device(kvm) || | ||
10683 | !irq_remapping_cap(IRQ_POSTING_CAP)) | ||
10684 | return 0; | ||
10685 | |||
10686 | idx = srcu_read_lock(&kvm->irq_srcu); | ||
10687 | irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); | ||
10688 | BUG_ON(guest_irq >= irq_rt->nr_rt_entries); | ||
10689 | |||
10690 | hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) { | ||
10691 | if (e->type != KVM_IRQ_ROUTING_MSI) | ||
10692 | continue; | ||
10693 | /* | ||
10694 | * VT-d PI cannot support posting multicast/broadcast | ||
10695 | * interrupts to a vCPU, so we still use interrupt remapping | ||
10696 | * for these kinds of interrupts. | ||
10697 | * | ||
10698 | * For lowest-priority interrupts, we only support | ||
10699 | * those with a single CPU as the destination, e.g. the user | ||
10700 | * configures the interrupts via /proc/irq or uses | ||
10701 | * irqbalance to make the interrupts single-CPU. | ||
10702 | * | ||
10703 | * We will support full lowest-priority interrupts later. | ||
10704 | */ | ||
10705 | |||
10706 | kvm_set_msi_irq(e, &irq); | ||
10707 | if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) | ||
10708 | continue; | ||
10709 | |||
10710 | vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu)); | ||
10711 | vcpu_info.vector = irq.vector; | ||
10712 | |||
10713 | trace_kvm_pi_irte_update(vcpu->vcpu_id, e->gsi, | ||
10714 | vcpu_info.vector, vcpu_info.pi_desc_addr, set); | ||
10715 | |||
10716 | if (set) | ||
10717 | ret = irq_set_vcpu_affinity(host_irq, &vcpu_info); | ||
10718 | else { | ||
10719 | /* suppress notification event before unposting */ | ||
10720 | pi_set_sn(vcpu_to_pi_desc(vcpu)); | ||
10721 | ret = irq_set_vcpu_affinity(host_irq, NULL); | ||
10722 | pi_clear_sn(vcpu_to_pi_desc(vcpu)); | ||
10723 | } | ||
10724 | |||
10725 | if (ret < 0) { | ||
10726 | printk(KERN_INFO "%s: failed to update PI IRTE\n", | ||
10727 | __func__); | ||
10728 | goto out; | ||
10729 | } | ||
10730 | } | ||
10731 | |||
10732 | ret = 0; | ||
10733 | out: | ||
10734 | srcu_read_unlock(&kvm->irq_srcu, idx); | ||
10735 | return ret; | ||
10736 | } | ||
10737 | |||
10281 | static struct kvm_x86_ops vmx_x86_ops = { | 10738 | static struct kvm_x86_ops vmx_x86_ops = { |
10282 | .cpu_has_kvm_support = cpu_has_kvm_support, | 10739 | .cpu_has_kvm_support = cpu_has_kvm_support, |
10283 | .disabled_by_bios = vmx_disabled_by_bios, | 10740 | .disabled_by_bios = vmx_disabled_by_bios, |
@@ -10297,7 +10754,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
10297 | .vcpu_load = vmx_vcpu_load, | 10754 | .vcpu_load = vmx_vcpu_load, |
10298 | .vcpu_put = vmx_vcpu_put, | 10755 | .vcpu_put = vmx_vcpu_put, |
10299 | 10756 | ||
10300 | .update_db_bp_intercept = update_exception_bitmap, | 10757 | .update_bp_intercept = update_exception_bitmap, |
10301 | .get_msr = vmx_get_msr, | 10758 | .get_msr = vmx_get_msr, |
10302 | .set_msr = vmx_set_msr, | 10759 | .set_msr = vmx_set_msr, |
10303 | .get_segment_base = vmx_get_segment_base, | 10760 | .get_segment_base = vmx_get_segment_base, |
@@ -10347,7 +10804,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
10347 | .update_cr8_intercept = update_cr8_intercept, | 10804 | .update_cr8_intercept = update_cr8_intercept, |
10348 | .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode, | 10805 | .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode, |
10349 | .set_apic_access_page_addr = vmx_set_apic_access_page_addr, | 10806 | .set_apic_access_page_addr = vmx_set_apic_access_page_addr, |
10350 | .vm_has_apicv = vmx_vm_has_apicv, | 10807 | .cpu_uses_apicv = vmx_cpu_uses_apicv, |
10351 | .load_eoi_exitmap = vmx_load_eoi_exitmap, | 10808 | .load_eoi_exitmap = vmx_load_eoi_exitmap, |
10352 | .hwapic_irr_update = vmx_hwapic_irr_update, | 10809 | .hwapic_irr_update = vmx_hwapic_irr_update, |
10353 | .hwapic_isr_update = vmx_hwapic_isr_update, | 10810 | .hwapic_isr_update = vmx_hwapic_isr_update, |
@@ -10371,11 +10828,9 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
10371 | 10828 | ||
10372 | .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, | 10829 | .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, |
10373 | 10830 | ||
10374 | .set_tsc_khz = vmx_set_tsc_khz, | ||
10375 | .read_tsc_offset = vmx_read_tsc_offset, | 10831 | .read_tsc_offset = vmx_read_tsc_offset, |
10376 | .write_tsc_offset = vmx_write_tsc_offset, | 10832 | .write_tsc_offset = vmx_write_tsc_offset, |
10377 | .adjust_tsc_offset = vmx_adjust_tsc_offset, | 10833 | .adjust_tsc_offset_guest = vmx_adjust_tsc_offset_guest, |
10378 | .compute_tsc_offset = vmx_compute_tsc_offset, | ||
10379 | .read_l1_tsc = vmx_read_l1_tsc, | 10834 | .read_l1_tsc = vmx_read_l1_tsc, |
10380 | 10835 | ||
10381 | .set_tdp_cr3 = vmx_set_cr3, | 10836 | .set_tdp_cr3 = vmx_set_cr3, |
@@ -10394,7 +10849,12 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
10394 | .flush_log_dirty = vmx_flush_log_dirty, | 10849 | .flush_log_dirty = vmx_flush_log_dirty, |
10395 | .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked, | 10850 | .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked, |
10396 | 10851 | ||
10852 | .pre_block = vmx_pre_block, | ||
10853 | .post_block = vmx_post_block, | ||
10854 | |||
10397 | .pmu_ops = &intel_pmu_ops, | 10855 | .pmu_ops = &intel_pmu_ops, |
10856 | |||
10857 | .update_pi_irte = vmx_update_pi_irte, | ||
10398 | }; | 10858 | }; |
10399 | 10859 | ||
10400 | static int __init vmx_init(void) | 10860 | static int __init vmx_init(void) |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bda65690788e..eed32283d22c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -51,6 +51,8 @@ | |||
51 | #include <linux/pci.h> | 51 | #include <linux/pci.h> |
52 | #include <linux/timekeeper_internal.h> | 52 | #include <linux/timekeeper_internal.h> |
53 | #include <linux/pvclock_gtod.h> | 53 | #include <linux/pvclock_gtod.h> |
54 | #include <linux/kvm_irqfd.h> | ||
55 | #include <linux/irqbypass.h> | ||
54 | #include <trace/events/kvm.h> | 56 | #include <trace/events/kvm.h> |
55 | 57 | ||
56 | #define CREATE_TRACE_POINTS | 58 | #define CREATE_TRACE_POINTS |
@@ -64,6 +66,7 @@ | |||
64 | #include <asm/fpu/internal.h> /* Ugh! */ | 66 | #include <asm/fpu/internal.h> /* Ugh! */ |
65 | #include <asm/pvclock.h> | 67 | #include <asm/pvclock.h> |
66 | #include <asm/div64.h> | 68 | #include <asm/div64.h> |
69 | #include <asm/irq_remapping.h> | ||
67 | 70 | ||
68 | #define MAX_IO_MSRS 256 | 71 | #define MAX_IO_MSRS 256 |
69 | #define KVM_MAX_MCE_BANKS 32 | 72 | #define KVM_MAX_MCE_BANKS 32 |
@@ -90,10 +93,10 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu); | |||
90 | static void process_nmi(struct kvm_vcpu *vcpu); | 93 | static void process_nmi(struct kvm_vcpu *vcpu); |
91 | static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); | 94 | static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); |
92 | 95 | ||
93 | struct kvm_x86_ops *kvm_x86_ops; | 96 | struct kvm_x86_ops *kvm_x86_ops __read_mostly; |
94 | EXPORT_SYMBOL_GPL(kvm_x86_ops); | 97 | EXPORT_SYMBOL_GPL(kvm_x86_ops); |
95 | 98 | ||
96 | static bool ignore_msrs = 0; | 99 | static bool __read_mostly ignore_msrs = 0; |
97 | module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); | 100 | module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); |
98 | 101 | ||
99 | unsigned int min_timer_period_us = 500; | 102 | unsigned int min_timer_period_us = 500; |
@@ -102,20 +105,25 @@ module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); | |||
102 | static bool __read_mostly kvmclock_periodic_sync = true; | 105 | static bool __read_mostly kvmclock_periodic_sync = true; |
103 | module_param(kvmclock_periodic_sync, bool, S_IRUGO); | 106 | module_param(kvmclock_periodic_sync, bool, S_IRUGO); |
104 | 107 | ||
105 | bool kvm_has_tsc_control; | 108 | bool __read_mostly kvm_has_tsc_control; |
106 | EXPORT_SYMBOL_GPL(kvm_has_tsc_control); | 109 | EXPORT_SYMBOL_GPL(kvm_has_tsc_control); |
107 | u32 kvm_max_guest_tsc_khz; | 110 | u32 __read_mostly kvm_max_guest_tsc_khz; |
108 | EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz); | 111 | EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz); |
112 | u8 __read_mostly kvm_tsc_scaling_ratio_frac_bits; | ||
113 | EXPORT_SYMBOL_GPL(kvm_tsc_scaling_ratio_frac_bits); | ||
114 | u64 __read_mostly kvm_max_tsc_scaling_ratio; | ||
115 | EXPORT_SYMBOL_GPL(kvm_max_tsc_scaling_ratio); | ||
116 | static u64 __read_mostly kvm_default_tsc_scaling_ratio; | ||
109 | 117 | ||
110 | /* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */ | 118 | /* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */ |
111 | static u32 tsc_tolerance_ppm = 250; | 119 | static u32 __read_mostly tsc_tolerance_ppm = 250; |
112 | module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); | 120 | module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); |
113 | 121 | ||
114 | /* lapic timer advance (tscdeadline mode only) in nanoseconds */ | 122 | /* lapic timer advance (tscdeadline mode only) in nanoseconds */ |
115 | unsigned int lapic_timer_advance_ns = 0; | 123 | unsigned int __read_mostly lapic_timer_advance_ns = 0; |
116 | module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR); | 124 | module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR); |
117 | 125 | ||
118 | static bool backwards_tsc_observed = false; | 126 | static bool __read_mostly backwards_tsc_observed = false; |
119 | 127 | ||
120 | #define KVM_NR_SHARED_MSRS 16 | 128 | #define KVM_NR_SHARED_MSRS 16 |
121 | 129 | ||
@@ -622,7 +630,9 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
622 | if ((cr0 ^ old_cr0) & update_bits) | 630 | if ((cr0 ^ old_cr0) & update_bits) |
623 | kvm_mmu_reset_context(vcpu); | 631 | kvm_mmu_reset_context(vcpu); |
624 | 632 | ||
625 | if ((cr0 ^ old_cr0) & X86_CR0_CD) | 633 | if (((cr0 ^ old_cr0) & X86_CR0_CD) && |
634 | kvm_arch_has_noncoherent_dma(vcpu->kvm) && | ||
635 | !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED)) | ||
626 | kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL); | 636 | kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL); |
627 | 637 | ||
628 | return 0; | 638 | return 0; |
@@ -789,7 +799,7 @@ int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) | |||
789 | { | 799 | { |
790 | if (cr8 & CR8_RESERVED_BITS) | 800 | if (cr8 & CR8_RESERVED_BITS) |
791 | return 1; | 801 | return 1; |
792 | if (irqchip_in_kernel(vcpu->kvm)) | 802 | if (lapic_in_kernel(vcpu)) |
793 | kvm_lapic_set_tpr(vcpu, cr8); | 803 | kvm_lapic_set_tpr(vcpu, cr8); |
794 | else | 804 | else |
795 | vcpu->arch.cr8 = cr8; | 805 | vcpu->arch.cr8 = cr8; |
@@ -799,7 +809,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cr8); | |||
799 | 809 | ||
800 | unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) | 810 | unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) |
801 | { | 811 | { |
802 | if (irqchip_in_kernel(vcpu->kvm)) | 812 | if (lapic_in_kernel(vcpu)) |
803 | return kvm_lapic_get_cr8(vcpu); | 813 | return kvm_lapic_get_cr8(vcpu); |
804 | else | 814 | else |
805 | return vcpu->arch.cr8; | 815 | return vcpu->arch.cr8; |
@@ -953,6 +963,9 @@ static u32 emulated_msrs[] = { | |||
953 | HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC, | 963 | HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC, |
954 | HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2, | 964 | HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2, |
955 | HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL, | 965 | HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL, |
966 | HV_X64_MSR_RESET, | ||
967 | HV_X64_MSR_VP_INDEX, | ||
968 | HV_X64_MSR_VP_RUNTIME, | ||
956 | HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, | 969 | HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, |
957 | MSR_KVM_PV_EOI_EN, | 970 | MSR_KVM_PV_EOI_EN, |
958 | 971 | ||
@@ -1241,14 +1254,53 @@ static u32 adjust_tsc_khz(u32 khz, s32 ppm) | |||
1241 | return v; | 1254 | return v; |
1242 | } | 1255 | } |
1243 | 1256 | ||
1244 | static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz) | 1257 | static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale) |
1258 | { | ||
1259 | u64 ratio; | ||
1260 | |||
1261 | /* Guest TSC same frequency as host TSC? */ | ||
1262 | if (!scale) { | ||
1263 | vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio; | ||
1264 | return 0; | ||
1265 | } | ||
1266 | |||
1267 | /* TSC scaling supported? */ | ||
1268 | if (!kvm_has_tsc_control) { | ||
1269 | if (user_tsc_khz > tsc_khz) { | ||
1270 | vcpu->arch.tsc_catchup = 1; | ||
1271 | vcpu->arch.tsc_always_catchup = 1; | ||
1272 | return 0; | ||
1273 | } else { | ||
1274 | WARN(1, "user requested TSC rate below hardware speed\n"); | ||
1275 | return -1; | ||
1276 | } | ||
1277 | } | ||
1278 | |||
1279 | /* TSC scaling required - calculate ratio */ | ||
1280 | ratio = mul_u64_u32_div(1ULL << kvm_tsc_scaling_ratio_frac_bits, | ||
1281 | user_tsc_khz, tsc_khz); | ||
1282 | |||
1283 | if (ratio == 0 || ratio >= kvm_max_tsc_scaling_ratio) { | ||
1284 | WARN_ONCE(1, "Invalid TSC scaling ratio - virtual-tsc-khz=%u\n", | ||
1285 | user_tsc_khz); | ||
1286 | return -1; | ||
1287 | } | ||
1288 | |||
1289 | vcpu->arch.tsc_scaling_ratio = ratio; | ||
1290 | return 0; | ||
1291 | } | ||
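Worked example for the ratio computed above, using the 48 fractional bits VMX advertises in this series: a guest asking for 2,600,000 kHz on a 2,000,000 kHz host gets ratio = (2600000 / 2000000) * 2^48 = 1.3 * 2^48. A user-space sketch of the arithmetic, re-implementing the kernel's mul_u64_u32_div() with a GCC-style __int128 for illustration:

	#include <inttypes.h>
	#include <stdint.h>
	#include <stdio.h>

	/* User-space stand-in for the kernel's mul_u64_u32_div() helper. */
	static uint64_t mul_u64_u32_div(uint64_t a, uint32_t mul, uint32_t divisor)
	{
		return (uint64_t)(((unsigned __int128)a * mul) / divisor);
	}

	int main(void)
	{
		uint32_t frac_bits = 48;		/* kvm_tsc_scaling_ratio_frac_bits on VMX */
		uint32_t user_tsc_khz = 2600000;	/* guest wants 2.6 GHz */
		uint32_t host_tsc_khz = 2000000;	/* host TSC runs at 2.0 GHz */

		uint64_t ratio = mul_u64_u32_div(1ULL << frac_bits,
						 user_tsc_khz, host_tsc_khz);

		/* 1.3 * 2^48 truncates to 0x14ccccccccccc. */
		printf("ratio = 0x%" PRIx64 "\n", ratio);
		return 0;
	}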
1292 | |||
1293 | static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz) | ||
1245 | { | 1294 | { |
1246 | u32 thresh_lo, thresh_hi; | 1295 | u32 thresh_lo, thresh_hi; |
1247 | int use_scaling = 0; | 1296 | int use_scaling = 0; |
1248 | 1297 | ||
1249 | /* tsc_khz can be zero if TSC calibration fails */ | 1298 | /* tsc_khz can be zero if TSC calibration fails */ |
1250 | if (this_tsc_khz == 0) | 1299 | if (this_tsc_khz == 0) { |
1251 | return; | 1300 | /* set tsc_scaling_ratio to a safe value */ |
1301 | vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio; | ||
1302 | return -1; | ||
1303 | } | ||
1252 | 1304 | ||
1253 | /* Compute a scale to convert nanoseconds in TSC cycles */ | 1305 | /* Compute a scale to convert nanoseconds in TSC cycles */ |
1254 | kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, | 1306 | kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, |
@@ -1268,7 +1320,7 @@ static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz) | |||
1268 | pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", this_tsc_khz, thresh_lo, thresh_hi); | 1320 | pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", this_tsc_khz, thresh_lo, thresh_hi); |
1269 | use_scaling = 1; | 1321 | use_scaling = 1; |
1270 | } | 1322 | } |
1271 | kvm_x86_ops->set_tsc_khz(vcpu, this_tsc_khz, use_scaling); | 1323 | return set_tsc_khz(vcpu, this_tsc_khz, use_scaling); |
1272 | } | 1324 | } |
1273 | 1325 | ||
1274 | static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) | 1326 | static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) |
@@ -1314,6 +1366,48 @@ static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset) | |||
1314 | vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset; | 1366 | vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset; |
1315 | } | 1367 | } |
1316 | 1368 | ||
1369 | /* | ||
1370 | * Multiply tsc by a fixed point number represented by ratio. | ||
1371 | * | ||
1372 | * The most significant 64-N bits (mult) of ratio represent the | ||
1373 | * integral part of the fixed point number; the remaining N bits | ||
1374 | * (frac) represent the fractional part, i.e. ratio represents a fixed | ||
1375 | * point number (mult + frac * 2^(-N)). | ||
1376 | * | ||
1377 | * N equals kvm_tsc_scaling_ratio_frac_bits. | ||
1378 | */ | ||
1379 | static inline u64 __scale_tsc(u64 ratio, u64 tsc) | ||
1380 | { | ||
1381 | return mul_u64_u64_shr(tsc, ratio, kvm_tsc_scaling_ratio_frac_bits); | ||
1382 | } | ||
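Applying the ratio is then a 64x64 -> 128-bit multiply followed by a right shift of kvm_tsc_scaling_ratio_frac_bits. A user-space sketch of mul_u64_u64_shr() under the same __int128 assumption as above:

	#include <inttypes.h>
	#include <stdint.h>
	#include <stdio.h>

	/* User-space stand-in for the kernel's mul_u64_u64_shr() helper. */
	static uint64_t mul_u64_u64_shr(uint64_t a, uint64_t mul, unsigned int shift)
	{
		return (uint64_t)(((unsigned __int128)a * mul) >> shift);
	}

	int main(void)
	{
		unsigned int frac_bits = 48;
		uint64_t ratio = 3ULL << (frac_bits - 1);	/* fixed-point 1.5 */
		uint64_t tsc = 1000000;

		/* 1000000 host cycles scale to 1500000 guest cycles. */
		printf("scaled tsc = %" PRIu64 "\n",
		       mul_u64_u64_shr(tsc, ratio, frac_bits));
		return 0;
	}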
1383 | |||
1384 | u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc) | ||
1385 | { | ||
1386 | u64 _tsc = tsc; | ||
1387 | u64 ratio = vcpu->arch.tsc_scaling_ratio; | ||
1388 | |||
1389 | if (ratio != kvm_default_tsc_scaling_ratio) | ||
1390 | _tsc = __scale_tsc(ratio, tsc); | ||
1391 | |||
1392 | return _tsc; | ||
1393 | } | ||
1394 | EXPORT_SYMBOL_GPL(kvm_scale_tsc); | ||
1395 | |||
1396 | static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) | ||
1397 | { | ||
1398 | u64 tsc; | ||
1399 | |||
1400 | tsc = kvm_scale_tsc(vcpu, rdtsc()); | ||
1401 | |||
1402 | return target_tsc - tsc; | ||
1403 | } | ||
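The subtraction above works in modulo-2^64 arithmetic, so the returned offset makes the guest-visible TSC land exactly on target_tsc even when the scaled host TSC is already larger: guest_tsc = scale(host_tsc) + offset = target_tsc. A tiny check of that identity with made-up values:

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		uint64_t scaled_host_tsc = 123456789;	/* kvm_scale_tsc(vcpu, rdtsc()) */
		uint64_t target_tsc = 500000000;	/* value the guest should read */

		uint64_t offset = target_tsc - scaled_host_tsc;	/* may wrap; mod 2^64 */
		uint64_t guest_tsc = scaled_host_tsc + offset;	/* what the guest sees */

		assert(guest_tsc == target_tsc);
		return 0;
	}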
1404 | |||
1405 | u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) | ||
1406 | { | ||
1407 | return kvm_x86_ops->read_l1_tsc(vcpu, kvm_scale_tsc(vcpu, host_tsc)); | ||
1408 | } | ||
1409 | EXPORT_SYMBOL_GPL(kvm_read_l1_tsc); | ||
1410 | |||
1317 | void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) | 1411 | void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) |
1318 | { | 1412 | { |
1319 | struct kvm *kvm = vcpu->kvm; | 1413 | struct kvm *kvm = vcpu->kvm; |
@@ -1325,7 +1419,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) | |||
1325 | u64 data = msr->data; | 1419 | u64 data = msr->data; |
1326 | 1420 | ||
1327 | raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); | 1421 | raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); |
1328 | offset = kvm_x86_ops->compute_tsc_offset(vcpu, data); | 1422 | offset = kvm_compute_tsc_offset(vcpu, data); |
1329 | ns = get_kernel_ns(); | 1423 | ns = get_kernel_ns(); |
1330 | elapsed = ns - kvm->arch.last_tsc_nsec; | 1424 | elapsed = ns - kvm->arch.last_tsc_nsec; |
1331 | 1425 | ||
@@ -1382,7 +1476,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) | |||
1382 | } else { | 1476 | } else { |
1383 | u64 delta = nsec_to_cycles(vcpu, elapsed); | 1477 | u64 delta = nsec_to_cycles(vcpu, elapsed); |
1384 | data += delta; | 1478 | data += delta; |
1385 | offset = kvm_x86_ops->compute_tsc_offset(vcpu, data); | 1479 | offset = kvm_compute_tsc_offset(vcpu, data); |
1386 | pr_debug("kvm: adjusted tsc offset by %llu\n", delta); | 1480 | pr_debug("kvm: adjusted tsc offset by %llu\n", delta); |
1387 | } | 1481 | } |
1388 | matched = true; | 1482 | matched = true; |
@@ -1439,6 +1533,20 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) | |||
1439 | 1533 | ||
1440 | EXPORT_SYMBOL_GPL(kvm_write_tsc); | 1534 | EXPORT_SYMBOL_GPL(kvm_write_tsc); |
1441 | 1535 | ||
1536 | static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, | ||
1537 | s64 adjustment) | ||
1538 | { | ||
1539 | kvm_x86_ops->adjust_tsc_offset_guest(vcpu, adjustment); | ||
1540 | } | ||
1541 | |||
1542 | static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment) | ||
1543 | { | ||
1544 | if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio) | ||
1545 | WARN_ON(adjustment < 0); | ||
1546 | adjustment = kvm_scale_tsc(vcpu, (u64) adjustment); | ||
1547 | kvm_x86_ops->adjust_tsc_offset_guest(vcpu, adjustment); | ||
1548 | } | ||
1549 | |||
1442 | #ifdef CONFIG_X86_64 | 1550 | #ifdef CONFIG_X86_64 |
1443 | 1551 | ||
1444 | static cycle_t read_tsc(void) | 1552 | static cycle_t read_tsc(void) |
@@ -1600,7 +1708,7 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) | |||
1600 | 1708 | ||
1601 | static int kvm_guest_time_update(struct kvm_vcpu *v) | 1709 | static int kvm_guest_time_update(struct kvm_vcpu *v) |
1602 | { | 1710 | { |
1603 | unsigned long flags, this_tsc_khz; | 1711 | unsigned long flags, this_tsc_khz, tgt_tsc_khz; |
1604 | struct kvm_vcpu_arch *vcpu = &v->arch; | 1712 | struct kvm_vcpu_arch *vcpu = &v->arch; |
1605 | struct kvm_arch *ka = &v->kvm->arch; | 1713 | struct kvm_arch *ka = &v->kvm->arch; |
1606 | s64 kernel_ns; | 1714 | s64 kernel_ns; |
@@ -1637,7 +1745,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
1637 | kernel_ns = get_kernel_ns(); | 1745 | kernel_ns = get_kernel_ns(); |
1638 | } | 1746 | } |
1639 | 1747 | ||
1640 | tsc_timestamp = kvm_x86_ops->read_l1_tsc(v, host_tsc); | 1748 | tsc_timestamp = kvm_read_l1_tsc(v, host_tsc); |
1641 | 1749 | ||
1642 | /* | 1750 | /* |
1643 | * We may have to catch up the TSC to match elapsed wall clock | 1751 | * We may have to catch up the TSC to match elapsed wall clock |
@@ -1663,7 +1771,9 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
1663 | return 0; | 1771 | return 0; |
1664 | 1772 | ||
1665 | if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) { | 1773 | if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) { |
1666 | kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz, | 1774 | tgt_tsc_khz = kvm_has_tsc_control ? |
1775 | vcpu->virtual_tsc_khz : this_tsc_khz; | ||
1776 | kvm_get_time_scale(NSEC_PER_SEC / 1000, tgt_tsc_khz, | ||
1667 | &vcpu->hv_clock.tsc_shift, | 1777 | &vcpu->hv_clock.tsc_shift, |
1668 | &vcpu->hv_clock.tsc_to_system_mul); | 1778 | &vcpu->hv_clock.tsc_to_system_mul); |
1669 | vcpu->hw_tsc_khz = this_tsc_khz; | 1779 | vcpu->hw_tsc_khz = this_tsc_khz; |
@@ -1898,6 +2008,8 @@ static void accumulate_steal_time(struct kvm_vcpu *vcpu) | |||
1898 | 2008 | ||
1899 | static void record_steal_time(struct kvm_vcpu *vcpu) | 2009 | static void record_steal_time(struct kvm_vcpu *vcpu) |
1900 | { | 2010 | { |
2011 | accumulate_steal_time(vcpu); | ||
2012 | |||
1901 | if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) | 2013 | if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) |
1902 | return; | 2014 | return; |
1903 | 2015 | ||
@@ -2048,12 +2160,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2048 | if (!(data & KVM_MSR_ENABLED)) | 2160 | if (!(data & KVM_MSR_ENABLED)) |
2049 | break; | 2161 | break; |
2050 | 2162 | ||
2051 | vcpu->arch.st.last_steal = current->sched_info.run_delay; | ||
2052 | |||
2053 | preempt_disable(); | ||
2054 | accumulate_steal_time(vcpu); | ||
2055 | preempt_enable(); | ||
2056 | |||
2057 | kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); | 2163 | kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); |
2058 | 2164 | ||
2059 | break; | 2165 | break; |
@@ -2449,6 +2555,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
2449 | case KVM_CAP_ENABLE_CAP_VM: | 2555 | case KVM_CAP_ENABLE_CAP_VM: |
2450 | case KVM_CAP_DISABLE_QUIRKS: | 2556 | case KVM_CAP_DISABLE_QUIRKS: |
2451 | case KVM_CAP_SET_BOOT_CPU_ID: | 2557 | case KVM_CAP_SET_BOOT_CPU_ID: |
2558 | case KVM_CAP_SPLIT_IRQCHIP: | ||
2452 | #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT | 2559 | #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT |
2453 | case KVM_CAP_ASSIGN_DEV_IRQ: | 2560 | case KVM_CAP_ASSIGN_DEV_IRQ: |
2454 | case KVM_CAP_PCI_2_3: | 2561 | case KVM_CAP_PCI_2_3: |
@@ -2612,7 +2719,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
2612 | if (tsc_delta < 0) | 2719 | if (tsc_delta < 0) |
2613 | mark_tsc_unstable("KVM discovered backwards TSC"); | 2720 | mark_tsc_unstable("KVM discovered backwards TSC"); |
2614 | if (check_tsc_unstable()) { | 2721 | if (check_tsc_unstable()) { |
2615 | u64 offset = kvm_x86_ops->compute_tsc_offset(vcpu, | 2722 | u64 offset = kvm_compute_tsc_offset(vcpu, |
2616 | vcpu->arch.last_guest_tsc); | 2723 | vcpu->arch.last_guest_tsc); |
2617 | kvm_x86_ops->write_tsc_offset(vcpu, offset); | 2724 | kvm_x86_ops->write_tsc_offset(vcpu, offset); |
2618 | vcpu->arch.tsc_catchup = 1; | 2725 | vcpu->arch.tsc_catchup = 1; |
@@ -2628,7 +2735,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
2628 | vcpu->cpu = cpu; | 2735 | vcpu->cpu = cpu; |
2629 | } | 2736 | } |
2630 | 2737 | ||
2631 | accumulate_steal_time(vcpu); | ||
2632 | kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); | 2738 | kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); |
2633 | } | 2739 | } |
2634 | 2740 | ||
@@ -2657,17 +2763,50 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, | |||
2657 | return 0; | 2763 | return 0; |
2658 | } | 2764 | } |
2659 | 2765 | ||
2766 | static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu) | ||
2767 | { | ||
2768 | return (!lapic_in_kernel(vcpu) || | ||
2769 | kvm_apic_accept_pic_intr(vcpu)); | ||
2770 | } | ||
2771 | |||
2772 | /* | ||
2773 | * if userspace requested an interrupt window, check that the | ||
2774 | * interrupt window is open. | ||
2775 | * | ||
2776 | * No need to exit to userspace if we already have an interrupt queued. | ||
2777 | */ | ||
2778 | static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu) | ||
2779 | { | ||
2780 | return kvm_arch_interrupt_allowed(vcpu) && | ||
2781 | !kvm_cpu_has_interrupt(vcpu) && | ||
2782 | !kvm_event_needs_reinjection(vcpu) && | ||
2783 | kvm_cpu_accept_dm_intr(vcpu); | ||
2784 | } | ||
2785 | |||
2660 | static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, | 2786 | static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, |
2661 | struct kvm_interrupt *irq) | 2787 | struct kvm_interrupt *irq) |
2662 | { | 2788 | { |
2663 | if (irq->irq >= KVM_NR_INTERRUPTS) | 2789 | if (irq->irq >= KVM_NR_INTERRUPTS) |
2664 | return -EINVAL; | 2790 | return -EINVAL; |
2665 | if (irqchip_in_kernel(vcpu->kvm)) | 2791 | |
2792 | if (!irqchip_in_kernel(vcpu->kvm)) { | ||
2793 | kvm_queue_interrupt(vcpu, irq->irq, false); | ||
2794 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
2795 | return 0; | ||
2796 | } | ||
2797 | |||
2798 | /* | ||
2799 | * With in-kernel LAPIC, we only use this to inject EXTINT, so | ||
2800 | * fail for in-kernel 8259. | ||
2801 | */ | ||
2802 | if (pic_in_kernel(vcpu->kvm)) | ||
2666 | return -ENXIO; | 2803 | return -ENXIO; |
2667 | 2804 | ||
2668 | kvm_queue_interrupt(vcpu, irq->irq, false); | 2805 | if (vcpu->arch.pending_external_vector != -1) |
2669 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 2806 | return -EEXIST; |
2670 | 2807 | ||
2808 | vcpu->arch.pending_external_vector = irq->irq; | ||
2809 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
2671 | return 0; | 2810 | return 0; |
2672 | } | 2811 | } |
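For reference, a hedged userspace sketch of the new split-irqchip path: KVM_INTERRUPT now queues a single pending EXTINT vector and returns -EEXIST while one is outstanding (vcpu_fd is assumed to be an open vCPU file descriptor):

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int inject_extint(int vcpu_fd, unsigned int vector)
{
	struct kvm_interrupt irq = { .irq = vector };

	/* With a split irqchip this stores pending_external_vector;
	 * a second call before delivery fails with -EEXIST. */
	return ioctl(vcpu_fd, KVM_INTERRUPT, &irq);
}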
2673 | 2812 | ||
@@ -3176,7 +3315,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
3176 | struct kvm_vapic_addr va; | 3315 | struct kvm_vapic_addr va; |
3177 | 3316 | ||
3178 | r = -EINVAL; | 3317 | r = -EINVAL; |
3179 | if (!irqchip_in_kernel(vcpu->kvm)) | 3318 | if (!lapic_in_kernel(vcpu)) |
3180 | goto out; | 3319 | goto out; |
3181 | r = -EFAULT; | 3320 | r = -EFAULT; |
3182 | if (copy_from_user(&va, argp, sizeof va)) | 3321 | if (copy_from_user(&va, argp, sizeof va)) |
@@ -3303,9 +3442,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
3303 | if (user_tsc_khz == 0) | 3442 | if (user_tsc_khz == 0) |
3304 | user_tsc_khz = tsc_khz; | 3443 | user_tsc_khz = tsc_khz; |
3305 | 3444 | ||
3306 | kvm_set_tsc_khz(vcpu, user_tsc_khz); | 3445 | if (!kvm_set_tsc_khz(vcpu, user_tsc_khz)) |
3446 | r = 0; | ||
3307 | 3447 | ||
3308 | r = 0; | ||
3309 | goto out; | 3448 | goto out; |
3310 | } | 3449 | } |
3311 | case KVM_GET_TSC_KHZ: { | 3450 | case KVM_GET_TSC_KHZ: { |
@@ -3425,41 +3564,35 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) | |||
3425 | 3564 | ||
3426 | static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps) | 3565 | static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps) |
3427 | { | 3566 | { |
3428 | int r = 0; | ||
3429 | |||
3430 | mutex_lock(&kvm->arch.vpit->pit_state.lock); | 3567 | mutex_lock(&kvm->arch.vpit->pit_state.lock); |
3431 | memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state)); | 3568 | memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state)); |
3432 | mutex_unlock(&kvm->arch.vpit->pit_state.lock); | 3569 | mutex_unlock(&kvm->arch.vpit->pit_state.lock); |
3433 | return r; | 3570 | return 0; |
3434 | } | 3571 | } |
3435 | 3572 | ||
3436 | static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps) | 3573 | static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps) |
3437 | { | 3574 | { |
3438 | int r = 0; | ||
3439 | |||
3440 | mutex_lock(&kvm->arch.vpit->pit_state.lock); | 3575 | mutex_lock(&kvm->arch.vpit->pit_state.lock); |
3441 | memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state)); | 3576 | memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state)); |
3442 | kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0); | 3577 | kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0); |
3443 | mutex_unlock(&kvm->arch.vpit->pit_state.lock); | 3578 | mutex_unlock(&kvm->arch.vpit->pit_state.lock); |
3444 | return r; | 3579 | return 0; |
3445 | } | 3580 | } |
3446 | 3581 | ||
3447 | static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) | 3582 | static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) |
3448 | { | 3583 | { |
3449 | int r = 0; | ||
3450 | |||
3451 | mutex_lock(&kvm->arch.vpit->pit_state.lock); | 3584 | mutex_lock(&kvm->arch.vpit->pit_state.lock); |
3452 | memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels, | 3585 | memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels, |
3453 | sizeof(ps->channels)); | 3586 | sizeof(ps->channels)); |
3454 | ps->flags = kvm->arch.vpit->pit_state.flags; | 3587 | ps->flags = kvm->arch.vpit->pit_state.flags; |
3455 | mutex_unlock(&kvm->arch.vpit->pit_state.lock); | 3588 | mutex_unlock(&kvm->arch.vpit->pit_state.lock); |
3456 | memset(&ps->reserved, 0, sizeof(ps->reserved)); | 3589 | memset(&ps->reserved, 0, sizeof(ps->reserved)); |
3457 | return r; | 3590 | return 0; |
3458 | } | 3591 | } |
3459 | 3592 | ||
3460 | static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) | 3593 | static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) |
3461 | { | 3594 | { |
3462 | int r = 0, start = 0; | 3595 | int start = 0; |
3463 | u32 prev_legacy, cur_legacy; | 3596 | u32 prev_legacy, cur_legacy; |
3464 | mutex_lock(&kvm->arch.vpit->pit_state.lock); | 3597 | mutex_lock(&kvm->arch.vpit->pit_state.lock); |
3465 | prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY; | 3598 | prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY; |
@@ -3471,7 +3604,7 @@ static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) | |||
3471 | kvm->arch.vpit->pit_state.flags = ps->flags; | 3604 | kvm->arch.vpit->pit_state.flags = ps->flags; |
3472 | kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start); | 3605 | kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start); |
3473 | mutex_unlock(&kvm->arch.vpit->pit_state.lock); | 3606 | mutex_unlock(&kvm->arch.vpit->pit_state.lock); |
3474 | return r; | 3607 | return 0; |
3475 | } | 3608 | } |
3476 | 3609 | ||
3477 | static int kvm_vm_ioctl_reinject(struct kvm *kvm, | 3610 | static int kvm_vm_ioctl_reinject(struct kvm *kvm, |
@@ -3556,6 +3689,28 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, | |||
3556 | kvm->arch.disabled_quirks = cap->args[0]; | 3689 | kvm->arch.disabled_quirks = cap->args[0]; |
3557 | r = 0; | 3690 | r = 0; |
3558 | break; | 3691 | break; |
3692 | case KVM_CAP_SPLIT_IRQCHIP: { | ||
3693 | mutex_lock(&kvm->lock); | ||
3694 | r = -EINVAL; | ||
3695 | if (cap->args[0] > MAX_NR_RESERVED_IOAPIC_PINS) | ||
3696 | goto split_irqchip_unlock; | ||
3697 | r = -EEXIST; | ||
3698 | if (irqchip_in_kernel(kvm)) | ||
3699 | goto split_irqchip_unlock; | ||
3700 | if (atomic_read(&kvm->online_vcpus)) | ||
3701 | goto split_irqchip_unlock; | ||
3702 | r = kvm_setup_empty_irq_routing(kvm); | ||
3703 | if (r) | ||
3704 | goto split_irqchip_unlock; | ||
3705 | /* Pairs with irqchip_in_kernel. */ | ||
3706 | smp_wmb(); | ||
3707 | kvm->arch.irqchip_split = true; | ||
3708 | kvm->arch.nr_reserved_ioapic_pins = cap->args[0]; | ||
3709 | r = 0; | ||
3710 | split_irqchip_unlock: | ||
3711 | mutex_unlock(&kvm->lock); | ||
3712 | break; | ||
3713 | } | ||
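A hedged sketch of how userspace would exercise this capability, on a freshly created VM before any vCPUs exist (the pin count of 24 is an assumption, not taken from the patch):

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int enable_split_irqchip(int vm_fd)
{
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_SPLIT_IRQCHIP,
		/* Reserved IOAPIC pins; must not exceed
		 * MAX_NR_RESERVED_IOAPIC_PINS. */
		.args[0] = 24,
	};

	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}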
3559 | default: | 3714 | default: |
3560 | r = -EINVAL; | 3715 | r = -EINVAL; |
3561 | break; | 3716 | break; |
@@ -3669,7 +3824,7 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
3669 | } | 3824 | } |
3670 | 3825 | ||
3671 | r = -ENXIO; | 3826 | r = -ENXIO; |
3672 | if (!irqchip_in_kernel(kvm)) | 3827 | if (!irqchip_in_kernel(kvm) || irqchip_split(kvm)) |
3673 | goto get_irqchip_out; | 3828 | goto get_irqchip_out; |
3674 | r = kvm_vm_ioctl_get_irqchip(kvm, chip); | 3829 | r = kvm_vm_ioctl_get_irqchip(kvm, chip); |
3675 | if (r) | 3830 | if (r) |
@@ -3693,7 +3848,7 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
3693 | } | 3848 | } |
3694 | 3849 | ||
3695 | r = -ENXIO; | 3850 | r = -ENXIO; |
3696 | if (!irqchip_in_kernel(kvm)) | 3851 | if (!irqchip_in_kernel(kvm) || irqchip_split(kvm)) |
3697 | goto set_irqchip_out; | 3852 | goto set_irqchip_out; |
3698 | r = kvm_vm_ioctl_set_irqchip(kvm, chip); | 3853 | r = kvm_vm_ioctl_set_irqchip(kvm, chip); |
3699 | if (r) | 3854 | if (r) |
@@ -4060,6 +4215,15 @@ static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt, | |||
4060 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception); | 4215 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception); |
4061 | } | 4216 | } |
4062 | 4217 | ||
4218 | static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt, | ||
4219 | unsigned long addr, void *val, unsigned int bytes) | ||
4220 | { | ||
4221 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
4222 | int r = kvm_vcpu_read_guest(vcpu, addr, val, bytes); | ||
4223 | |||
4224 | return r < 0 ? X86EMUL_IO_NEEDED : X86EMUL_CONTINUE; | ||
4225 | } | ||
4226 | |||
4063 | int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, | 4227 | int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, |
4064 | gva_t addr, void *val, | 4228 | gva_t addr, void *val, |
4065 | unsigned int bytes, | 4229 | unsigned int bytes, |
@@ -4795,6 +4959,7 @@ static const struct x86_emulate_ops emulate_ops = { | |||
4795 | .write_gpr = emulator_write_gpr, | 4959 | .write_gpr = emulator_write_gpr, |
4796 | .read_std = kvm_read_guest_virt_system, | 4960 | .read_std = kvm_read_guest_virt_system, |
4797 | .write_std = kvm_write_guest_virt_system, | 4961 | .write_std = kvm_write_guest_virt_system, |
4962 | .read_phys = kvm_read_guest_phys_system, | ||
4798 | .fetch = kvm_fetch_guest_virt, | 4963 | .fetch = kvm_fetch_guest_virt, |
4799 | .read_emulated = emulator_read_emulated, | 4964 | .read_emulated = emulator_read_emulated, |
4800 | .write_emulated = emulator_write_emulated, | 4965 | .write_emulated = emulator_write_emulated, |
@@ -5667,7 +5832,7 @@ void kvm_arch_exit(void) | |||
5667 | int kvm_vcpu_halt(struct kvm_vcpu *vcpu) | 5832 | int kvm_vcpu_halt(struct kvm_vcpu *vcpu) |
5668 | { | 5833 | { |
5669 | ++vcpu->stat.halt_exits; | 5834 | ++vcpu->stat.halt_exits; |
5670 | if (irqchip_in_kernel(vcpu->kvm)) { | 5835 | if (lapic_in_kernel(vcpu)) { |
5671 | vcpu->arch.mp_state = KVM_MP_STATE_HALTED; | 5836 | vcpu->arch.mp_state = KVM_MP_STATE_HALTED; |
5672 | return 1; | 5837 | return 1; |
5673 | } else { | 5838 | } else { |
@@ -5766,17 +5931,10 @@ static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt) | |||
5766 | return emulator_write_emulated(ctxt, rip, instruction, 3, NULL); | 5931 | return emulator_write_emulated(ctxt, rip, instruction, 3, NULL); |
5767 | } | 5932 | } |
5768 | 5933 | ||
5769 | /* | ||
5770 | * Check if userspace requested an interrupt window, and that the | ||
5771 | * interrupt window is open. | ||
5772 | * | ||
5773 | * No need to exit to userspace if we already have an interrupt queued. | ||
5774 | */ | ||
5775 | static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu) | 5934 | static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu) |
5776 | { | 5935 | { |
5777 | return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) && | 5936 | return vcpu->run->request_interrupt_window && |
5778 | vcpu->run->request_interrupt_window && | 5937 | likely(!pic_in_kernel(vcpu->kvm)); |
5779 | kvm_arch_interrupt_allowed(vcpu)); | ||
5780 | } | 5938 | } |
5781 | 5939 | ||
5782 | static void post_kvm_run_save(struct kvm_vcpu *vcpu) | 5940 | static void post_kvm_run_save(struct kvm_vcpu *vcpu) |
@@ -5787,13 +5945,9 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu) | |||
5787 | kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0; | 5945 | kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0; |
5788 | kvm_run->cr8 = kvm_get_cr8(vcpu); | 5946 | kvm_run->cr8 = kvm_get_cr8(vcpu); |
5789 | kvm_run->apic_base = kvm_get_apic_base(vcpu); | 5947 | kvm_run->apic_base = kvm_get_apic_base(vcpu); |
5790 | if (irqchip_in_kernel(vcpu->kvm)) | 5948 | kvm_run->ready_for_interrupt_injection = |
5791 | kvm_run->ready_for_interrupt_injection = 1; | 5949 | pic_in_kernel(vcpu->kvm) || |
5792 | else | 5950 | kvm_vcpu_ready_for_interrupt_injection(vcpu); |
5793 | kvm_run->ready_for_interrupt_injection = | ||
5794 | kvm_arch_interrupt_allowed(vcpu) && | ||
5795 | !kvm_cpu_has_interrupt(vcpu) && | ||
5796 | !kvm_event_needs_reinjection(vcpu); | ||
5797 | } | 5951 | } |
5798 | 5952 | ||
5799 | static void update_cr8_intercept(struct kvm_vcpu *vcpu) | 5953 | static void update_cr8_intercept(struct kvm_vcpu *vcpu) |
@@ -6144,18 +6298,18 @@ static void process_smi(struct kvm_vcpu *vcpu) | |||
6144 | 6298 | ||
6145 | static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) | 6299 | static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) |
6146 | { | 6300 | { |
6147 | u64 eoi_exit_bitmap[4]; | ||
6148 | u32 tmr[8]; | ||
6149 | |||
6150 | if (!kvm_apic_hw_enabled(vcpu->arch.apic)) | 6301 | if (!kvm_apic_hw_enabled(vcpu->arch.apic)) |
6151 | return; | 6302 | return; |
6152 | 6303 | ||
6153 | memset(eoi_exit_bitmap, 0, 32); | 6304 | memset(vcpu->arch.eoi_exit_bitmap, 0, 256 / 8); |
6154 | memset(tmr, 0, 32); | ||
6155 | 6305 | ||
6156 | kvm_ioapic_scan_entry(vcpu, eoi_exit_bitmap, tmr); | 6306 | if (irqchip_split(vcpu->kvm)) |
6157 | kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); | 6307 | kvm_scan_ioapic_routes(vcpu, vcpu->arch.eoi_exit_bitmap); |
6158 | kvm_apic_update_tmr(vcpu, tmr); | 6308 | else { |
6309 | kvm_x86_ops->sync_pir_to_irr(vcpu); | ||
6310 | kvm_ioapic_scan_entry(vcpu, vcpu->arch.eoi_exit_bitmap); | ||
6311 | } | ||
6312 | kvm_x86_ops->load_eoi_exitmap(vcpu); | ||
6159 | } | 6313 | } |
6160 | 6314 | ||
6161 | static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu) | 6315 | static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu) |
@@ -6168,7 +6322,7 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) | |||
6168 | { | 6322 | { |
6169 | struct page *page = NULL; | 6323 | struct page *page = NULL; |
6170 | 6324 | ||
6171 | if (!irqchip_in_kernel(vcpu->kvm)) | 6325 | if (!lapic_in_kernel(vcpu)) |
6172 | return; | 6326 | return; |
6173 | 6327 | ||
6174 | if (!kvm_x86_ops->set_apic_access_page_addr) | 6328 | if (!kvm_x86_ops->set_apic_access_page_addr) |
@@ -6206,8 +6360,10 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, | |||
6206 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | 6360 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) |
6207 | { | 6361 | { |
6208 | int r; | 6362 | int r; |
6209 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && | 6363 | bool req_int_win = |
6210 | vcpu->run->request_interrupt_window; | 6364 | dm_request_for_irq_injection(vcpu) && |
6365 | kvm_cpu_accept_dm_intr(vcpu); | ||
6366 | |||
6211 | bool req_immediate_exit = false; | 6367 | bool req_immediate_exit = false; |
6212 | 6368 | ||
6213 | if (vcpu->requests) { | 6369 | if (vcpu->requests) { |
@@ -6258,6 +6414,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
6258 | kvm_pmu_handle_event(vcpu); | 6414 | kvm_pmu_handle_event(vcpu); |
6259 | if (kvm_check_request(KVM_REQ_PMI, vcpu)) | 6415 | if (kvm_check_request(KVM_REQ_PMI, vcpu)) |
6260 | kvm_pmu_deliver_pmi(vcpu); | 6416 | kvm_pmu_deliver_pmi(vcpu); |
6417 | if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) { | ||
6418 | BUG_ON(vcpu->arch.pending_ioapic_eoi > 255); | ||
6419 | if (test_bit(vcpu->arch.pending_ioapic_eoi, | ||
6420 | (void *) vcpu->arch.eoi_exit_bitmap)) { | ||
6421 | vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI; | ||
6422 | vcpu->run->eoi.vector = | ||
6423 | vcpu->arch.pending_ioapic_eoi; | ||
6424 | r = 0; | ||
6425 | goto out; | ||
6426 | } | ||
6427 | } | ||
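This new exit surfaces IOAPIC EOIs to a userspace irqchip model. A minimal sketch of the consumer side, where run is the mmap'ed struct kvm_run and handle_ioapic_eoi() is a hypothetical userspace IOAPIC helper:

switch (run->exit_reason) {
case KVM_EXIT_IOAPIC_EOI:
	/* Forward the EOI'd vector to the userspace IOAPIC model. */
	handle_ioapic_eoi(run->eoi.vector);	/* hypothetical helper */
	break;
}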
6261 | if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu)) | 6428 | if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu)) |
6262 | vcpu_scan_ioapic(vcpu); | 6429 | vcpu_scan_ioapic(vcpu); |
6263 | if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu)) | 6430 | if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu)) |
@@ -6268,6 +6435,26 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
6268 | r = 0; | 6435 | r = 0; |
6269 | goto out; | 6436 | goto out; |
6270 | } | 6437 | } |
6438 | if (kvm_check_request(KVM_REQ_HV_RESET, vcpu)) { | ||
6439 | vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; | ||
6440 | vcpu->run->system_event.type = KVM_SYSTEM_EVENT_RESET; | ||
6441 | r = 0; | ||
6442 | goto out; | ||
6443 | } | ||
6444 | } | ||
6445 | |||
6446 | /* | ||
6447 | * KVM_REQ_EVENT is not set when posted interrupts are set by | ||
6448 | * VT-d hardware, so we have to update RVI unconditionally. | ||
6449 | */ | ||
6450 | if (kvm_lapic_enabled(vcpu)) { | ||
6451 | /* | ||
6452 | * Update architecture specific hints for APIC | ||
6453 | * virtual interrupt delivery. | ||
6454 | */ | ||
6455 | if (kvm_x86_ops->hwapic_irr_update) | ||
6456 | kvm_x86_ops->hwapic_irr_update(vcpu, | ||
6457 | kvm_lapic_find_highest_irr(vcpu)); | ||
6271 | } | 6458 | } |
6272 | 6459 | ||
6273 | if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { | 6460 | if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { |
@@ -6286,13 +6473,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
6286 | kvm_x86_ops->enable_irq_window(vcpu); | 6473 | kvm_x86_ops->enable_irq_window(vcpu); |
6287 | 6474 | ||
6288 | if (kvm_lapic_enabled(vcpu)) { | 6475 | if (kvm_lapic_enabled(vcpu)) { |
6289 | /* | ||
6290 | * Update architecture specific hints for APIC | ||
6291 | * virtual interrupt delivery. | ||
6292 | */ | ||
6293 | if (kvm_x86_ops->hwapic_irr_update) | ||
6294 | kvm_x86_ops->hwapic_irr_update(vcpu, | ||
6295 | kvm_lapic_find_highest_irr(vcpu)); | ||
6296 | update_cr8_intercept(vcpu); | 6476 | update_cr8_intercept(vcpu); |
6297 | kvm_lapic_sync_to_vapic(vcpu); | 6477 | kvm_lapic_sync_to_vapic(vcpu); |
6298 | } | 6478 | } |
@@ -6376,8 +6556,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
6376 | if (hw_breakpoint_active()) | 6556 | if (hw_breakpoint_active()) |
6377 | hw_breakpoint_restore(); | 6557 | hw_breakpoint_restore(); |
6378 | 6558 | ||
6379 | vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, | 6559 | vcpu->arch.last_guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); |
6380 | rdtsc()); | ||
6381 | 6560 | ||
6382 | vcpu->mode = OUTSIDE_GUEST_MODE; | 6561 | vcpu->mode = OUTSIDE_GUEST_MODE; |
6383 | smp_wmb(); | 6562 | smp_wmb(); |
@@ -6428,10 +6607,15 @@ out: | |||
6428 | 6607 | ||
6429 | static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu) | 6608 | static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu) |
6430 | { | 6609 | { |
6431 | if (!kvm_arch_vcpu_runnable(vcpu)) { | 6610 | if (!kvm_arch_vcpu_runnable(vcpu) && |
6611 | (!kvm_x86_ops->pre_block || kvm_x86_ops->pre_block(vcpu) == 0)) { | ||
6432 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); | 6612 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); |
6433 | kvm_vcpu_block(vcpu); | 6613 | kvm_vcpu_block(vcpu); |
6434 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); | 6614 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); |
6615 | |||
6616 | if (kvm_x86_ops->post_block) | ||
6617 | kvm_x86_ops->post_block(vcpu); | ||
6618 | |||
6435 | if (!kvm_check_request(KVM_REQ_UNHALT, vcpu)) | 6619 | if (!kvm_check_request(KVM_REQ_UNHALT, vcpu)) |
6436 | return 1; | 6620 | return 1; |
6437 | } | 6621 | } |
@@ -6468,10 +6652,12 @@ static int vcpu_run(struct kvm_vcpu *vcpu) | |||
6468 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); | 6652 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); |
6469 | 6653 | ||
6470 | for (;;) { | 6654 | for (;;) { |
6471 | if (kvm_vcpu_running(vcpu)) | 6655 | if (kvm_vcpu_running(vcpu)) { |
6472 | r = vcpu_enter_guest(vcpu); | 6656 | r = vcpu_enter_guest(vcpu); |
6473 | else | 6657 | } else { |
6474 | r = vcpu_block(kvm, vcpu); | 6658 | r = vcpu_block(kvm, vcpu); |
6659 | } | ||
6660 | |||
6475 | if (r <= 0) | 6661 | if (r <= 0) |
6476 | break; | 6662 | break; |
6477 | 6663 | ||
@@ -6479,9 +6665,10 @@ static int vcpu_run(struct kvm_vcpu *vcpu) | |||
6479 | if (kvm_cpu_has_pending_timer(vcpu)) | 6665 | if (kvm_cpu_has_pending_timer(vcpu)) |
6480 | kvm_inject_pending_timer_irqs(vcpu); | 6666 | kvm_inject_pending_timer_irqs(vcpu); |
6481 | 6667 | ||
6482 | if (dm_request_for_irq_injection(vcpu)) { | 6668 | if (dm_request_for_irq_injection(vcpu) && |
6483 | r = -EINTR; | 6669 | kvm_vcpu_ready_for_interrupt_injection(vcpu)) { |
6484 | vcpu->run->exit_reason = KVM_EXIT_INTR; | 6670 | r = 0; |
6671 | vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; | ||
6485 | ++vcpu->stat.request_irq_exits; | 6672 | ++vcpu->stat.request_irq_exits; |
6486 | break; | 6673 | break; |
6487 | } | 6674 | } |
@@ -6608,7 +6795,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
6608 | } | 6795 | } |
6609 | 6796 | ||
6610 | /* re-sync apic's tpr */ | 6797 | /* re-sync apic's tpr */ |
6611 | if (!irqchip_in_kernel(vcpu->kvm)) { | 6798 | if (!lapic_in_kernel(vcpu)) { |
6612 | if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) { | 6799 | if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) { |
6613 | r = -EINVAL; | 6800 | r = -EINVAL; |
6614 | goto out; | 6801 | goto out; |
@@ -6932,7 +7119,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | |||
6932 | */ | 7119 | */ |
6933 | kvm_set_rflags(vcpu, rflags); | 7120 | kvm_set_rflags(vcpu, rflags); |
6934 | 7121 | ||
6935 | kvm_x86_ops->update_db_bp_intercept(vcpu); | 7122 | kvm_x86_ops->update_bp_intercept(vcpu); |
6936 | 7123 | ||
6937 | r = 0; | 7124 | r = 0; |
6938 | 7125 | ||
@@ -7281,6 +7468,20 @@ int kvm_arch_hardware_setup(void) | |||
7281 | if (r != 0) | 7468 | if (r != 0) |
7282 | return r; | 7469 | return r; |
7283 | 7470 | ||
7471 | if (kvm_has_tsc_control) { | ||
7472 | /* | ||
7473 | * Make sure the user can only configure tsc_khz values that | ||
7474 | * fit into a signed integer. | ||
7475 | * A min value is not calculated because it will always | ||
7476 | * be 1 on all machines. | ||
7477 | */ | ||
7478 | u64 max = min(0x7fffffffULL, | ||
7479 | __scale_tsc(kvm_max_tsc_scaling_ratio, tsc_khz)); | ||
7480 | kvm_max_guest_tsc_khz = max; | ||
7481 | |||
7482 | kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits; | ||
7483 | } | ||
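A worked example of the clamp above, assuming SVM's 8.32 fixed-point ratio format (frac_bits = 32, integral part up to 255) and a 2.6 GHz host; the numbers are illustrative, not from the patch:

/*
 * __scale_tsc(255ULL << 32, 2600000) = (2600000 * 255 * 2^32) >> 32
 *                                    = 663,000,000 kHz
 *
 * 663 GHz still fits in a signed 32-bit integer, so min() keeps the
 * scaled value; the 0x7fffffff clamp only bites on hardware whose
 * maximum ratio is much larger.
 */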
7484 | |||
7284 | kvm_init_msr_list(); | 7485 | kvm_init_msr_list(); |
7285 | return 0; | 7486 | return 0; |
7286 | } | 7487 | } |
@@ -7308,7 +7509,7 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) | |||
7308 | 7509 | ||
7309 | bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) | 7510 | bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) |
7310 | { | 7511 | { |
7311 | return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL); | 7512 | return irqchip_in_kernel(vcpu->kvm) == lapic_in_kernel(vcpu); |
7312 | } | 7513 | } |
7313 | 7514 | ||
7314 | struct static_key kvm_no_apic_vcpu __read_mostly; | 7515 | struct static_key kvm_no_apic_vcpu __read_mostly; |
@@ -7377,6 +7578,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
7377 | kvm_async_pf_hash_reset(vcpu); | 7578 | kvm_async_pf_hash_reset(vcpu); |
7378 | kvm_pmu_init(vcpu); | 7579 | kvm_pmu_init(vcpu); |
7379 | 7580 | ||
7581 | vcpu->arch.pending_external_vector = -1; | ||
7582 | |||
7380 | return 0; | 7583 | return 0; |
7381 | 7584 | ||
7382 | fail_free_mce_banks: | 7585 | fail_free_mce_banks: |
@@ -7402,7 +7605,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) | |||
7402 | kvm_mmu_destroy(vcpu); | 7605 | kvm_mmu_destroy(vcpu); |
7403 | srcu_read_unlock(&vcpu->kvm->srcu, idx); | 7606 | srcu_read_unlock(&vcpu->kvm->srcu, idx); |
7404 | free_page((unsigned long)vcpu->arch.pio_data); | 7607 | free_page((unsigned long)vcpu->arch.pio_data); |
7405 | if (!irqchip_in_kernel(vcpu->kvm)) | 7608 | if (!lapic_in_kernel(vcpu)) |
7406 | static_key_slow_dec(&kvm_no_apic_vcpu); | 7609 | static_key_slow_dec(&kvm_no_apic_vcpu); |
7407 | } | 7610 | } |
7408 | 7611 | ||
@@ -8029,7 +8232,59 @@ bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) | |||
8029 | } | 8232 | } |
8030 | EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma); | 8233 | EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma); |
8031 | 8234 | ||
8235 | int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, | ||
8236 | struct irq_bypass_producer *prod) | ||
8237 | { | ||
8238 | struct kvm_kernel_irqfd *irqfd = | ||
8239 | container_of(cons, struct kvm_kernel_irqfd, consumer); | ||
8240 | |||
8241 | if (kvm_x86_ops->update_pi_irte) { | ||
8242 | irqfd->producer = prod; | ||
8243 | return kvm_x86_ops->update_pi_irte(irqfd->kvm, | ||
8244 | prod->irq, irqfd->gsi, 1); | ||
8245 | } | ||
8246 | |||
8247 | return -EINVAL; | ||
8248 | } | ||
8249 | |||
8250 | void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, | ||
8251 | struct irq_bypass_producer *prod) | ||
8252 | { | ||
8253 | int ret; | ||
8254 | struct kvm_kernel_irqfd *irqfd = | ||
8255 | container_of(cons, struct kvm_kernel_irqfd, consumer); | ||
8256 | |||
8257 | if (!kvm_x86_ops->update_pi_irte) { | ||
8258 | WARN_ON(irqfd->producer != NULL); | ||
8259 | return; | ||
8260 | } | ||
8261 | |||
8262 | WARN_ON(irqfd->producer != prod); | ||
8263 | irqfd->producer = NULL; | ||
8264 | |||
8265 | /* | ||
8266 | * When the producer of a consumer is unregistered, we change back | ||
8267 | * to remapped mode, so we can re-use the current implementation | ||
8268 | * when the irq is masked/disabled or the consumer side (KVM | ||
8269 | * in this case) doesn't want to receive the interrupts. | ||
8270 | */ | ||
8271 | ret = kvm_x86_ops->update_pi_irte(irqfd->kvm, prod->irq, irqfd->gsi, 0); | ||
8272 | if (ret) | ||
8273 | printk(KERN_INFO "irq bypass consumer (token %p) unregistration" | ||
8274 | " fails: %d\n", irqfd->consumer.token, ret); | ||
8275 | } | ||
8276 | |||
8277 | int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, | ||
8278 | uint32_t guest_irq, bool set) | ||
8279 | { | ||
8280 | if (!kvm_x86_ops->update_pi_irte) | ||
8281 | return -EINVAL; | ||
8282 | |||
8283 | return kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set); | ||
8284 | } | ||
8285 | |||
8032 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); | 8286 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); |
8287 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio); | ||
8033 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); | 8288 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); |
8034 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); | 8289 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); |
8035 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr); | 8290 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr); |
@@ -8044,3 +8299,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts); | |||
8044 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset); | 8299 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset); |
8045 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window); | 8300 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window); |
8046 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full); | 8301 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full); |
8302 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update); | ||
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 1bf417e9cc13..0f1c6fc3ddd8 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c | |||
@@ -89,7 +89,7 @@ static struct addr_marker address_markers[] = { | |||
89 | { 0/* VMALLOC_START */, "vmalloc() Area" }, | 89 | { 0/* VMALLOC_START */, "vmalloc() Area" }, |
90 | { 0/*VMALLOC_END*/, "vmalloc() End" }, | 90 | { 0/*VMALLOC_END*/, "vmalloc() End" }, |
91 | # ifdef CONFIG_HIGHMEM | 91 | # ifdef CONFIG_HIGHMEM |
92 | { 0/*PKMAP_BASE*/, "Persisent kmap() Area" }, | 92 | { 0/*PKMAP_BASE*/, "Persistent kmap() Area" }, |
93 | # endif | 93 | # endif |
94 | { 0/*FIXADDR_START*/, "Fixmap Area" }, | 94 | { 0/*FIXADDR_START*/, "Fixmap Area" }, |
95 | #endif | 95 | #endif |
@@ -358,6 +358,21 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr, | |||
358 | #define pgd_none(a) pud_none(__pud(pgd_val(a))) | 358 | #define pgd_none(a) pud_none(__pud(pgd_val(a))) |
359 | #endif | 359 | #endif |
360 | 360 | ||
361 | #ifdef CONFIG_X86_64 | ||
362 | static inline bool is_hypervisor_range(int idx) | ||
363 | { | ||
364 | /* | ||
365 | * ffff800000000000 - ffff87ffffffffff is reserved for | ||
366 | * the hypervisor. | ||
367 | */ | ||
368 | return paravirt_enabled() && | ||
369 | (idx >= pgd_index(__PAGE_OFFSET) - 16) && | ||
370 | (idx < pgd_index(__PAGE_OFFSET)); | ||
371 | } | ||
372 | #else | ||
373 | static inline bool is_hypervisor_range(int idx) { return false; } | ||
374 | #endif | ||
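A quick check of the index arithmetic, assuming the classic 4-level page-table layout:

/*
 * Each PGD entry spans 2^39 bytes = 512 GB, so the 16 entries just
 * below pgd_index(__PAGE_OFFSET) cover 16 * 512 GB = 8 TB, i.e. the
 * ffff800000000000 - ffff87ffffffffff hole named above.
 */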
375 | |||
361 | static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, | 376 | static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, |
362 | bool checkwx) | 377 | bool checkwx) |
363 | { | 378 | { |
@@ -381,7 +396,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, | |||
381 | 396 | ||
382 | for (i = 0; i < PTRS_PER_PGD; i++) { | 397 | for (i = 0; i < PTRS_PER_PGD; i++) { |
383 | st.current_address = normalize_addr(i * PGD_LEVEL_MULT); | 398 | st.current_address = normalize_addr(i * PGD_LEVEL_MULT); |
384 | if (!pgd_none(*start)) { | 399 | if (!pgd_none(*start) && !is_hypervisor_range(i)) { |
385 | if (pgd_large(*start) || !pgd_present(*start)) { | 400 | if (pgd_large(*start) || !pgd_present(*start)) { |
386 | prot = pgd_flags(*start); | 401 | prot = pgd_flags(*start); |
387 | note_page(m, &st, __pgprot(prot), 1); | 402 | note_page(m, &st, __pgprot(prot), 1); |
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index eecb207a2037..a6d739258137 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c | |||
@@ -104,20 +104,6 @@ void __kunmap_atomic(void *kvaddr) | |||
104 | } | 104 | } |
105 | EXPORT_SYMBOL(__kunmap_atomic); | 105 | EXPORT_SYMBOL(__kunmap_atomic); |
106 | 106 | ||
107 | struct page *kmap_atomic_to_page(void *ptr) | ||
108 | { | ||
109 | unsigned long idx, vaddr = (unsigned long)ptr; | ||
110 | pte_t *pte; | ||
111 | |||
112 | if (vaddr < FIXADDR_START) | ||
113 | return virt_to_page(ptr); | ||
114 | |||
115 | idx = virt_to_fix(vaddr); | ||
116 | pte = kmap_pte - (idx - FIX_KMAP_BEGIN); | ||
117 | return pte_page(*pte); | ||
118 | } | ||
119 | EXPORT_SYMBOL(kmap_atomic_to_page); | ||
120 | |||
121 | void __init set_highmem_pages_init(void) | 107 | void __init set_highmem_pages_init(void) |
122 | { | 108 | { |
123 | struct zone *zone; | 109 | struct zone *zone; |
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 1f37cb2b56a9..493f54172b4a 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
@@ -354,7 +354,7 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range, | |||
354 | } | 354 | } |
355 | 355 | ||
356 | for (i = 0; i < nr_range; i++) | 356 | for (i = 0; i < nr_range; i++) |
357 | printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n", | 357 | pr_debug(" [mem %#010lx-%#010lx] page %s\n", |
358 | mr[i].start, mr[i].end - 1, | 358 | mr[i].start, mr[i].end - 1, |
359 | page_size_string(&mr[i])); | 359 | page_size_string(&mr[i])); |
360 | 360 | ||
@@ -401,7 +401,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
401 | unsigned long ret = 0; | 401 | unsigned long ret = 0; |
402 | int nr_range, i; | 402 | int nr_range, i; |
403 | 403 | ||
404 | pr_info("init_memory_mapping: [mem %#010lx-%#010lx]\n", | 404 | pr_debug("init_memory_mapping: [mem %#010lx-%#010lx]\n", |
405 | start, end - 1); | 405 | start, end - 1); |
406 | 406 | ||
407 | memset(mr, 0, sizeof(mr)); | 407 | memset(mr, 0, sizeof(mr)); |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 5ed62eff31bd..ec081fe0ce2c 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -1270,7 +1270,7 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start, | |||
1270 | /* check to see if we have contiguous blocks */ | 1270 | /* check to see if we have contiguous blocks */ |
1271 | if (p_end != p || node_start != node) { | 1271 | if (p_end != p || node_start != node) { |
1272 | if (p_start) | 1272 | if (p_start) |
1273 | printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n", | 1273 | pr_debug(" [%lx-%lx] PMD -> [%p-%p] on node %d\n", |
1274 | addr_start, addr_end-1, p_start, p_end-1, node_start); | 1274 | addr_start, addr_end-1, p_start, p_end-1, node_start); |
1275 | addr_start = addr; | 1275 | addr_start = addr; |
1276 | node_start = node; | 1276 | node_start = node; |
@@ -1368,7 +1368,7 @@ void register_page_bootmem_memmap(unsigned long section_nr, | |||
1368 | void __meminit vmemmap_populate_print_last(void) | 1368 | void __meminit vmemmap_populate_print_last(void) |
1369 | { | 1369 | { |
1370 | if (p_start) { | 1370 | if (p_start) { |
1371 | printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n", | 1371 | pr_debug(" [%lx-%lx] PMD -> [%p-%p] on node %d\n", |
1372 | addr_start, addr_end-1, p_start, p_end-1, node_start); | 1372 | addr_start, addr_end-1, p_start, p_end-1, node_start); |
1373 | p_start = NULL; | 1373 | p_start = NULL; |
1374 | p_end = NULL; | 1374 | p_end = NULL; |
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 9ce5da27b136..d470cf219a2d 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c | |||
@@ -126,5 +126,5 @@ void __init kasan_init(void) | |||
126 | __flush_tlb_all(); | 126 | __flush_tlb_all(); |
127 | init_task.kasan_depth = 0; | 127 | init_task.kasan_depth = 0; |
128 | 128 | ||
129 | pr_info("Kernel address sanitizer initialized\n"); | 129 | pr_info("KernelAddressSanitizer initialized\n"); |
130 | } | 130 | } |
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index b0ae85f90f10..b2fd67da1701 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c | |||
@@ -101,19 +101,19 @@ static int get_reg_offset(struct insn *insn, struct pt_regs *regs, | |||
101 | switch (type) { | 101 | switch (type) { |
102 | case REG_TYPE_RM: | 102 | case REG_TYPE_RM: |
103 | regno = X86_MODRM_RM(insn->modrm.value); | 103 | regno = X86_MODRM_RM(insn->modrm.value); |
104 | if (X86_REX_B(insn->rex_prefix.value) == 1) | 104 | if (X86_REX_B(insn->rex_prefix.value)) |
105 | regno += 8; | 105 | regno += 8; |
106 | break; | 106 | break; |
107 | 107 | ||
108 | case REG_TYPE_INDEX: | 108 | case REG_TYPE_INDEX: |
109 | regno = X86_SIB_INDEX(insn->sib.value); | 109 | regno = X86_SIB_INDEX(insn->sib.value); |
110 | if (X86_REX_X(insn->rex_prefix.value) == 1) | 110 | if (X86_REX_X(insn->rex_prefix.value)) |
111 | regno += 8; | 111 | regno += 8; |
112 | break; | 112 | break; |
113 | 113 | ||
114 | case REG_TYPE_BASE: | 114 | case REG_TYPE_BASE: |
115 | regno = X86_SIB_BASE(insn->sib.value); | 115 | regno = X86_SIB_BASE(insn->sib.value); |
116 | if (X86_REX_B(insn->rex_prefix.value) == 1) | 116 | if (X86_REX_B(insn->rex_prefix.value)) |
117 | regno += 8; | 117 | regno += 8; |
118 | break; | 118 | break; |
119 | 119 | ||
@@ -586,6 +586,29 @@ static unsigned long mpx_bd_entry_to_bt_addr(struct mm_struct *mm, | |||
586 | } | 586 | } |
587 | 587 | ||
588 | /* | 588 | /* |
589 | * We only want to do a 4-byte get_user() on 32-bit. Otherwise, | ||
590 | * we might run off the end of the bounds table if we are on | ||
591 | * a 64-bit kernel and try to get 8 bytes. | ||
592 | */ | ||
593 | int get_user_bd_entry(struct mm_struct *mm, unsigned long *bd_entry_ret, | ||
594 | long __user *bd_entry_ptr) | ||
595 | { | ||
596 | u32 bd_entry_32; | ||
597 | int ret; | ||
598 | |||
599 | if (is_64bit_mm(mm)) | ||
600 | return get_user(*bd_entry_ret, bd_entry_ptr); | ||
601 | |||
602 | /* | ||
603 | * Note that get_user() uses the type of the *pointer* to | ||
604 | * establish the size of the get, not the destination. | ||
605 | */ | ||
606 | ret = get_user(bd_entry_32, (u32 __user *)bd_entry_ptr); | ||
607 | *bd_entry_ret = bd_entry_32; | ||
608 | return ret; | ||
609 | } | ||
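The size-follows-the-pointer behaviour the comment relies on can be shown with a short sketch (not from the patch; ptr is an arbitrary user pointer):

u64 dst;

get_user(dst, (u32 __user *)ptr);	/* 4-byte load, zero-extended */
get_user(dst, (u64 __user *)ptr);	/* 8-byte load */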
610 | |||
611 | /* | ||
589 | * Get the base of bounds tables pointed by specific bounds | 612 | * Get the base of bounds tables pointed by specific bounds |
590 | * directory entry. | 613 | * directory entry. |
591 | */ | 614 | */ |
@@ -605,7 +628,7 @@ static int get_bt_addr(struct mm_struct *mm, | |||
605 | int need_write = 0; | 628 | int need_write = 0; |
606 | 629 | ||
607 | pagefault_disable(); | 630 | pagefault_disable(); |
608 | ret = get_user(bd_entry, bd_entry_ptr); | 631 | ret = get_user_bd_entry(mm, &bd_entry, bd_entry_ptr); |
609 | pagefault_enable(); | 632 | pagefault_enable(); |
610 | if (!ret) | 633 | if (!ret) |
611 | break; | 634 | break; |
@@ -700,11 +723,23 @@ static unsigned long mpx_get_bt_entry_offset_bytes(struct mm_struct *mm, | |||
700 | */ | 723 | */ |
701 | static inline unsigned long bd_entry_virt_space(struct mm_struct *mm) | 724 | static inline unsigned long bd_entry_virt_space(struct mm_struct *mm) |
702 | { | 725 | { |
703 | unsigned long long virt_space = (1ULL << boot_cpu_data.x86_virt_bits); | 726 | unsigned long long virt_space; |
704 | if (is_64bit_mm(mm)) | 727 | unsigned long long GB = (1ULL << 30); |
705 | return virt_space / MPX_BD_NR_ENTRIES_64; | 728 | |
706 | else | 729 | /* |
707 | return virt_space / MPX_BD_NR_ENTRIES_32; | 730 | * This covers 32-bit emulation as well as 32-bit kernels |
731 | * running on 64-bit hardware. | ||
732 | */ | ||
733 | if (!is_64bit_mm(mm)) | ||
734 | return (4ULL * GB) / MPX_BD_NR_ENTRIES_32; | ||
735 | |||
736 | /* | ||
737 | * 'x86_virt_bits' returns what the hardware is capable | ||
738 | * of, and returns the full >32-bit address space when | ||
739 | * running 32-bit kernels on 64-bit hardware. | ||
740 | */ | ||
741 | virt_space = (1ULL << boot_cpu_data.x86_virt_bits); | ||
742 | return virt_space / MPX_BD_NR_ENTRIES_64; | ||
708 | } | 743 | } |
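Symbolically, the rewritten function computes (no MPX constants assumed):

/*
 *   32-bit mm:  span = 2^32        / MPX_BD_NR_ENTRIES_32
 *   64-bit mm:  span = 2^virt_bits / MPX_BD_NR_ENTRIES_64
 *
 * i.e. each bounds-directory entry maps an equal slice of the
 * addressable virtual space for that mm.
 */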
709 | 744 | ||
710 | /* | 745 | /* |
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 70efcd0940f9..75991979f667 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c | |||
@@ -1109,7 +1109,7 @@ void bpf_int_jit_compile(struct bpf_prog *prog) | |||
1109 | bpf_flush_icache(header, image + proglen); | 1109 | bpf_flush_icache(header, image + proglen); |
1110 | set_memory_ro((unsigned long)header, header->pages); | 1110 | set_memory_ro((unsigned long)header, header->pages); |
1111 | prog->bpf_func = (void *)image; | 1111 | prog->bpf_func = (void *)image; |
1112 | prog->jited = true; | 1112 | prog->jited = 1; |
1113 | } | 1113 | } |
1114 | out: | 1114 | out: |
1115 | kfree(addrs); | 1115 | kfree(addrs); |
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index ff9911707160..3cd69832d7f4 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c | |||
@@ -4,16 +4,15 @@ | |||
4 | #include <linux/irq.h> | 4 | #include <linux/irq.h> |
5 | #include <linux/dmi.h> | 5 | #include <linux/dmi.h> |
6 | #include <linux/slab.h> | 6 | #include <linux/slab.h> |
7 | #include <linux/pci-acpi.h> | ||
7 | #include <asm/numa.h> | 8 | #include <asm/numa.h> |
8 | #include <asm/pci_x86.h> | 9 | #include <asm/pci_x86.h> |
9 | 10 | ||
10 | struct pci_root_info { | 11 | struct pci_root_info { |
11 | struct acpi_device *bridge; | 12 | struct acpi_pci_root_info common; |
12 | char name[16]; | ||
13 | struct pci_sysdata sd; | 13 | struct pci_sysdata sd; |
14 | #ifdef CONFIG_PCI_MMCONFIG | 14 | #ifdef CONFIG_PCI_MMCONFIG |
15 | bool mcfg_added; | 15 | bool mcfg_added; |
16 | u16 segment; | ||
17 | u8 start_bus; | 16 | u8 start_bus; |
18 | u8 end_bus; | 17 | u8 end_bus; |
19 | #endif | 18 | #endif |
@@ -178,15 +177,18 @@ static int check_segment(u16 seg, struct device *dev, char *estr) | |||
178 | return 0; | 177 | return 0; |
179 | } | 178 | } |
180 | 179 | ||
181 | static int setup_mcfg_map(struct pci_root_info *info, u16 seg, u8 start, | 180 | static int setup_mcfg_map(struct acpi_pci_root_info *ci) |
182 | u8 end, phys_addr_t addr) | ||
183 | { | 181 | { |
184 | int result; | 182 | int result, seg; |
185 | struct device *dev = &info->bridge->dev; | 183 | struct pci_root_info *info; |
184 | struct acpi_pci_root *root = ci->root; | ||
185 | struct device *dev = &ci->bridge->dev; | ||
186 | 186 | ||
187 | info->start_bus = start; | 187 | info = container_of(ci, struct pci_root_info, common); |
188 | info->end_bus = end; | 188 | info->start_bus = (u8)root->secondary.start; |
189 | info->end_bus = (u8)root->secondary.end; | ||
189 | info->mcfg_added = false; | 190 | info->mcfg_added = false; |
191 | seg = info->sd.domain; | ||
190 | 192 | ||
191 | /* return success if MMCFG is not in use */ | 193 | /* return success if MMCFG is not in use */ |
192 | if (raw_pci_ext_ops && raw_pci_ext_ops != &pci_mmcfg) | 194 | if (raw_pci_ext_ops && raw_pci_ext_ops != &pci_mmcfg) |
@@ -195,7 +197,8 @@ static int setup_mcfg_map(struct pci_root_info *info, u16 seg, u8 start, | |||
195 | if (!(pci_probe & PCI_PROBE_MMCONF)) | 197 | if (!(pci_probe & PCI_PROBE_MMCONF)) |
196 | return check_segment(seg, dev, "MMCONFIG is disabled,"); | 198 | return check_segment(seg, dev, "MMCONFIG is disabled,"); |
197 | 199 | ||
198 | result = pci_mmconfig_insert(dev, seg, start, end, addr); | 200 | result = pci_mmconfig_insert(dev, seg, info->start_bus, info->end_bus, |
201 | root->mcfg_addr); | ||
199 | if (result == 0) { | 202 | if (result == 0) { |
200 | /* enable MMCFG if it hasn't been enabled yet */ | 203 | /* enable MMCFG if it hasn't been enabled yet */ |
201 | if (raw_pci_ext_ops == NULL) | 204 | if (raw_pci_ext_ops == NULL) |
@@ -208,134 +211,55 @@ static int setup_mcfg_map(struct pci_root_info *info, u16 seg, u8 start, | |||
208 | return 0; | 211 | return 0; |
209 | } | 212 | } |
210 | 213 | ||
211 | static void teardown_mcfg_map(struct pci_root_info *info) | 214 | static void teardown_mcfg_map(struct acpi_pci_root_info *ci) |
212 | { | 215 | { |
216 | struct pci_root_info *info; | ||
217 | |||
218 | info = container_of(ci, struct pci_root_info, common); | ||
213 | if (info->mcfg_added) { | 219 | if (info->mcfg_added) { |
214 | pci_mmconfig_delete(info->segment, info->start_bus, | 220 | pci_mmconfig_delete(info->sd.domain, |
215 | info->end_bus); | 221 | info->start_bus, info->end_bus); |
216 | info->mcfg_added = false; | 222 | info->mcfg_added = false; |
217 | } | 223 | } |
218 | } | 224 | } |
219 | #else | 225 | #else |
220 | static int setup_mcfg_map(struct pci_root_info *info, | 226 | static int setup_mcfg_map(struct acpi_pci_root_info *ci) |
221 | u16 seg, u8 start, u8 end, | ||
222 | phys_addr_t addr) | ||
223 | { | 227 | { |
224 | return 0; | 228 | return 0; |
225 | } | 229 | } |
226 | static void teardown_mcfg_map(struct pci_root_info *info) | 230 | |
231 | static void teardown_mcfg_map(struct acpi_pci_root_info *ci) | ||
227 | { | 232 | { |
228 | } | 233 | } |
229 | #endif | 234 | #endif |
230 | 235 | ||
231 | static void validate_resources(struct device *dev, struct list_head *crs_res, | 236 | static int pci_acpi_root_get_node(struct acpi_pci_root *root) |
232 | unsigned long type) | ||
233 | { | 237 | { |
234 | LIST_HEAD(list); | 238 | int busnum = root->secondary.start; |
235 | struct resource *res1, *res2, *root = NULL; | 239 | struct acpi_device *device = root->device; |
236 | struct resource_entry *tmp, *entry, *entry2; | 240 | int node = acpi_get_node(device->handle); |
237 | |||
238 | BUG_ON((type & (IORESOURCE_MEM | IORESOURCE_IO)) == 0); | ||
239 | root = (type & IORESOURCE_MEM) ? &iomem_resource : &ioport_resource; | ||
240 | |||
241 | list_splice_init(crs_res, &list); | ||
242 | resource_list_for_each_entry_safe(entry, tmp, &list) { | ||
243 | bool free = false; | ||
244 | resource_size_t end; | ||
245 | |||
246 | res1 = entry->res; | ||
247 | if (!(res1->flags & type)) | ||
248 | goto next; | ||
249 | |||
250 | /* Exclude non-addressable range or non-addressable portion */ | ||
251 | end = min(res1->end, root->end); | ||
252 | if (end <= res1->start) { | ||
253 | dev_info(dev, "host bridge window %pR (ignored, not CPU addressable)\n", | ||
254 | res1); | ||
255 | free = true; | ||
256 | goto next; | ||
257 | } else if (res1->end != end) { | ||
258 | dev_info(dev, "host bridge window %pR ([%#llx-%#llx] ignored, not CPU addressable)\n", | ||
259 | res1, (unsigned long long)end + 1, | ||
260 | (unsigned long long)res1->end); | ||
261 | res1->end = end; | ||
262 | } | ||
263 | |||
264 | resource_list_for_each_entry(entry2, crs_res) { | ||
265 | res2 = entry2->res; | ||
266 | if (!(res2->flags & type)) | ||
267 | continue; | ||
268 | |||
269 | /* | ||
270 | * I don't like throwing away windows because then | ||
271 | * our resources no longer match the ACPI _CRS, but | ||
272 | * the kernel resource tree doesn't allow overlaps. | ||
273 | */ | ||
274 | if (resource_overlaps(res1, res2)) { | ||
275 | res2->start = min(res1->start, res2->start); | ||
276 | res2->end = max(res1->end, res2->end); | ||
277 | dev_info(dev, "host bridge window expanded to %pR; %pR ignored\n", | ||
278 | res2, res1); | ||
279 | free = true; | ||
280 | goto next; | ||
281 | } | ||
282 | } | ||
283 | 241 | ||
284 | next: | 242 | if (node == NUMA_NO_NODE) { |
285 | resource_list_del(entry); | 243 | node = x86_pci_root_bus_node(busnum); |
286 | if (free) | 244 | if (node != 0 && node != NUMA_NO_NODE) |
287 | resource_list_free_entry(entry); | 245 | dev_info(&device->dev, FW_BUG "no _PXM; falling back to node %d from hardware (may be inconsistent with ACPI node numbers)\n", |
288 | else | 246 | node); |
289 | resource_list_add_tail(entry, crs_res); | ||
290 | } | 247 | } |
248 | if (node != NUMA_NO_NODE && !node_online(node)) | ||
249 | node = NUMA_NO_NODE; | ||
250 | |||
251 | return node; | ||
291 | } | 252 | } |
292 | 253 | ||
293 | static void add_resources(struct pci_root_info *info, | 254 | static int pci_acpi_root_init_info(struct acpi_pci_root_info *ci) |
294 | struct list_head *resources, | ||
295 | struct list_head *crs_res) | ||
296 | { | 255 | { |
297 | struct resource_entry *entry, *tmp; | 256 | return setup_mcfg_map(ci); |
298 | struct resource *res, *conflict, *root = NULL; | ||
299 | |||
300 | validate_resources(&info->bridge->dev, crs_res, IORESOURCE_MEM); | ||
301 | validate_resources(&info->bridge->dev, crs_res, IORESOURCE_IO); | ||
302 | |||
303 | resource_list_for_each_entry_safe(entry, tmp, crs_res) { | ||
304 | res = entry->res; | ||
305 | if (res->flags & IORESOURCE_MEM) | ||
306 | root = &iomem_resource; | ||
307 | else if (res->flags & IORESOURCE_IO) | ||
308 | root = &ioport_resource; | ||
309 | else | ||
310 | BUG_ON(res); | ||
311 | |||
312 | conflict = insert_resource_conflict(root, res); | ||
313 | if (conflict) { | ||
314 | dev_info(&info->bridge->dev, | ||
315 | "ignoring host bridge window %pR (conflicts with %s %pR)\n", | ||
316 | res, conflict->name, conflict); | ||
317 | resource_list_destroy_entry(entry); | ||
318 | } | ||
319 | } | ||
320 | |||
321 | list_splice_tail(crs_res, resources); | ||
322 | } | 257 | } |
323 | 258 | ||
324 | static void release_pci_root_info(struct pci_host_bridge *bridge) | 259 | static void pci_acpi_root_release_info(struct acpi_pci_root_info *ci) |
325 | { | 260 | { |
326 | struct resource *res; | 261 | teardown_mcfg_map(ci); |
327 | struct resource_entry *entry; | 262 | kfree(container_of(ci, struct pci_root_info, common)); |
328 | struct pci_root_info *info = bridge->release_data; | ||
329 | |||
330 | resource_list_for_each_entry(entry, &bridge->windows) { | ||
331 | res = entry->res; | ||
332 | if (res->parent && | ||
333 | (res->flags & (IORESOURCE_MEM | IORESOURCE_IO))) | ||
334 | release_resource(res); | ||
335 | } | ||
336 | |||
337 | teardown_mcfg_map(info); | ||
338 | kfree(info); | ||
339 | } | 263 | } |
340 | 264 | ||
341 | /* | 265 | /* |
@@ -358,50 +282,47 @@ static bool resource_is_pcicfg_ioport(struct resource *res) | |||
358 | res->start == 0xCF8 && res->end == 0xCFF; | 282 | res->start == 0xCF8 && res->end == 0xCFF; |
359 | } | 283 | } |
360 | 284 | ||
361 | static void probe_pci_root_info(struct pci_root_info *info, | 285 | static int pci_acpi_root_prepare_resources(struct acpi_pci_root_info *ci) |
362 | struct acpi_device *device, | ||
363 | int busnum, int domain, | ||
364 | struct list_head *list) | ||
365 | { | 286 | { |
366 | int ret; | 287 | struct acpi_device *device = ci->bridge; |
288 | int busnum = ci->root->secondary.start; | ||
367 | struct resource_entry *entry, *tmp; | 289 | struct resource_entry *entry, *tmp; |
290 | int status; | ||
368 | 291 | ||
369 | sprintf(info->name, "PCI Bus %04x:%02x", domain, busnum); | 292 | status = acpi_pci_probe_root_resources(ci); |
370 | info->bridge = device; | 293 | if (pci_use_crs) { |
371 | ret = acpi_dev_get_resources(device, list, | 294 | resource_list_for_each_entry_safe(entry, tmp, &ci->resources) |
372 | acpi_dev_filter_resource_type_cb, | 295 | if (resource_is_pcicfg_ioport(entry->res)) |
373 | (void *)(IORESOURCE_IO | IORESOURCE_MEM)); | ||
374 | if (ret < 0) | ||
375 | dev_warn(&device->dev, | ||
376 | "failed to parse _CRS method, error code %d\n", ret); | ||
377 | else if (ret == 0) | ||
378 | dev_dbg(&device->dev, | ||
379 | "no IO and memory resources present in _CRS\n"); | ||
380 | else | ||
381 | resource_list_for_each_entry_safe(entry, tmp, list) { | ||
382 | if ((entry->res->flags & IORESOURCE_DISABLED) || | ||
383 | resource_is_pcicfg_ioport(entry->res)) | ||
384 | resource_list_destroy_entry(entry); | 296 | resource_list_destroy_entry(entry); |
385 | else | 297 | return status; |
386 | entry->res->name = info->name; | 298 | } |
387 | } | 299 | |
300 | resource_list_for_each_entry_safe(entry, tmp, &ci->resources) { | ||
301 | dev_printk(KERN_DEBUG, &device->dev, | ||
302 | "host bridge window %pR (ignored)\n", entry->res); | ||
303 | resource_list_destroy_entry(entry); | ||
304 | } | ||
305 | x86_pci_root_bus_resources(busnum, &ci->resources); | ||
306 | |||
307 | return 0; | ||
388 | } | 308 | } |
389 | 309 | ||
310 | static struct acpi_pci_root_ops acpi_pci_root_ops = { | ||
311 | .pci_ops = &pci_root_ops, | ||
312 | .init_info = pci_acpi_root_init_info, | ||
313 | .release_info = pci_acpi_root_release_info, | ||
314 | .prepare_resources = pci_acpi_root_prepare_resources, | ||
315 | }; | ||
316 | |||
390 | struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) | 317 | struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) |
391 | { | 318 | { |
392 | struct acpi_device *device = root->device; | ||
393 | struct pci_root_info *info; | ||
394 | int domain = root->segment; | 319 | int domain = root->segment; |
395 | int busnum = root->secondary.start; | 320 | int busnum = root->secondary.start; |
396 | struct resource_entry *res_entry; | 321 | int node = pci_acpi_root_get_node(root); |
397 | LIST_HEAD(crs_res); | ||
398 | LIST_HEAD(resources); | ||
399 | struct pci_bus *bus; | 322 | struct pci_bus *bus; |
400 | struct pci_sysdata *sd; | ||
401 | int node; | ||
402 | 323 | ||
403 | if (pci_ignore_seg) | 324 | if (pci_ignore_seg) |
404 | domain = 0; | 325 | root->segment = domain = 0; |
405 | 326 | ||
406 | if (domain && !pci_domains_supported) { | 327 | if (domain && !pci_domains_supported) { |
407 | printk(KERN_WARNING "pci_bus %04x:%02x: " | 328 | printk(KERN_WARNING "pci_bus %04x:%02x: " |
@@ -410,71 +331,33 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) | |||
410 | return NULL; | 331 | return NULL; |
411 | } | 332 | } |
412 | 333 | ||
413 | node = acpi_get_node(device->handle); | ||
414 | if (node == NUMA_NO_NODE) { | ||
415 | node = x86_pci_root_bus_node(busnum); | ||
416 | if (node != 0 && node != NUMA_NO_NODE) | ||
417 | dev_info(&device->dev, FW_BUG "no _PXM; falling back to node %d from hardware (may be inconsistent with ACPI node numbers)\n", | ||
418 | node); | ||
419 | } | ||
420 | |||
421 | if (node != NUMA_NO_NODE && !node_online(node)) | ||
422 | node = NUMA_NO_NODE; | ||
423 | |||
424 | info = kzalloc_node(sizeof(*info), GFP_KERNEL, node); | ||
425 | if (!info) { | ||
426 | printk(KERN_WARNING "pci_bus %04x:%02x: " | ||
427 | "ignored (out of memory)\n", domain, busnum); | ||
428 | return NULL; | ||
429 | } | ||
430 | |||
431 | sd = &info->sd; | ||
432 | sd->domain = domain; | ||
433 | sd->node = node; | ||
434 | sd->companion = device; | ||
435 | |||
436 | bus = pci_find_bus(domain, busnum); | 334 | bus = pci_find_bus(domain, busnum); |
437 | if (bus) { | 335 | if (bus) { |
438 | /* | 336 | /* |
439 | * If the desired bus has been scanned already, replace | 337 | * If the desired bus has been scanned already, replace |
440 | * its bus->sysdata. | 338 | * its bus->sysdata. |
441 | */ | 339 | */ |
442 | memcpy(bus->sysdata, sd, sizeof(*sd)); | 340 | struct pci_sysdata sd = { |
443 | kfree(info); | 341 | .domain = domain, |
444 | } else { | 342 | .node = node, |
445 | /* insert busn res at first */ | 343 | .companion = root->device |
446 | pci_add_resource(&resources, &root->secondary); | 344 | }; |
447 | 345 | ||
448 | /* | 346 | memcpy(bus->sysdata, &sd, sizeof(sd)); |
449 | * _CRS with no apertures is normal, so only fall back to | 347 | } else { |
450 | * defaults or native bridge info if we're ignoring _CRS. | 348 | struct pci_root_info *info; |
451 | */ | 349 | |
452 | probe_pci_root_info(info, device, busnum, domain, &crs_res); | 350 | info = kzalloc_node(sizeof(*info), GFP_KERNEL, node); |
453 | if (pci_use_crs) { | 351 | if (!info) |
454 | add_resources(info, &resources, &crs_res); | 352 | dev_err(&root->device->dev, |
455 | } else { | 353 | "pci_bus %04x:%02x: ignored (out of memory)\n", |
456 | resource_list_for_each_entry(res_entry, &crs_res) | 354 | domain, busnum); |
457 | dev_printk(KERN_DEBUG, &device->dev, | 355 | else { |
458 | "host bridge window %pR (ignored)\n", | 356 | info->sd.domain = domain; |
459 | res_entry->res); | 357 | info->sd.node = node; |
460 | resource_list_free(&crs_res); | 358 | info->sd.companion = root->device; |
461 | x86_pci_root_bus_resources(busnum, &resources); | 359 | bus = acpi_pci_root_create(root, &acpi_pci_root_ops, |
462 | } | 360 | &info->common, &info->sd); |
463 | |||
464 | if (!setup_mcfg_map(info, domain, (u8)root->secondary.start, | ||
465 | (u8)root->secondary.end, root->mcfg_addr)) | ||
466 | bus = pci_create_root_bus(NULL, busnum, &pci_root_ops, | ||
467 | sd, &resources); | ||
468 | |||
469 | if (bus) { | ||
470 | pci_scan_child_bus(bus); | ||
471 | pci_set_host_bridge_release( | ||
472 | to_pci_host_bridge(bus->bridge), | ||
473 | release_pci_root_info, info); | ||
474 | } else { | ||
475 | resource_list_free(&resources); | ||
476 | teardown_mcfg_map(info); | ||
477 | kfree(info); | ||
478 | } | 361 | } |
479 | } | 362 | } |
480 | 363 | ||
@@ -487,9 +370,6 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) | |||
487 | pcie_bus_configure_settings(child); | 370 | pcie_bus_configure_settings(child); |
488 | } | 371 | } |
489 | 372 | ||
490 | if (bus && node != NUMA_NO_NODE) | ||
491 | dev_printk(KERN_DEBUG, &bus->dev, "on NUMA node %d\n", node); | ||
492 | |||
493 | return bus; | 373 | return bus; |
494 | } | 374 | } |
495 | 375 | ||
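[Editor's note] The acpi.c rewrite above hands host-bridge creation to the shared ACPI core (acpi_pci_root_create() plus the acpi_pci_root_ops callbacks); the x86-specific work shrinks to filtering resources in its prepare_resources() hook. A minimal sketch of that list-filtering pattern, assuming the predicate is interchangeable (resource_is_pcicfg_ioport() is this file's static helper; any test would do):

	/*
	 * Drop unwanted entries from a resource list in place.  The _safe
	 * iterator is required because entries are destroyed mid-walk.
	 */
	static void filter_resources(struct list_head *resources)
	{
		struct resource_entry *entry, *tmp;

		resource_list_for_each_entry_safe(entry, tmp, resources)
			if (resource_is_pcicfg_ioport(entry->res))
				resource_list_destroy_entry(entry);
	}
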
diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c index 7bcf06a7cd12..6eb3c8af96e2 100644 --- a/arch/x86/pci/bus_numa.c +++ b/arch/x86/pci/bus_numa.c | |||
@@ -50,18 +50,9 @@ void x86_pci_root_bus_resources(int bus, struct list_head *resources) | |||
50 | if (!found) | 50 | if (!found) |
51 | pci_add_resource(resources, &info->busn); | 51 | pci_add_resource(resources, &info->busn); |
52 | 52 | ||
53 | list_for_each_entry(root_res, &info->resources, list) { | 53 | list_for_each_entry(root_res, &info->resources, list) |
54 | struct resource *res; | 54 | pci_add_resource(resources, &root_res->res); |
55 | struct resource *root; | ||
56 | 55 | ||
57 | res = &root_res->res; | ||
58 | pci_add_resource(resources, res); | ||
59 | if (res->flags & IORESOURCE_IO) | ||
60 | root = &ioport_resource; | ||
61 | else | ||
62 | root = &iomem_resource; | ||
63 | insert_resource(root, res); | ||
64 | } | ||
65 | return; | 56 | return; |
66 | 57 | ||
67 | default_resources: | 58 | default_resources: |
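[Editor's note] The bus_numa.c hunk removes the per-window insert_resource() calls, so x86_pci_root_bus_resources() now only copies entries onto the caller's list. For reference, a sketch of the behaviour that was dropped (root selection mirrors the deleted lines; claiming windows in the global trees is now left to common code):

	/*
	 * Formerly: claim each root-bus window in the global resource
	 * tree, picking ioport_resource or iomem_resource by type.
	 */
	static void claim_root_window(struct resource *res)
	{
		struct resource *root = (res->flags & IORESOURCE_IO) ?
					&ioport_resource : &iomem_resource;

		insert_resource(root, res);
	}
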
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index dc78a4a9a466..eccd4d99e6a4 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c | |||
@@ -675,6 +675,14 @@ int pcibios_add_device(struct pci_dev *dev) | |||
675 | 675 | ||
676 | int pcibios_alloc_irq(struct pci_dev *dev) | 676 | int pcibios_alloc_irq(struct pci_dev *dev) |
677 | { | 677 | { |
678 | /* | ||
679 | * If the PCI device was already claimed by core code and has | ||
680 | * MSI enabled, probing of the pcibios IRQ will overwrite | ||
681 | * dev->irq. So bail out if MSI is already enabled. | ||
682 | */ | ||
683 | if (pci_dev_msi_enabled(dev)) | ||
684 | return -EBUSY; | ||
685 | |||
678 | return pcibios_enable_irq(dev); | 686 | return pcibios_enable_irq(dev); |
679 | } | 687 | } |
680 | 688 | ||
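[Editor's note] The new guard in pcibios_alloc_irq() keeps legacy IRQ probing from clobbering an MSI-programmed dev->irq. A hedged usage sketch of the same check from a hypothetical caller (example_setup_irq() is illustrative; pci_dev_msi_enabled() is the real predicate used above):

	/* Refuse legacy IRQ routing once MSI/MSI-X owns dev->irq. */
	static int example_setup_irq(struct pci_dev *dev)
	{
		if (pci_dev_msi_enabled(dev))
			return -EBUSY;	/* dev->irq already holds the MSI vector */

		return pcibios_enable_irq(dev);
	}
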
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index 5b662c0faf8c..ea6f3802c17b 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c | |||
@@ -54,7 +54,7 @@ void pcibios_scan_specific_bus(int busn) | |||
54 | } | 54 | } |
55 | EXPORT_SYMBOL_GPL(pcibios_scan_specific_bus); | 55 | EXPORT_SYMBOL_GPL(pcibios_scan_specific_bus); |
56 | 56 | ||
57 | int __init pci_subsys_init(void) | 57 | static int __init pci_subsys_init(void) |
58 | { | 58 | { |
59 | /* | 59 | /* |
60 | * The init function returns a non-zero value when | 60 | * The init function returns a non-zero value when |
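[Editor's note] Making pci_subsys_init() static implies it is now reached only through its initcall registration. A sketch of that pattern, assuming the usual subsys_initcall() hookup (the registration line is not part of this hunk):

	static int __init pci_subsys_init(void)
	{
		/* body elided; runs once at the subsys initcall level */
		return 0;
	}
	subsys_initcall(pci_subsys_init);
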
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index 06934a8a4872..e5f854ce2d72 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c | |||
@@ -211,7 +211,7 @@ static int copy_sc_from_user(struct pt_regs *regs, | |||
211 | if (err) | 211 | if (err) |
212 | return 1; | 212 | return 1; |
213 | 213 | ||
214 | err = convert_fxsr_from_user(&fpx, sc.fpstate); | 214 | err = convert_fxsr_from_user(&fpx, (void *)sc.fpstate); |
215 | if (err) | 215 | if (err) |
216 | return 1; | 216 | return 1; |
217 | 217 | ||
@@ -227,7 +227,7 @@ static int copy_sc_from_user(struct pt_regs *regs, | |||
227 | { | 227 | { |
228 | struct user_i387_struct fp; | 228 | struct user_i387_struct fp; |
229 | 229 | ||
230 | err = copy_from_user(&fp, sc.fpstate, | 230 | err = copy_from_user(&fp, (void *)sc.fpstate, |
231 | sizeof(struct user_i387_struct)); | 231 | sizeof(struct user_i387_struct)); |
232 | if (err) | 232 | if (err) |
233 | return 1; | 233 | return 1; |
@@ -291,7 +291,7 @@ static int copy_sc_to_user(struct sigcontext __user *to, | |||
291 | #endif | 291 | #endif |
292 | #undef PUTREG | 292 | #undef PUTREG |
293 | sc.oldmask = mask; | 293 | sc.oldmask = mask; |
294 | sc.fpstate = to_fp; | 294 | sc.fpstate = (unsigned long)to_fp; |
295 | 295 | ||
296 | err = copy_to_user(to, &sc, sizeof(struct sigcontext)); | 296 | err = copy_to_user(to, &sc, sizeof(struct sigcontext)); |
297 | if (err) | 297 | if (err) |
@@ -468,12 +468,10 @@ long sys_sigreturn(void) | |||
468 | struct sigframe __user *frame = (struct sigframe __user *)(sp - 8); | 468 | struct sigframe __user *frame = (struct sigframe __user *)(sp - 8); |
469 | sigset_t set; | 469 | sigset_t set; |
470 | struct sigcontext __user *sc = &frame->sc; | 470 | struct sigcontext __user *sc = &frame->sc; |
471 | unsigned long __user *oldmask = &sc->oldmask; | ||
472 | unsigned long __user *extramask = frame->extramask; | ||
473 | int sig_size = (_NSIG_WORDS - 1) * sizeof(unsigned long); | 471 | int sig_size = (_NSIG_WORDS - 1) * sizeof(unsigned long); |
474 | 472 | ||
475 | if (copy_from_user(&set.sig[0], oldmask, sizeof(set.sig[0])) || | 473 | if (copy_from_user(&set.sig[0], (void *)sc->oldmask, sizeof(set.sig[0])) || |
476 | copy_from_user(&set.sig[1], extramask, sig_size)) | 474 | copy_from_user(&set.sig[1], frame->extramask, sig_size)) |
477 | goto segfault; | 475 | goto segfault; |
478 | 476 | ||
479 | set_current_blocked(&set); | 477 | set_current_blocked(&set); |
@@ -505,6 +503,7 @@ int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig, | |||
505 | { | 503 | { |
506 | struct rt_sigframe __user *frame; | 504 | struct rt_sigframe __user *frame; |
507 | int err = 0, sig = ksig->sig; | 505 | int err = 0, sig = ksig->sig; |
506 | unsigned long fp_to; | ||
508 | 507 | ||
509 | frame = (struct rt_sigframe __user *) | 508 | frame = (struct rt_sigframe __user *) |
510 | round_down(stack_top - sizeof(struct rt_sigframe), 16); | 509 | round_down(stack_top - sizeof(struct rt_sigframe), 16); |
@@ -526,7 +525,10 @@ int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig, | |||
526 | err |= __save_altstack(&frame->uc.uc_stack, PT_REGS_SP(regs)); | 525 | err |= __save_altstack(&frame->uc.uc_stack, PT_REGS_SP(regs)); |
527 | err |= copy_sc_to_user(&frame->uc.uc_mcontext, &frame->fpstate, regs, | 526 | err |= copy_sc_to_user(&frame->uc.uc_mcontext, &frame->fpstate, regs, |
528 | set->sig[0]); | 527 | set->sig[0]); |
529 | err |= __put_user(&frame->fpstate, &frame->uc.uc_mcontext.fpstate); | 528 | |
529 | fp_to = (unsigned long)&frame->fpstate; | ||
530 | |||
531 | err |= __put_user(fp_to, &frame->uc.uc_mcontext.fpstate); | ||
530 | if (sizeof(*set) == 16) { | 532 | if (sizeof(*set) == 16) { |
531 | err |= __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]); | 533 | err |= __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]); |
532 | err |= __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]); | 534 | err |= __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]); |
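[Editor's note] The um/signal.c casts follow from sc.fpstate being held as an unsigned long rather than a pointer, so user addresses round-trip through an integer field. A self-contained sketch of that round-trip, with illustrative names (example_ctx and read_fpstate are not from this patch):

	struct example_ctx {
		unsigned long fpstate;	/* user-space address stored as integer */
	};

	static int read_fpstate(struct example_ctx *ctx, void *buf, size_t len)
	{
		/* cast back to a user pointer only at the uaccess boundary */
		if (copy_from_user(buf, (void __user *)ctx->fpstate, len))
			return -EFAULT;
		return 0;
	}
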
diff --git a/arch/x86/um/stub_32.S b/arch/x86/um/stub_32.S index b972649d3a18..98816804e131 100644 --- a/arch/x86/um/stub_32.S +++ b/arch/x86/um/stub_32.S | |||
@@ -1,6 +1,5 @@ | |||
1 | #include <as-layout.h> | 1 | #include <as-layout.h> |
2 | 2 | ||
3 | .globl syscall_stub | ||
4 | .section .__syscall_stub, "ax" | 3 | .section .__syscall_stub, "ax" |
5 | 4 | ||
6 | .globl batch_syscall_stub | 5 | .globl batch_syscall_stub |
diff --git a/arch/x86/um/stub_64.S b/arch/x86/um/stub_64.S index 7160b20172d0..ba914b3b8cc4 100644 --- a/arch/x86/um/stub_64.S +++ b/arch/x86/um/stub_64.S | |||
@@ -1,25 +1,9 @@ | |||
1 | #include <as-layout.h> | 1 | #include <as-layout.h> |
2 | 2 | ||
3 | .globl syscall_stub | ||
4 | .section .__syscall_stub, "ax" | 3 | .section .__syscall_stub, "ax" |
5 | syscall_stub: | ||
6 | syscall | ||
7 | /* We don't have 64-bit constants, so this constructs the address | ||
8 | * we need. | ||
9 | */ | ||
10 | movq $(STUB_DATA >> 32), %rbx | ||
11 | salq $32, %rbx | ||
12 | movq $(STUB_DATA & 0xffffffff), %rcx | ||
13 | or %rcx, %rbx | ||
14 | movq %rax, (%rbx) | ||
15 | int3 | ||
16 | |||
17 | .globl batch_syscall_stub | 4 | .globl batch_syscall_stub |
18 | batch_syscall_stub: | 5 | batch_syscall_stub: |
19 | mov $(STUB_DATA >> 32), %rbx | 6 | mov $(STUB_DATA), %rbx |
20 | sal $32, %rbx | ||
21 | mov $(STUB_DATA & 0xffffffff), %rax | ||
22 | or %rax, %rbx | ||
23 | /* load pointer to first operation */ | 7 | /* load pointer to first operation */ |
24 | mov %rbx, %rsp | 8 | mov %rbx, %rsp |
25 | add $0x10, %rsp | 9 | add $0x10, %rsp |
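[Editor's note] The stub_64.S change leans on gas emitting a 64-bit-immediate movabs for "mov $(STUB_DATA), %rbx", which makes the old two-halves construction unnecessary. A C analogue of what the deleted instructions computed (assuming STUB_DATA is an unsigned long constant):

	/* old sequence: build the 64-bit value from two 32-bit halves */
	unsigned long rbx = ((unsigned long)(STUB_DATA >> 32) << 32) |
			    (STUB_DATA & 0xffffffffUL);
	/* new sequence: the same value, loaded by a single movabs */
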
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 2745e8ae93f3..4334e511cfc8 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -75,6 +75,7 @@ | |||
75 | #include <asm/mwait.h> | 75 | #include <asm/mwait.h> |
76 | #include <asm/pci_x86.h> | 76 | #include <asm/pci_x86.h> |
77 | #include <asm/pat.h> | 77 | #include <asm/pat.h> |
78 | #include <asm/cpu.h> | ||
78 | 79 | ||
79 | #ifdef CONFIG_ACPI | 80 | #ifdef CONFIG_ACPI |
80 | #include <linux/acpi.h> | 81 | #include <linux/acpi.h> |
@@ -1892,3 +1893,17 @@ const struct hypervisor_x86 x86_hyper_xen = { | |||
1892 | .set_cpu_features = xen_set_cpu_features, | 1893 | .set_cpu_features = xen_set_cpu_features, |
1893 | }; | 1894 | }; |
1894 | EXPORT_SYMBOL(x86_hyper_xen); | 1895 | EXPORT_SYMBOL(x86_hyper_xen); |
1896 | |||
1897 | #ifdef CONFIG_HOTPLUG_CPU | ||
1898 | void xen_arch_register_cpu(int num) | ||
1899 | { | ||
1900 | arch_register_cpu(num); | ||
1901 | } | ||
1902 | EXPORT_SYMBOL(xen_arch_register_cpu); | ||
1903 | |||
1904 | void xen_arch_unregister_cpu(int num) | ||
1905 | { | ||
1906 | arch_unregister_cpu(num); | ||
1907 | } | ||
1908 | EXPORT_SYMBOL(xen_arch_unregister_cpu); | ||
1909 | #endif | ||
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c index 1580e7a5a4cf..e079500b17f3 100644 --- a/arch/x86/xen/grant-table.c +++ b/arch/x86/xen/grant-table.c | |||
@@ -133,7 +133,7 @@ static int __init xlated_setup_gnttab_pages(void) | |||
133 | kfree(pages); | 133 | kfree(pages); |
134 | return -ENOMEM; | 134 | return -ENOMEM; |
135 | } | 135 | } |
136 | rc = alloc_xenballooned_pages(nr_grant_frames, pages, 0 /* lowmem */); | 136 | rc = alloc_xenballooned_pages(nr_grant_frames, pages); |
137 | if (rc) { | 137 | if (rc) { |
138 | pr_warn("%s Couldn't balloon alloc %ld pfns rc:%d\n", __func__, | 138 | pr_warn("%s Couldn't balloon alloc %ld pfns rc:%d\n", __func__, |
139 | nr_grant_frames, rc); | 139 | nr_grant_frames, rc); |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 41ee3e25fcce..c913ca4f6958 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -2494,14 +2494,9 @@ void __init xen_init_mmu_ops(void) | |||
2494 | { | 2494 | { |
2495 | x86_init.paging.pagetable_init = xen_pagetable_init; | 2495 | x86_init.paging.pagetable_init = xen_pagetable_init; |
2496 | 2496 | ||
2497 | /* Optimization - we can use the HVM one but it has no idea which | 2497 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
2498 | * VCPUs are descheduled - which means that it will needlessly IPI | ||
2499 | * them. Xen knows so let it do the job. | ||
2500 | */ | ||
2501 | if (xen_feature(XENFEAT_auto_translated_physmap)) { | ||
2502 | pv_mmu_ops.flush_tlb_others = xen_flush_tlb_others; | ||
2503 | return; | 2498 | return; |
2504 | } | 2499 | |
2505 | pv_mmu_ops = xen_mmu_ops; | 2500 | pv_mmu_ops = xen_mmu_ops; |
2506 | 2501 | ||
2507 | memset(dummy_mapping, 0xff, PAGE_SIZE); | 2502 | memset(dummy_mapping, 0xff, PAGE_SIZE); |
@@ -2887,6 +2882,7 @@ static int do_remap_gfn(struct vm_area_struct *vma, | |||
2887 | addr += range; | 2882 | addr += range; |
2888 | if (err_ptr) | 2883 | if (err_ptr) |
2889 | err_ptr += batch; | 2884 | err_ptr += batch; |
2885 | cond_resched(); | ||
2890 | } | 2886 | } |
2891 | out: | 2887 | out: |
2892 | 2888 | ||
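[Editor's note] The cond_resched() added to do_remap_gfn() yields between remap batches so long mappings do not monopolize the CPU. A generic sketch of the pattern (process_batch() and MAX_BATCH are hypothetical stand-ins for one unit of remap work):

	static void remap_all(unsigned long nr_pages)
	{
		while (nr_pages) {
			unsigned long batch = min(nr_pages,
						  (unsigned long)MAX_BATCH);

			process_batch(batch);	/* one bounded chunk of work */
			nr_pages -= batch;
			cond_resched();		/* give other tasks a chance */
		}
	}
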
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 660b3cfef234..cab9f766bb06 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c | |||
@@ -530,7 +530,7 @@ static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *pte_pg) | |||
530 | * the new pages are installed with cmpxchg; if we lose the race then | 530 | * the new pages are installed with cmpxchg; if we lose the race then |
531 | * simply free the page we allocated and use the one that's there. | 531 | * simply free the page we allocated and use the one that's there. |
532 | */ | 532 | */ |
533 | static bool alloc_p2m(unsigned long pfn) | 533 | int xen_alloc_p2m_entry(unsigned long pfn) |
534 | { | 534 | { |
535 | unsigned topidx; | 535 | unsigned topidx; |
536 | unsigned long *top_mfn_p, *mid_mfn; | 536 | unsigned long *top_mfn_p, *mid_mfn; |
@@ -540,6 +540,9 @@ static bool alloc_p2m(unsigned long pfn) | |||
540 | unsigned long addr = (unsigned long)(xen_p2m_addr + pfn); | 540 | unsigned long addr = (unsigned long)(xen_p2m_addr + pfn); |
541 | unsigned long p2m_pfn; | 541 | unsigned long p2m_pfn; |
542 | 542 | ||
543 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
544 | return 0; | ||
545 | |||
543 | ptep = lookup_address(addr, &level); | 546 | ptep = lookup_address(addr, &level); |
544 | BUG_ON(!ptep || level != PG_LEVEL_4K); | 547 | BUG_ON(!ptep || level != PG_LEVEL_4K); |
545 | pte_pg = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1)); | 548 | pte_pg = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1)); |
@@ -548,7 +551,7 @@ static bool alloc_p2m(unsigned long pfn) | |||
548 | /* PMD level is missing, allocate a new one */ | 551 | /* PMD level is missing, allocate a new one */ |
549 | ptep = alloc_p2m_pmd(addr, pte_pg); | 552 | ptep = alloc_p2m_pmd(addr, pte_pg); |
550 | if (!ptep) | 553 | if (!ptep) |
551 | return false; | 554 | return -ENOMEM; |
552 | } | 555 | } |
553 | 556 | ||
554 | if (p2m_top_mfn && pfn < MAX_P2M_PFN) { | 557 | if (p2m_top_mfn && pfn < MAX_P2M_PFN) { |
@@ -566,7 +569,7 @@ static bool alloc_p2m(unsigned long pfn) | |||
566 | 569 | ||
567 | mid_mfn = alloc_p2m_page(); | 570 | mid_mfn = alloc_p2m_page(); |
568 | if (!mid_mfn) | 571 | if (!mid_mfn) |
569 | return false; | 572 | return -ENOMEM; |
570 | 573 | ||
571 | p2m_mid_mfn_init(mid_mfn, p2m_missing); | 574 | p2m_mid_mfn_init(mid_mfn, p2m_missing); |
572 | 575 | ||
@@ -592,7 +595,7 @@ static bool alloc_p2m(unsigned long pfn) | |||
592 | 595 | ||
593 | p2m = alloc_p2m_page(); | 596 | p2m = alloc_p2m_page(); |
594 | if (!p2m) | 597 | if (!p2m) |
595 | return false; | 598 | return -ENOMEM; |
596 | 599 | ||
597 | if (p2m_pfn == PFN_DOWN(__pa(p2m_missing))) | 600 | if (p2m_pfn == PFN_DOWN(__pa(p2m_missing))) |
598 | p2m_init(p2m); | 601 | p2m_init(p2m); |
@@ -625,8 +628,9 @@ static bool alloc_p2m(unsigned long pfn) | |||
625 | HYPERVISOR_shared_info->arch.max_pfn = xen_p2m_last_pfn; | 628 | HYPERVISOR_shared_info->arch.max_pfn = xen_p2m_last_pfn; |
626 | } | 629 | } |
627 | 630 | ||
628 | return true; | 631 | return 0; |
629 | } | 632 | } |
633 | EXPORT_SYMBOL(xen_alloc_p2m_entry); | ||
630 | 634 | ||
631 | unsigned long __init set_phys_range_identity(unsigned long pfn_s, | 635 | unsigned long __init set_phys_range_identity(unsigned long pfn_s, |
632 | unsigned long pfn_e) | 636 | unsigned long pfn_e) |
@@ -688,7 +692,10 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) | |||
688 | bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) | 692 | bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) |
689 | { | 693 | { |
690 | if (unlikely(!__set_phys_to_machine(pfn, mfn))) { | 694 | if (unlikely(!__set_phys_to_machine(pfn, mfn))) { |
691 | if (!alloc_p2m(pfn)) | 695 | int ret; |
696 | |||
697 | ret = xen_alloc_p2m_entry(pfn); | ||
698 | if (ret < 0) | ||
692 | return false; | 699 | return false; |
693 | 700 | ||
694 | return __set_phys_to_machine(pfn, mfn); | 701 | return __set_phys_to_machine(pfn, mfn); |
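[Editor's note] alloc_p2m() becomes the exported xen_alloc_p2m_entry(), trading bool for 0/-errno so other code can pre-populate the p2m tree before mapping. A hedged sketch of such a caller (ensure_p2m_range() is hypothetical):

	/* Pre-populate p2m entries for a pfn range before mapping. */
	static int ensure_p2m_range(unsigned long start, unsigned long end)
	{
		unsigned long pfn;
		int ret;

		for (pfn = start; pfn < end; pfn++) {
			ret = xen_alloc_p2m_entry(pfn);
			if (ret < 0)
				return ret;	/* typically -ENOMEM */
		}
		return 0;
	}
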
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 63320b6d35bc..7ab29518a3b9 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -212,7 +212,7 @@ static unsigned long __init xen_find_pfn_range(unsigned long *min_pfn) | |||
212 | e_pfn = PFN_DOWN(entry->addr + entry->size); | 212 | e_pfn = PFN_DOWN(entry->addr + entry->size); |
213 | 213 | ||
214 | /* We only care about E820 after this */ | 214 | /* We only care about E820 after this */ |
215 | if (e_pfn < *min_pfn) | 215 | if (e_pfn <= *min_pfn) |
216 | continue; | 216 | continue; |
217 | 217 | ||
218 | s_pfn = PFN_UP(entry->addr); | 218 | s_pfn = PFN_UP(entry->addr); |
@@ -829,6 +829,8 @@ char * __init xen_memory_setup(void) | |||
829 | addr = xen_e820_map[0].addr; | 829 | addr = xen_e820_map[0].addr; |
830 | size = xen_e820_map[0].size; | 830 | size = xen_e820_map[0].size; |
831 | while (i < xen_e820_map_entries) { | 831 | while (i < xen_e820_map_entries) { |
832 | bool discard = false; | ||
833 | |||
832 | chunk_size = size; | 834 | chunk_size = size; |
833 | type = xen_e820_map[i].type; | 835 | type = xen_e820_map[i].type; |
834 | 836 | ||
@@ -843,10 +845,11 @@ char * __init xen_memory_setup(void) | |||
843 | xen_add_extra_mem(pfn_s, n_pfns); | 845 | xen_add_extra_mem(pfn_s, n_pfns); |
844 | xen_max_p2m_pfn = pfn_s + n_pfns; | 846 | xen_max_p2m_pfn = pfn_s + n_pfns; |
845 | } else | 847 | } else |
846 | type = E820_UNUSABLE; | 848 | discard = true; |
847 | } | 849 | } |
848 | 850 | ||
849 | xen_align_and_add_e820_region(addr, chunk_size, type); | 851 | if (!discard) |
852 | xen_align_and_add_e820_region(addr, chunk_size, type); | ||
850 | 853 | ||
851 | addr += chunk_size; | 854 | addr += chunk_size; |
852 | size -= chunk_size; | 855 | size -= chunk_size; |
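[Editor's note] The setup.c hunk stops retyping unpopulated RAM as E820_UNUSABLE and instead drops the chunk from the map via a discard flag. A condensed sketch of the new control flow (keep_as_ram() and add_region() are illustrative names for the logic above):

	bool discard = false;

	if (!keep_as_ram(chunk))	/* neither kept nor remapped */
		discard = true;

	if (!discard)
		add_region(chunk);	/* only surviving chunks reach E820 */
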
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index feddabdab448..3705eabd7e22 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c | |||
@@ -68,26 +68,16 @@ static void xen_pv_post_suspend(int suspend_cancelled) | |||
68 | 68 | ||
69 | void xen_arch_pre_suspend(void) | 69 | void xen_arch_pre_suspend(void) |
70 | { | 70 | { |
71 | int cpu; | ||
72 | |||
73 | for_each_online_cpu(cpu) | ||
74 | xen_pmu_finish(cpu); | ||
75 | |||
76 | if (xen_pv_domain()) | 71 | if (xen_pv_domain()) |
77 | xen_pv_pre_suspend(); | 72 | xen_pv_pre_suspend(); |
78 | } | 73 | } |
79 | 74 | ||
80 | void xen_arch_post_suspend(int cancelled) | 75 | void xen_arch_post_suspend(int cancelled) |
81 | { | 76 | { |
82 | int cpu; | ||
83 | |||
84 | if (xen_pv_domain()) | 77 | if (xen_pv_domain()) |
85 | xen_pv_post_suspend(cancelled); | 78 | xen_pv_post_suspend(cancelled); |
86 | else | 79 | else |
87 | xen_hvm_post_suspend(cancelled); | 80 | xen_hvm_post_suspend(cancelled); |
88 | |||
89 | for_each_online_cpu(cpu) | ||
90 | xen_pmu_init(cpu); | ||
91 | } | 81 | } |
92 | 82 | ||
93 | static void xen_vcpu_notify_restore(void *data) | 83 | static void xen_vcpu_notify_restore(void *data) |
@@ -106,10 +96,20 @@ static void xen_vcpu_notify_suspend(void *data) | |||
106 | 96 | ||
107 | void xen_arch_resume(void) | 97 | void xen_arch_resume(void) |
108 | { | 98 | { |
99 | int cpu; | ||
100 | |||
109 | on_each_cpu(xen_vcpu_notify_restore, NULL, 1); | 101 | on_each_cpu(xen_vcpu_notify_restore, NULL, 1); |
102 | |||
103 | for_each_online_cpu(cpu) | ||
104 | xen_pmu_init(cpu); | ||
110 | } | 105 | } |
111 | 106 | ||
112 | void xen_arch_suspend(void) | 107 | void xen_arch_suspend(void) |
113 | { | 108 | { |
109 | int cpu; | ||
110 | |||
111 | for_each_online_cpu(cpu) | ||
112 | xen_pmu_finish(cpu); | ||
113 | |||
114 | on_each_cpu(xen_vcpu_notify_suspend, NULL, 1); | 114 | on_each_cpu(xen_vcpu_notify_suspend, NULL, 1); |
115 | } | 115 | } |
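[Editor's note] The suspend.c move pairs the PMU hooks symmetrically: xen_pmu_finish() now runs per CPU on the suspend path and xen_pmu_init() per CPU on resume, rather than inside the pre/post-suspend callbacks. A sketch of the resulting bracket (the example_* wrappers are illustrative; the ordering relative to the vcpu notifications matches the code above):

	void example_suspend(void)
	{
		int cpu;

		for_each_online_cpu(cpu)
			xen_pmu_finish(cpu);	/* before vcpus are notified */
	}

	void example_resume(void)
	{
		int cpu;

		for_each_online_cpu(cpu)
			xen_pmu_init(cpu);	/* after vcpus are restored */
	}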