diff options
author | Ingo Molnar <mingo@elte.hu> | 2009-12-09 02:24:25 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-12-09 02:24:57 -0500 |
commit | 4c68db38c85188824b21698842b42a62b4f78657 (patch) | |
tree | 3ee1c3b22af6713adf669a3bb452ce82bc7fe495 /arch/x86 | |
parent | 5c0e9f28da84c68ce0ae68b7a75faaf862e156e2 (diff) | |
parent | 2b876f95d03e226394b5d360c86127cbefaf614b (diff) |
Merge branch 'linus' into x86/urgent
Merge reason: We want to queue up a dependent patch.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86')
103 files changed, 3058 insertions, 1442 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b3cfd2433c34..32a1918e1b88 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -51,6 +51,7 @@ config X86 | |||
51 | select HAVE_KERNEL_LZMA | 51 | select HAVE_KERNEL_LZMA |
52 | select HAVE_HW_BREAKPOINT | 52 | select HAVE_HW_BREAKPOINT |
53 | select HAVE_ARCH_KMEMCHECK | 53 | select HAVE_ARCH_KMEMCHECK |
54 | select HAVE_USER_RETURN_NOTIFIER | ||
54 | 55 | ||
55 | config OUTPUT_FORMAT | 56 | config OUTPUT_FORMAT |
56 | string | 57 | string |
@@ -1331,7 +1332,9 @@ config MATH_EMULATION | |||
1331 | kernel, it won't hurt. | 1332 | kernel, it won't hurt. |
1332 | 1333 | ||
1333 | config MTRR | 1334 | config MTRR |
1334 | bool "MTRR (Memory Type Range Register) support" | 1335 | bool |
1336 | default y | ||
1337 | prompt "MTRR (Memory Type Range Register) support" if EMBEDDED | ||
1335 | ---help--- | 1338 | ---help--- |
1336 | On Intel P6 family processors (Pentium Pro, Pentium II and later) | 1339 | On Intel P6 family processors (Pentium Pro, Pentium II and later) |
1337 | the Memory Type Range Registers (MTRRs) may be used to control | 1340 | the Memory Type Range Registers (MTRRs) may be used to control |
@@ -1397,7 +1400,8 @@ config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT | |||
1397 | 1400 | ||
1398 | config X86_PAT | 1401 | config X86_PAT |
1399 | bool | 1402 | bool |
1400 | prompt "x86 PAT support" | 1403 | default y |
1404 | prompt "x86 PAT support" if EMBEDDED | ||
1401 | depends on MTRR | 1405 | depends on MTRR |
1402 | ---help--- | 1406 | ---help--- |
1403 | Use PAT attributes to setup page level cache control. | 1407 | Use PAT attributes to setup page level cache control. |
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 5e99762eb5c2..08e442bc3ab9 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu | |||
@@ -301,15 +301,11 @@ config X86_CPU | |||
301 | 301 | ||
302 | # | 302 | # |
303 | # Define implied options from the CPU selection here | 303 | # Define implied options from the CPU selection here |
304 | config X86_L1_CACHE_BYTES | 304 | config X86_INTERNODE_CACHE_SHIFT |
305 | int | 305 | int |
306 | default "128" if MPSC | 306 | default "12" if X86_VSMP |
307 | default "64" if GENERIC_CPU || MK8 || MCORE2 || MATOM || X86_32 | 307 | default "7" if NUMA |
308 | 308 | default X86_L1_CACHE_SHIFT | |
309 | config X86_INTERNODE_CACHE_BYTES | ||
310 | int | ||
311 | default "4096" if X86_VSMP | ||
312 | default X86_L1_CACHE_BYTES if !X86_VSMP | ||
313 | 309 | ||
314 | config X86_CMPXCHG | 310 | config X86_CMPXCHG |
315 | def_bool X86_64 || (X86_32 && !M386) | 311 | def_bool X86_64 || (X86_32 && !M386) |
@@ -317,9 +313,9 @@ config X86_CMPXCHG | |||
317 | config X86_L1_CACHE_SHIFT | 313 | config X86_L1_CACHE_SHIFT |
318 | int | 314 | int |
319 | default "7" if MPENTIUM4 || MPSC | 315 | default "7" if MPENTIUM4 || MPSC |
316 | default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU | ||
320 | default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 | 317 | default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 |
321 | default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX | 318 | default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX |
322 | default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU | ||
323 | 319 | ||
324 | config X86_XADD | 320 | config X86_XADD |
325 | def_bool y | 321 | def_bool y |
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 077e1b69198e..faff0dc9c06a 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S | |||
@@ -107,8 +107,7 @@ ENTRY(startup_32) | |||
107 | lgdt gdt(%ebp) | 107 | lgdt gdt(%ebp) |
108 | 108 | ||
109 | /* Enable PAE mode */ | 109 | /* Enable PAE mode */ |
110 | xorl %eax, %eax | 110 | movl $(X86_CR4_PAE), %eax |
111 | orl $(X86_CR4_PAE), %eax | ||
112 | movl %eax, %cr4 | 111 | movl %eax, %cr4 |
113 | 112 | ||
114 | /* | 113 | /* |
diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S index f4193bb48782..a6f1a59a5b0c 100644 --- a/arch/x86/boot/compressed/vmlinux.lds.S +++ b/arch/x86/boot/compressed/vmlinux.lds.S | |||
@@ -4,6 +4,7 @@ OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT) | |||
4 | 4 | ||
5 | #undef i386 | 5 | #undef i386 |
6 | 6 | ||
7 | #include <asm/cache.h> | ||
7 | #include <asm/page_types.h> | 8 | #include <asm/page_types.h> |
8 | 9 | ||
9 | #ifdef CONFIG_X86_64 | 10 | #ifdef CONFIG_X86_64 |
@@ -46,7 +47,7 @@ SECTIONS | |||
46 | *(.data.*) | 47 | *(.data.*) |
47 | _edata = . ; | 48 | _edata = . ; |
48 | } | 49 | } |
49 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); | 50 | . = ALIGN(L1_CACHE_BYTES); |
50 | .bss : { | 51 | .bss : { |
51 | _bss = . ; | 52 | _bss = . ; |
52 | *(.bss) | 53 | *(.bss) |
diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c index d42da3802499..f767164cd5df 100644 --- a/arch/x86/boot/video.c +++ b/arch/x86/boot/video.c | |||
@@ -27,6 +27,12 @@ static void store_cursor_position(void) | |||
27 | 27 | ||
28 | boot_params.screen_info.orig_x = oreg.dl; | 28 | boot_params.screen_info.orig_x = oreg.dl; |
29 | boot_params.screen_info.orig_y = oreg.dh; | 29 | boot_params.screen_info.orig_y = oreg.dh; |
30 | |||
31 | if (oreg.ch & 0x20) | ||
32 | boot_params.screen_info.flags |= VIDEO_FLAGS_NOCURSOR; | ||
33 | |||
34 | if ((oreg.ch & 0x1f) > (oreg.cl & 0x1f)) | ||
35 | boot_params.screen_info.flags |= VIDEO_FLAGS_NOCURSOR; | ||
30 | } | 36 | } |
31 | 37 | ||
32 | static void store_video_mode(void) | 38 | static void store_video_mode(void) |
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index cfb0010fa940..1a58ad89fdf7 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile | |||
@@ -12,6 +12,7 @@ obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o | |||
12 | obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o | 12 | obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o |
13 | obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o | 13 | obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o |
14 | obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o | 14 | obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o |
15 | obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o | ||
15 | 16 | ||
16 | obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o | 17 | obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o |
17 | 18 | ||
@@ -24,3 +25,5 @@ twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o | |||
24 | salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o | 25 | salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o |
25 | 26 | ||
26 | aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o | 27 | aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o |
28 | |||
29 | ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o | ||
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index eb0566e83319..20bb0e1ac681 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S | |||
@@ -16,6 +16,7 @@ | |||
16 | */ | 16 | */ |
17 | 17 | ||
18 | #include <linux/linkage.h> | 18 | #include <linux/linkage.h> |
19 | #include <asm/inst.h> | ||
19 | 20 | ||
20 | .text | 21 | .text |
21 | 22 | ||
@@ -122,103 +123,72 @@ ENTRY(aesni_set_key) | |||
122 | movups 0x10(%rsi), %xmm2 # other user key | 123 | movups 0x10(%rsi), %xmm2 # other user key |
123 | movaps %xmm2, (%rcx) | 124 | movaps %xmm2, (%rcx) |
124 | add $0x10, %rcx | 125 | add $0x10, %rcx |
125 | # aeskeygenassist $0x1, %xmm2, %xmm1 # round 1 | 126 | AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 |
126 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01 | ||
127 | call _key_expansion_256a | 127 | call _key_expansion_256a |
128 | # aeskeygenassist $0x1, %xmm0, %xmm1 | 128 | AESKEYGENASSIST 0x1 %xmm0 %xmm1 |
129 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01 | ||
130 | call _key_expansion_256b | 129 | call _key_expansion_256b |
131 | # aeskeygenassist $0x2, %xmm2, %xmm1 # round 2 | 130 | AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2 |
132 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02 | ||
133 | call _key_expansion_256a | 131 | call _key_expansion_256a |
134 | # aeskeygenassist $0x2, %xmm0, %xmm1 | 132 | AESKEYGENASSIST 0x2 %xmm0 %xmm1 |
135 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02 | ||
136 | call _key_expansion_256b | 133 | call _key_expansion_256b |
137 | # aeskeygenassist $0x4, %xmm2, %xmm1 # round 3 | 134 | AESKEYGENASSIST 0x4 %xmm2 %xmm1 # round 3 |
138 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04 | ||
139 | call _key_expansion_256a | 135 | call _key_expansion_256a |
140 | # aeskeygenassist $0x4, %xmm0, %xmm1 | 136 | AESKEYGENASSIST 0x4 %xmm0 %xmm1 |
141 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04 | ||
142 | call _key_expansion_256b | 137 | call _key_expansion_256b |
143 | # aeskeygenassist $0x8, %xmm2, %xmm1 # round 4 | 138 | AESKEYGENASSIST 0x8 %xmm2 %xmm1 # round 4 |
144 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08 | ||
145 | call _key_expansion_256a | 139 | call _key_expansion_256a |
146 | # aeskeygenassist $0x8, %xmm0, %xmm1 | 140 | AESKEYGENASSIST 0x8 %xmm0 %xmm1 |
147 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08 | ||
148 | call _key_expansion_256b | 141 | call _key_expansion_256b |
149 | # aeskeygenassist $0x10, %xmm2, %xmm1 # round 5 | 142 | AESKEYGENASSIST 0x10 %xmm2 %xmm1 # round 5 |
150 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10 | ||
151 | call _key_expansion_256a | 143 | call _key_expansion_256a |
152 | # aeskeygenassist $0x10, %xmm0, %xmm1 | 144 | AESKEYGENASSIST 0x10 %xmm0 %xmm1 |
153 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10 | ||
154 | call _key_expansion_256b | 145 | call _key_expansion_256b |
155 | # aeskeygenassist $0x20, %xmm2, %xmm1 # round 6 | 146 | AESKEYGENASSIST 0x20 %xmm2 %xmm1 # round 6 |
156 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20 | ||
157 | call _key_expansion_256a | 147 | call _key_expansion_256a |
158 | # aeskeygenassist $0x20, %xmm0, %xmm1 | 148 | AESKEYGENASSIST 0x20 %xmm0 %xmm1 |
159 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20 | ||
160 | call _key_expansion_256b | 149 | call _key_expansion_256b |
161 | # aeskeygenassist $0x40, %xmm2, %xmm1 # round 7 | 150 | AESKEYGENASSIST 0x40 %xmm2 %xmm1 # round 7 |
162 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40 | ||
163 | call _key_expansion_256a | 151 | call _key_expansion_256a |
164 | jmp .Ldec_key | 152 | jmp .Ldec_key |
165 | .Lenc_key192: | 153 | .Lenc_key192: |
166 | movq 0x10(%rsi), %xmm2 # other user key | 154 | movq 0x10(%rsi), %xmm2 # other user key |
167 | # aeskeygenassist $0x1, %xmm2, %xmm1 # round 1 | 155 | AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 |
168 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01 | ||
169 | call _key_expansion_192a | 156 | call _key_expansion_192a |
170 | # aeskeygenassist $0x2, %xmm2, %xmm1 # round 2 | 157 | AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2 |
171 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02 | ||
172 | call _key_expansion_192b | 158 | call _key_expansion_192b |
173 | # aeskeygenassist $0x4, %xmm2, %xmm1 # round 3 | 159 | AESKEYGENASSIST 0x4 %xmm2 %xmm1 # round 3 |
174 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04 | ||
175 | call _key_expansion_192a | 160 | call _key_expansion_192a |
176 | # aeskeygenassist $0x8, %xmm2, %xmm1 # round 4 | 161 | AESKEYGENASSIST 0x8 %xmm2 %xmm1 # round 4 |
177 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08 | ||
178 | call _key_expansion_192b | 162 | call _key_expansion_192b |
179 | # aeskeygenassist $0x10, %xmm2, %xmm1 # round 5 | 163 | AESKEYGENASSIST 0x10 %xmm2 %xmm1 # round 5 |
180 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10 | ||
181 | call _key_expansion_192a | 164 | call _key_expansion_192a |
182 | # aeskeygenassist $0x20, %xmm2, %xmm1 # round 6 | 165 | AESKEYGENASSIST 0x20 %xmm2 %xmm1 # round 6 |
183 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20 | ||
184 | call _key_expansion_192b | 166 | call _key_expansion_192b |
185 | # aeskeygenassist $0x40, %xmm2, %xmm1 # round 7 | 167 | AESKEYGENASSIST 0x40 %xmm2 %xmm1 # round 7 |
186 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40 | ||
187 | call _key_expansion_192a | 168 | call _key_expansion_192a |
188 | # aeskeygenassist $0x80, %xmm2, %xmm1 # round 8 | 169 | AESKEYGENASSIST 0x80 %xmm2 %xmm1 # round 8 |
189 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x80 | ||
190 | call _key_expansion_192b | 170 | call _key_expansion_192b |
191 | jmp .Ldec_key | 171 | jmp .Ldec_key |
192 | .Lenc_key128: | 172 | .Lenc_key128: |
193 | # aeskeygenassist $0x1, %xmm0, %xmm1 # round 1 | 173 | AESKEYGENASSIST 0x1 %xmm0 %xmm1 # round 1 |
194 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01 | ||
195 | call _key_expansion_128 | 174 | call _key_expansion_128 |
196 | # aeskeygenassist $0x2, %xmm0, %xmm1 # round 2 | 175 | AESKEYGENASSIST 0x2 %xmm0 %xmm1 # round 2 |
197 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02 | ||
198 | call _key_expansion_128 | 176 | call _key_expansion_128 |
199 | # aeskeygenassist $0x4, %xmm0, %xmm1 # round 3 | 177 | AESKEYGENASSIST 0x4 %xmm0 %xmm1 # round 3 |
200 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04 | ||
201 | call _key_expansion_128 | 178 | call _key_expansion_128 |
202 | # aeskeygenassist $0x8, %xmm0, %xmm1 # round 4 | 179 | AESKEYGENASSIST 0x8 %xmm0 %xmm1 # round 4 |
203 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08 | ||
204 | call _key_expansion_128 | 180 | call _key_expansion_128 |
205 | # aeskeygenassist $0x10, %xmm0, %xmm1 # round 5 | 181 | AESKEYGENASSIST 0x10 %xmm0 %xmm1 # round 5 |
206 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10 | ||
207 | call _key_expansion_128 | 182 | call _key_expansion_128 |
208 | # aeskeygenassist $0x20, %xmm0, %xmm1 # round 6 | 183 | AESKEYGENASSIST 0x20 %xmm0 %xmm1 # round 6 |
209 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20 | ||
210 | call _key_expansion_128 | 184 | call _key_expansion_128 |
211 | # aeskeygenassist $0x40, %xmm0, %xmm1 # round 7 | 185 | AESKEYGENASSIST 0x40 %xmm0 %xmm1 # round 7 |
212 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x40 | ||
213 | call _key_expansion_128 | 186 | call _key_expansion_128 |
214 | # aeskeygenassist $0x80, %xmm0, %xmm1 # round 8 | 187 | AESKEYGENASSIST 0x80 %xmm0 %xmm1 # round 8 |
215 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x80 | ||
216 | call _key_expansion_128 | 188 | call _key_expansion_128 |
217 | # aeskeygenassist $0x1b, %xmm0, %xmm1 # round 9 | 189 | AESKEYGENASSIST 0x1b %xmm0 %xmm1 # round 9 |
218 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x1b | ||
219 | call _key_expansion_128 | 190 | call _key_expansion_128 |
220 | # aeskeygenassist $0x36, %xmm0, %xmm1 # round 10 | 191 | AESKEYGENASSIST 0x36 %xmm0 %xmm1 # round 10 |
221 | .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x36 | ||
222 | call _key_expansion_128 | 192 | call _key_expansion_128 |
223 | .Ldec_key: | 193 | .Ldec_key: |
224 | sub $0x10, %rcx | 194 | sub $0x10, %rcx |
@@ -231,8 +201,7 @@ ENTRY(aesni_set_key) | |||
231 | .align 4 | 201 | .align 4 |
232 | .Ldec_key_loop: | 202 | .Ldec_key_loop: |
233 | movaps (%rdi), %xmm0 | 203 | movaps (%rdi), %xmm0 |
234 | # aesimc %xmm0, %xmm1 | 204 | AESIMC %xmm0 %xmm1 |
235 | .byte 0x66, 0x0f, 0x38, 0xdb, 0xc8 | ||
236 | movaps %xmm1, (%rsi) | 205 | movaps %xmm1, (%rsi) |
237 | add $0x10, %rdi | 206 | add $0x10, %rdi |
238 | sub $0x10, %rsi | 207 | sub $0x10, %rsi |
@@ -274,51 +243,37 @@ _aesni_enc1: | |||
274 | je .Lenc192 | 243 | je .Lenc192 |
275 | add $0x20, TKEYP | 244 | add $0x20, TKEYP |
276 | movaps -0x60(TKEYP), KEY | 245 | movaps -0x60(TKEYP), KEY |
277 | # aesenc KEY, STATE | 246 | AESENC KEY STATE |
278 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | ||
279 | movaps -0x50(TKEYP), KEY | 247 | movaps -0x50(TKEYP), KEY |
280 | # aesenc KEY, STATE | 248 | AESENC KEY STATE |
281 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | ||
282 | .align 4 | 249 | .align 4 |
283 | .Lenc192: | 250 | .Lenc192: |
284 | movaps -0x40(TKEYP), KEY | 251 | movaps -0x40(TKEYP), KEY |
285 | # aesenc KEY, STATE | 252 | AESENC KEY STATE |
286 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | ||
287 | movaps -0x30(TKEYP), KEY | 253 | movaps -0x30(TKEYP), KEY |
288 | # aesenc KEY, STATE | 254 | AESENC KEY STATE |
289 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | ||
290 | .align 4 | 255 | .align 4 |
291 | .Lenc128: | 256 | .Lenc128: |
292 | movaps -0x20(TKEYP), KEY | 257 | movaps -0x20(TKEYP), KEY |
293 | # aesenc KEY, STATE | 258 | AESENC KEY STATE |
294 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | ||
295 | movaps -0x10(TKEYP), KEY | 259 | movaps -0x10(TKEYP), KEY |
296 | # aesenc KEY, STATE | 260 | AESENC KEY STATE |
297 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | ||
298 | movaps (TKEYP), KEY | 261 | movaps (TKEYP), KEY |
299 | # aesenc KEY, STATE | 262 | AESENC KEY STATE |
300 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | ||
301 | movaps 0x10(TKEYP), KEY | 263 | movaps 0x10(TKEYP), KEY |
302 | # aesenc KEY, STATE | 264 | AESENC KEY STATE |
303 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | ||
304 | movaps 0x20(TKEYP), KEY | 265 | movaps 0x20(TKEYP), KEY |
305 | # aesenc KEY, STATE | 266 | AESENC KEY STATE |
306 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | ||
307 | movaps 0x30(TKEYP), KEY | 267 | movaps 0x30(TKEYP), KEY |
308 | # aesenc KEY, STATE | 268 | AESENC KEY STATE |
309 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | ||
310 | movaps 0x40(TKEYP), KEY | 269 | movaps 0x40(TKEYP), KEY |
311 | # aesenc KEY, STATE | 270 | AESENC KEY STATE |
312 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | ||
313 | movaps 0x50(TKEYP), KEY | 271 | movaps 0x50(TKEYP), KEY |
314 | # aesenc KEY, STATE | 272 | AESENC KEY STATE |
315 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | ||
316 | movaps 0x60(TKEYP), KEY | 273 | movaps 0x60(TKEYP), KEY |
317 | # aesenc KEY, STATE | 274 | AESENC KEY STATE |
318 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | ||
319 | movaps 0x70(TKEYP), KEY | 275 | movaps 0x70(TKEYP), KEY |
320 | # aesenclast KEY, STATE # last round | 276 | AESENCLAST KEY STATE |
321 | .byte 0x66, 0x0f, 0x38, 0xdd, 0xc2 | ||
322 | ret | 277 | ret |
323 | 278 | ||
324 | /* | 279 | /* |
@@ -353,135 +308,79 @@ _aesni_enc4: | |||
353 | je .L4enc192 | 308 | je .L4enc192 |
354 | add $0x20, TKEYP | 309 | add $0x20, TKEYP |
355 | movaps -0x60(TKEYP), KEY | 310 | movaps -0x60(TKEYP), KEY |
356 | # aesenc KEY, STATE1 | 311 | AESENC KEY STATE1 |
357 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 312 | AESENC KEY STATE2 |
358 | # aesenc KEY, STATE2 | 313 | AESENC KEY STATE3 |
359 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 314 | AESENC KEY STATE4 |
360 | # aesenc KEY, STATE3 | ||
361 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | ||
362 | # aesenc KEY, STATE4 | ||
363 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | ||
364 | movaps -0x50(TKEYP), KEY | 315 | movaps -0x50(TKEYP), KEY |
365 | # aesenc KEY, STATE1 | 316 | AESENC KEY STATE1 |
366 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 317 | AESENC KEY STATE2 |
367 | # aesenc KEY, STATE2 | 318 | AESENC KEY STATE3 |
368 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 319 | AESENC KEY STATE4 |
369 | # aesenc KEY, STATE3 | ||
370 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | ||
371 | # aesenc KEY, STATE4 | ||
372 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | ||
373 | #.align 4 | 320 | #.align 4 |
374 | .L4enc192: | 321 | .L4enc192: |
375 | movaps -0x40(TKEYP), KEY | 322 | movaps -0x40(TKEYP), KEY |
376 | # aesenc KEY, STATE1 | 323 | AESENC KEY STATE1 |
377 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 324 | AESENC KEY STATE2 |
378 | # aesenc KEY, STATE2 | 325 | AESENC KEY STATE3 |
379 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 326 | AESENC KEY STATE4 |
380 | # aesenc KEY, STATE3 | ||
381 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | ||
382 | # aesenc KEY, STATE4 | ||
383 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | ||
384 | movaps -0x30(TKEYP), KEY | 327 | movaps -0x30(TKEYP), KEY |
385 | # aesenc KEY, STATE1 | 328 | AESENC KEY STATE1 |
386 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 329 | AESENC KEY STATE2 |
387 | # aesenc KEY, STATE2 | 330 | AESENC KEY STATE3 |
388 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 331 | AESENC KEY STATE4 |
389 | # aesenc KEY, STATE3 | ||
390 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | ||
391 | # aesenc KEY, STATE4 | ||
392 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | ||
393 | #.align 4 | 332 | #.align 4 |
394 | .L4enc128: | 333 | .L4enc128: |
395 | movaps -0x20(TKEYP), KEY | 334 | movaps -0x20(TKEYP), KEY |
396 | # aesenc KEY, STATE1 | 335 | AESENC KEY STATE1 |
397 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 336 | AESENC KEY STATE2 |
398 | # aesenc KEY, STATE2 | 337 | AESENC KEY STATE3 |
399 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 338 | AESENC KEY STATE4 |
400 | # aesenc KEY, STATE3 | ||
401 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | ||
402 | # aesenc KEY, STATE4 | ||
403 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | ||
404 | movaps -0x10(TKEYP), KEY | 339 | movaps -0x10(TKEYP), KEY |
405 | # aesenc KEY, STATE1 | 340 | AESENC KEY STATE1 |
406 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 341 | AESENC KEY STATE2 |
407 | # aesenc KEY, STATE2 | 342 | AESENC KEY STATE3 |
408 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 343 | AESENC KEY STATE4 |
409 | # aesenc KEY, STATE3 | ||
410 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | ||
411 | # aesenc KEY, STATE4 | ||
412 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | ||
413 | movaps (TKEYP), KEY | 344 | movaps (TKEYP), KEY |
414 | # aesenc KEY, STATE1 | 345 | AESENC KEY STATE1 |
415 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 346 | AESENC KEY STATE2 |
416 | # aesenc KEY, STATE2 | 347 | AESENC KEY STATE3 |
417 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 348 | AESENC KEY STATE4 |
418 | # aesenc KEY, STATE3 | ||
419 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | ||
420 | # aesenc KEY, STATE4 | ||
421 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | ||
422 | movaps 0x10(TKEYP), KEY | 349 | movaps 0x10(TKEYP), KEY |
423 | # aesenc KEY, STATE1 | 350 | AESENC KEY STATE1 |
424 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 351 | AESENC KEY STATE2 |
425 | # aesenc KEY, STATE2 | 352 | AESENC KEY STATE3 |
426 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 353 | AESENC KEY STATE4 |
427 | # aesenc KEY, STATE3 | ||
428 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | ||
429 | # aesenc KEY, STATE4 | ||
430 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | ||
431 | movaps 0x20(TKEYP), KEY | 354 | movaps 0x20(TKEYP), KEY |
432 | # aesenc KEY, STATE1 | 355 | AESENC KEY STATE1 |
433 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 356 | AESENC KEY STATE2 |
434 | # aesenc KEY, STATE2 | 357 | AESENC KEY STATE3 |
435 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 358 | AESENC KEY STATE4 |
436 | # aesenc KEY, STATE3 | ||
437 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | ||
438 | # aesenc KEY, STATE4 | ||
439 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | ||
440 | movaps 0x30(TKEYP), KEY | 359 | movaps 0x30(TKEYP), KEY |
441 | # aesenc KEY, STATE1 | 360 | AESENC KEY STATE1 |
442 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 361 | AESENC KEY STATE2 |
443 | # aesenc KEY, STATE2 | 362 | AESENC KEY STATE3 |
444 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 363 | AESENC KEY STATE4 |
445 | # aesenc KEY, STATE3 | ||
446 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | ||
447 | # aesenc KEY, STATE4 | ||
448 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | ||
449 | movaps 0x40(TKEYP), KEY | 364 | movaps 0x40(TKEYP), KEY |
450 | # aesenc KEY, STATE1 | 365 | AESENC KEY STATE1 |
451 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 366 | AESENC KEY STATE2 |
452 | # aesenc KEY, STATE2 | 367 | AESENC KEY STATE3 |
453 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 368 | AESENC KEY STATE4 |
454 | # aesenc KEY, STATE3 | ||
455 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | ||
456 | # aesenc KEY, STATE4 | ||
457 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | ||
458 | movaps 0x50(TKEYP), KEY | 369 | movaps 0x50(TKEYP), KEY |
459 | # aesenc KEY, STATE1 | 370 | AESENC KEY STATE1 |
460 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 371 | AESENC KEY STATE2 |
461 | # aesenc KEY, STATE2 | 372 | AESENC KEY STATE3 |
462 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 373 | AESENC KEY STATE4 |
463 | # aesenc KEY, STATE3 | ||
464 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | ||
465 | # aesenc KEY, STATE4 | ||
466 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | ||
467 | movaps 0x60(TKEYP), KEY | 374 | movaps 0x60(TKEYP), KEY |
468 | # aesenc KEY, STATE1 | 375 | AESENC KEY STATE1 |
469 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 | 376 | AESENC KEY STATE2 |
470 | # aesenc KEY, STATE2 | 377 | AESENC KEY STATE3 |
471 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 | 378 | AESENC KEY STATE4 |
472 | # aesenc KEY, STATE3 | ||
473 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xea | ||
474 | # aesenc KEY, STATE4 | ||
475 | .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 | ||
476 | movaps 0x70(TKEYP), KEY | 379 | movaps 0x70(TKEYP), KEY |
477 | # aesenclast KEY, STATE1 # last round | 380 | AESENCLAST KEY STATE1 # last round |
478 | .byte 0x66, 0x0f, 0x38, 0xdd, 0xc2 | 381 | AESENCLAST KEY STATE2 |
479 | # aesenclast KEY, STATE2 | 382 | AESENCLAST KEY STATE3 |
480 | .byte 0x66, 0x0f, 0x38, 0xdd, 0xe2 | 383 | AESENCLAST KEY STATE4 |
481 | # aesenclast KEY, STATE3 | ||
482 | .byte 0x66, 0x0f, 0x38, 0xdd, 0xea | ||
483 | # aesenclast KEY, STATE4 | ||
484 | .byte 0x66, 0x0f, 0x38, 0xdd, 0xf2 | ||
485 | ret | 384 | ret |
486 | 385 | ||
487 | /* | 386 | /* |
@@ -518,51 +417,37 @@ _aesni_dec1: | |||
518 | je .Ldec192 | 417 | je .Ldec192 |
519 | add $0x20, TKEYP | 418 | add $0x20, TKEYP |
520 | movaps -0x60(TKEYP), KEY | 419 | movaps -0x60(TKEYP), KEY |
521 | # aesdec KEY, STATE | 420 | AESDEC KEY STATE |
522 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | ||
523 | movaps -0x50(TKEYP), KEY | 421 | movaps -0x50(TKEYP), KEY |
524 | # aesdec KEY, STATE | 422 | AESDEC KEY STATE |
525 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | ||
526 | .align 4 | 423 | .align 4 |
527 | .Ldec192: | 424 | .Ldec192: |
528 | movaps -0x40(TKEYP), KEY | 425 | movaps -0x40(TKEYP), KEY |
529 | # aesdec KEY, STATE | 426 | AESDEC KEY STATE |
530 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | ||
531 | movaps -0x30(TKEYP), KEY | 427 | movaps -0x30(TKEYP), KEY |
532 | # aesdec KEY, STATE | 428 | AESDEC KEY STATE |
533 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | ||
534 | .align 4 | 429 | .align 4 |
535 | .Ldec128: | 430 | .Ldec128: |
536 | movaps -0x20(TKEYP), KEY | 431 | movaps -0x20(TKEYP), KEY |
537 | # aesdec KEY, STATE | 432 | AESDEC KEY STATE |
538 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | ||
539 | movaps -0x10(TKEYP), KEY | 433 | movaps -0x10(TKEYP), KEY |
540 | # aesdec KEY, STATE | 434 | AESDEC KEY STATE |
541 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | ||
542 | movaps (TKEYP), KEY | 435 | movaps (TKEYP), KEY |
543 | # aesdec KEY, STATE | 436 | AESDEC KEY STATE |
544 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | ||
545 | movaps 0x10(TKEYP), KEY | 437 | movaps 0x10(TKEYP), KEY |
546 | # aesdec KEY, STATE | 438 | AESDEC KEY STATE |
547 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | ||
548 | movaps 0x20(TKEYP), KEY | 439 | movaps 0x20(TKEYP), KEY |
549 | # aesdec KEY, STATE | 440 | AESDEC KEY STATE |
550 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | ||
551 | movaps 0x30(TKEYP), KEY | 441 | movaps 0x30(TKEYP), KEY |
552 | # aesdec KEY, STATE | 442 | AESDEC KEY STATE |
553 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | ||
554 | movaps 0x40(TKEYP), KEY | 443 | movaps 0x40(TKEYP), KEY |
555 | # aesdec KEY, STATE | 444 | AESDEC KEY STATE |
556 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | ||
557 | movaps 0x50(TKEYP), KEY | 445 | movaps 0x50(TKEYP), KEY |
558 | # aesdec KEY, STATE | 446 | AESDEC KEY STATE |
559 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | ||
560 | movaps 0x60(TKEYP), KEY | 447 | movaps 0x60(TKEYP), KEY |
561 | # aesdec KEY, STATE | 448 | AESDEC KEY STATE |
562 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | ||
563 | movaps 0x70(TKEYP), KEY | 449 | movaps 0x70(TKEYP), KEY |
564 | # aesdeclast KEY, STATE # last round | 450 | AESDECLAST KEY STATE |
565 | .byte 0x66, 0x0f, 0x38, 0xdf, 0xc2 | ||
566 | ret | 451 | ret |
567 | 452 | ||
568 | /* | 453 | /* |
@@ -597,135 +482,79 @@ _aesni_dec4: | |||
597 | je .L4dec192 | 482 | je .L4dec192 |
598 | add $0x20, TKEYP | 483 | add $0x20, TKEYP |
599 | movaps -0x60(TKEYP), KEY | 484 | movaps -0x60(TKEYP), KEY |
600 | # aesdec KEY, STATE1 | 485 | AESDEC KEY STATE1 |
601 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 486 | AESDEC KEY STATE2 |
602 | # aesdec KEY, STATE2 | 487 | AESDEC KEY STATE3 |
603 | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 488 | AESDEC KEY STATE4 |
604 | # aesdec KEY, STATE3 | ||
605 | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | ||
606 | # aesdec KEY, STATE4 | ||
607 | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | ||
608 | movaps -0x50(TKEYP), KEY | 489 | movaps -0x50(TKEYP), KEY |
609 | # aesdec KEY, STATE1 | 490 | AESDEC KEY STATE1 |
610 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 491 | AESDEC KEY STATE2 |
611 | # aesdec KEY, STATE2 | 492 | AESDEC KEY STATE3 |
612 | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 493 | AESDEC KEY STATE4 |
613 | # aesdec KEY, STATE3 | ||
614 | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | ||
615 | # aesdec KEY, STATE4 | ||
616 | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | ||
617 | .align 4 | 494 | .align 4 |
618 | .L4dec192: | 495 | .L4dec192: |
619 | movaps -0x40(TKEYP), KEY | 496 | movaps -0x40(TKEYP), KEY |
620 | # aesdec KEY, STATE1 | 497 | AESDEC KEY STATE1 |
621 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 498 | AESDEC KEY STATE2 |
622 | # aesdec KEY, STATE2 | 499 | AESDEC KEY STATE3 |
623 | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 500 | AESDEC KEY STATE4 |
624 | # aesdec KEY, STATE3 | ||
625 | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | ||
626 | # aesdec KEY, STATE4 | ||
627 | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | ||
628 | movaps -0x30(TKEYP), KEY | 501 | movaps -0x30(TKEYP), KEY |
629 | # aesdec KEY, STATE1 | 502 | AESDEC KEY STATE1 |
630 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 503 | AESDEC KEY STATE2 |
631 | # aesdec KEY, STATE2 | 504 | AESDEC KEY STATE3 |
632 | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 505 | AESDEC KEY STATE4 |
633 | # aesdec KEY, STATE3 | ||
634 | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | ||
635 | # aesdec KEY, STATE4 | ||
636 | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | ||
637 | .align 4 | 506 | .align 4 |
638 | .L4dec128: | 507 | .L4dec128: |
639 | movaps -0x20(TKEYP), KEY | 508 | movaps -0x20(TKEYP), KEY |
640 | # aesdec KEY, STATE1 | 509 | AESDEC KEY STATE1 |
641 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 510 | AESDEC KEY STATE2 |
642 | # aesdec KEY, STATE2 | 511 | AESDEC KEY STATE3 |
643 | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 512 | AESDEC KEY STATE4 |
644 | # aesdec KEY, STATE3 | ||
645 | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | ||
646 | # aesdec KEY, STATE4 | ||
647 | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | ||
648 | movaps -0x10(TKEYP), KEY | 513 | movaps -0x10(TKEYP), KEY |
649 | # aesdec KEY, STATE1 | 514 | AESDEC KEY STATE1 |
650 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 515 | AESDEC KEY STATE2 |
651 | # aesdec KEY, STATE2 | 516 | AESDEC KEY STATE3 |
652 | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 517 | AESDEC KEY STATE4 |
653 | # aesdec KEY, STATE3 | ||
654 | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | ||
655 | # aesdec KEY, STATE4 | ||
656 | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | ||
657 | movaps (TKEYP), KEY | 518 | movaps (TKEYP), KEY |
658 | # aesdec KEY, STATE1 | 519 | AESDEC KEY STATE1 |
659 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 520 | AESDEC KEY STATE2 |
660 | # aesdec KEY, STATE2 | 521 | AESDEC KEY STATE3 |
661 | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 522 | AESDEC KEY STATE4 |
662 | # aesdec KEY, STATE3 | ||
663 | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | ||
664 | # aesdec KEY, STATE4 | ||
665 | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | ||
666 | movaps 0x10(TKEYP), KEY | 523 | movaps 0x10(TKEYP), KEY |
667 | # aesdec KEY, STATE1 | 524 | AESDEC KEY STATE1 |
668 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 525 | AESDEC KEY STATE2 |
669 | # aesdec KEY, STATE2 | 526 | AESDEC KEY STATE3 |
670 | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 527 | AESDEC KEY STATE4 |
671 | # aesdec KEY, STATE3 | ||
672 | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | ||
673 | # aesdec KEY, STATE4 | ||
674 | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | ||
675 | movaps 0x20(TKEYP), KEY | 528 | movaps 0x20(TKEYP), KEY |
676 | # aesdec KEY, STATE1 | 529 | AESDEC KEY STATE1 |
677 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 530 | AESDEC KEY STATE2 |
678 | # aesdec KEY, STATE2 | 531 | AESDEC KEY STATE3 |
679 | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 532 | AESDEC KEY STATE4 |
680 | # aesdec KEY, STATE3 | ||
681 | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | ||
682 | # aesdec KEY, STATE4 | ||
683 | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | ||
684 | movaps 0x30(TKEYP), KEY | 533 | movaps 0x30(TKEYP), KEY |
685 | # aesdec KEY, STATE1 | 534 | AESDEC KEY STATE1 |
686 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 535 | AESDEC KEY STATE2 |
687 | # aesdec KEY, STATE2 | 536 | AESDEC KEY STATE3 |
688 | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 537 | AESDEC KEY STATE4 |
689 | # aesdec KEY, STATE3 | ||
690 | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | ||
691 | # aesdec KEY, STATE4 | ||
692 | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | ||
693 | movaps 0x40(TKEYP), KEY | 538 | movaps 0x40(TKEYP), KEY |
694 | # aesdec KEY, STATE1 | 539 | AESDEC KEY STATE1 |
695 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 540 | AESDEC KEY STATE2 |
696 | # aesdec KEY, STATE2 | 541 | AESDEC KEY STATE3 |
697 | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 542 | AESDEC KEY STATE4 |
698 | # aesdec KEY, STATE3 | ||
699 | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | ||
700 | # aesdec KEY, STATE4 | ||
701 | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | ||
702 | movaps 0x50(TKEYP), KEY | 543 | movaps 0x50(TKEYP), KEY |
703 | # aesdec KEY, STATE1 | 544 | AESDEC KEY STATE1 |
704 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 545 | AESDEC KEY STATE2 |
705 | # aesdec KEY, STATE2 | 546 | AESDEC KEY STATE3 |
706 | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 547 | AESDEC KEY STATE4 |
707 | # aesdec KEY, STATE3 | ||
708 | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | ||
709 | # aesdec KEY, STATE4 | ||
710 | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | ||
711 | movaps 0x60(TKEYP), KEY | 548 | movaps 0x60(TKEYP), KEY |
712 | # aesdec KEY, STATE1 | 549 | AESDEC KEY STATE1 |
713 | .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 | 550 | AESDEC KEY STATE2 |
714 | # aesdec KEY, STATE2 | 551 | AESDEC KEY STATE3 |
715 | .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 | 552 | AESDEC KEY STATE4 |
716 | # aesdec KEY, STATE3 | ||
717 | .byte 0x66, 0x0f, 0x38, 0xde, 0xea | ||
718 | # aesdec KEY, STATE4 | ||
719 | .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 | ||
720 | movaps 0x70(TKEYP), KEY | 553 | movaps 0x70(TKEYP), KEY |
721 | # aesdeclast KEY, STATE1 # last round | 554 | AESDECLAST KEY STATE1 # last round |
722 | .byte 0x66, 0x0f, 0x38, 0xdf, 0xc2 | 555 | AESDECLAST KEY STATE2 |
723 | # aesdeclast KEY, STATE2 | 556 | AESDECLAST KEY STATE3 |
724 | .byte 0x66, 0x0f, 0x38, 0xdf, 0xe2 | 557 | AESDECLAST KEY STATE4 |
725 | # aesdeclast KEY, STATE3 | ||
726 | .byte 0x66, 0x0f, 0x38, 0xdf, 0xea | ||
727 | # aesdeclast KEY, STATE4 | ||
728 | .byte 0x66, 0x0f, 0x38, 0xdf, 0xf2 | ||
729 | ret | 558 | ret |
730 | 559 | ||
731 | /* | 560 | /* |
diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S new file mode 100644 index 000000000000..1eb7f90cb7b9 --- /dev/null +++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S | |||
@@ -0,0 +1,157 @@ | |||
1 | /* | ||
2 | * Accelerated GHASH implementation with Intel PCLMULQDQ-NI | ||
3 | * instructions. This file contains the accelerated part of the ghash | ||
4 | * implementation. More information about PCLMULQDQ can be found at: | ||
5 | * | ||
6 | * http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/ | ||
7 | * | ||
8 | * Copyright (c) 2009 Intel Corp. | ||
9 | * Author: Huang Ying <ying.huang@intel.com> | ||
10 | * Vinodh Gopal | ||
11 | * Erdinc Ozturk | ||
12 | * Deniz Karakoyunlu | ||
13 | * | ||
14 | * This program is free software; you can redistribute it and/or modify it | ||
15 | * under the terms of the GNU General Public License version 2 as published | ||
16 | * by the Free Software Foundation. | ||
17 | */ | ||
18 | |||
19 | #include <linux/linkage.h> | ||
20 | #include <asm/inst.h> | ||
21 | |||
22 | .data | ||
23 | |||
24 | .align 16 | ||
25 | .Lbswap_mask: | ||
26 | .octa 0x000102030405060708090a0b0c0d0e0f | ||
27 | .Lpoly: | ||
28 | .octa 0xc2000000000000000000000000000001 | ||
29 | .Ltwo_one: | ||
30 | .octa 0x00000001000000000000000000000001 | ||
31 | |||
32 | #define DATA %xmm0 | ||
33 | #define SHASH %xmm1 | ||
34 | #define T1 %xmm2 | ||
35 | #define T2 %xmm3 | ||
36 | #define T3 %xmm4 | ||
37 | #define BSWAP %xmm5 | ||
38 | #define IN1 %xmm6 | ||
39 | |||
40 | .text | ||
41 | |||
42 | /* | ||
43 | * __clmul_gf128mul_ble: internal ABI | ||
44 | * input: | ||
45 | * DATA: operand1 | ||
46 | * SHASH: operand2, hash_key << 1 mod poly | ||
47 | * output: | ||
48 | * DATA: operand1 * operand2 mod poly | ||
49 | * changed: | ||
50 | * T1 | ||
51 | * T2 | ||
52 | * T3 | ||
53 | */ | ||
54 | __clmul_gf128mul_ble: | ||
55 | movaps DATA, T1 | ||
56 | pshufd $0b01001110, DATA, T2 | ||
57 | pshufd $0b01001110, SHASH, T3 | ||
58 | pxor DATA, T2 | ||
59 | pxor SHASH, T3 | ||
60 | |||
61 | PCLMULQDQ 0x00 SHASH DATA # DATA = a0 * b0 | ||
62 | PCLMULQDQ 0x11 SHASH T1 # T1 = a1 * b1 | ||
63 | PCLMULQDQ 0x00 T3 T2 # T2 = (a1 + a0) * (b1 + b0) | ||
64 | pxor DATA, T2 | ||
65 | pxor T1, T2 # T2 = a0 * b1 + a1 * b0 | ||
66 | |||
67 | movaps T2, T3 | ||
68 | pslldq $8, T3 | ||
69 | psrldq $8, T2 | ||
70 | pxor T3, DATA | ||
71 | pxor T2, T1 # <T1:DATA> is result of | ||
72 | # carry-less multiplication | ||
73 | |||
74 | # first phase of the reduction | ||
75 | movaps DATA, T3 | ||
76 | psllq $1, T3 | ||
77 | pxor DATA, T3 | ||
78 | psllq $5, T3 | ||
79 | pxor DATA, T3 | ||
80 | psllq $57, T3 | ||
81 | movaps T3, T2 | ||
82 | pslldq $8, T2 | ||
83 | psrldq $8, T3 | ||
84 | pxor T2, DATA | ||
85 | pxor T3, T1 | ||
86 | |||
87 | # second phase of the reduction | ||
88 | movaps DATA, T2 | ||
89 | psrlq $5, T2 | ||
90 | pxor DATA, T2 | ||
91 | psrlq $1, T2 | ||
92 | pxor DATA, T2 | ||
93 | psrlq $1, T2 | ||
94 | pxor T2, T1 | ||
95 | pxor T1, DATA | ||
96 | ret | ||
97 | |||
98 | /* void clmul_ghash_mul(char *dst, const be128 *shash) */ | ||
99 | ENTRY(clmul_ghash_mul) | ||
100 | movups (%rdi), DATA | ||
101 | movups (%rsi), SHASH | ||
102 | movaps .Lbswap_mask, BSWAP | ||
103 | PSHUFB_XMM BSWAP DATA | ||
104 | call __clmul_gf128mul_ble | ||
105 | PSHUFB_XMM BSWAP DATA | ||
106 | movups DATA, (%rdi) | ||
107 | ret | ||
108 | |||
109 | /* | ||
110 | * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, | ||
111 | * const be128 *shash); | ||
112 | */ | ||
113 | ENTRY(clmul_ghash_update) | ||
114 | cmp $16, %rdx | ||
115 | jb .Lupdate_just_ret # check length | ||
116 | movaps .Lbswap_mask, BSWAP | ||
117 | movups (%rdi), DATA | ||
118 | movups (%rcx), SHASH | ||
119 | PSHUFB_XMM BSWAP DATA | ||
120 | .align 4 | ||
121 | .Lupdate_loop: | ||
122 | movups (%rsi), IN1 | ||
123 | PSHUFB_XMM BSWAP IN1 | ||
124 | pxor IN1, DATA | ||
125 | call __clmul_gf128mul_ble | ||
126 | sub $16, %rdx | ||
127 | add $16, %rsi | ||
128 | cmp $16, %rdx | ||
129 | jge .Lupdate_loop | ||
130 | PSHUFB_XMM BSWAP DATA | ||
131 | movups DATA, (%rdi) | ||
132 | .Lupdate_just_ret: | ||
133 | ret | ||
134 | |||
135 | /* | ||
136 | * void clmul_ghash_setkey(be128 *shash, const u8 *key); | ||
137 | * | ||
138 | * Calculate hash_key << 1 mod poly | ||
139 | */ | ||
140 | ENTRY(clmul_ghash_setkey) | ||
141 | movaps .Lbswap_mask, BSWAP | ||
142 | movups (%rsi), %xmm0 | ||
143 | PSHUFB_XMM BSWAP %xmm0 | ||
144 | movaps %xmm0, %xmm1 | ||
145 | psllq $1, %xmm0 | ||
146 | psrlq $63, %xmm1 | ||
147 | movaps %xmm1, %xmm2 | ||
148 | pslldq $8, %xmm1 | ||
149 | psrldq $8, %xmm2 | ||
150 | por %xmm1, %xmm0 | ||
151 | # reduction | ||
152 | pshufd $0b00100100, %xmm2, %xmm1 | ||
153 | pcmpeqd .Ltwo_one, %xmm1 | ||
154 | pand .Lpoly, %xmm1 | ||
155 | pxor %xmm1, %xmm0 | ||
156 | movups %xmm0, (%rdi) | ||
157 | ret | ||
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c new file mode 100644 index 000000000000..cbcc8d8ea93a --- /dev/null +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c | |||
@@ -0,0 +1,333 @@ | |||
1 | /* | ||
2 | * Accelerated GHASH implementation with Intel PCLMULQDQ-NI | ||
3 | * instructions. This file contains glue code. | ||
4 | * | ||
5 | * Copyright (c) 2009 Intel Corp. | ||
6 | * Author: Huang Ying <ying.huang@intel.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms of the GNU General Public License version 2 as published | ||
10 | * by the Free Software Foundation. | ||
11 | */ | ||
12 | |||
13 | #include <linux/module.h> | ||
14 | #include <linux/init.h> | ||
15 | #include <linux/kernel.h> | ||
16 | #include <linux/crypto.h> | ||
17 | #include <crypto/algapi.h> | ||
18 | #include <crypto/cryptd.h> | ||
19 | #include <crypto/gf128mul.h> | ||
20 | #include <crypto/internal/hash.h> | ||
21 | #include <asm/i387.h> | ||
22 | |||
23 | #define GHASH_BLOCK_SIZE 16 | ||
24 | #define GHASH_DIGEST_SIZE 16 | ||
25 | |||
26 | void clmul_ghash_mul(char *dst, const be128 *shash); | ||
27 | |||
28 | void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, | ||
29 | const be128 *shash); | ||
30 | |||
31 | void clmul_ghash_setkey(be128 *shash, const u8 *key); | ||
32 | |||
33 | struct ghash_async_ctx { | ||
34 | struct cryptd_ahash *cryptd_tfm; | ||
35 | }; | ||
36 | |||
37 | struct ghash_ctx { | ||
38 | be128 shash; | ||
39 | }; | ||
40 | |||
41 | struct ghash_desc_ctx { | ||
42 | u8 buffer[GHASH_BLOCK_SIZE]; | ||
43 | u32 bytes; | ||
44 | }; | ||
45 | |||
46 | static int ghash_init(struct shash_desc *desc) | ||
47 | { | ||
48 | struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); | ||
49 | |||
50 | memset(dctx, 0, sizeof(*dctx)); | ||
51 | |||
52 | return 0; | ||
53 | } | ||
54 | |||
55 | static int ghash_setkey(struct crypto_shash *tfm, | ||
56 | const u8 *key, unsigned int keylen) | ||
57 | { | ||
58 | struct ghash_ctx *ctx = crypto_shash_ctx(tfm); | ||
59 | |||
60 | if (keylen != GHASH_BLOCK_SIZE) { | ||
61 | crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); | ||
62 | return -EINVAL; | ||
63 | } | ||
64 | |||
65 | clmul_ghash_setkey(&ctx->shash, key); | ||
66 | |||
67 | return 0; | ||
68 | } | ||
69 | |||
70 | static int ghash_update(struct shash_desc *desc, | ||
71 | const u8 *src, unsigned int srclen) | ||
72 | { | ||
73 | struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); | ||
74 | struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); | ||
75 | u8 *dst = dctx->buffer; /* running digest; input bytes are XORed into it */ | ||
76 | |||
77 | kernel_fpu_begin(); /* the clmul_* asm routines use XMM registers */ | ||
78 | if (dctx->bytes) { /* bytes = input still missing from a partially absorbed block */ | ||
79 | int n = min(srclen, dctx->bytes); | ||
80 | u8 *pos = dst + (GHASH_BLOCK_SIZE - dctx->bytes); /* resume where the previous call stopped */ | ||
81 | |||
82 | dctx->bytes -= n; | ||
83 | srclen -= n; | ||
84 | |||
85 | while (n--) | ||
86 | *pos++ ^= *src++; | ||
87 | |||
88 | if (!dctx->bytes) /* block is now complete: multiply it by the hash key */ | ||
89 | clmul_ghash_mul(dst, &ctx->shash); | ||
90 | } | ||
91 | |||
92 | clmul_ghash_update(dst, src, srclen, &ctx->shash); /* absorbs whole 16-byte blocks only; returns at once if srclen < 16 */ | ||
93 | kernel_fpu_end(); | ||
94 | |||
95 | if (srclen & 0xf) { /* a tail shorter than one block remains */ | ||
96 | src += srclen - (srclen & 0xf); /* skip the whole blocks consumed above */ | ||
97 | srclen &= 0xf; | ||
98 | dctx->bytes = GHASH_BLOCK_SIZE - srclen; /* remember how many bytes are still needed */ | ||
99 | while (srclen--) | ||
100 | *dst++ ^= *src++; | ||
101 | } | ||
102 | |||
103 | return 0; | ||
104 | } | ||
105 | |||
106 | static void ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx) | ||
107 | { | ||
108 | u8 *dst = dctx->buffer; | ||
109 | |||
110 | if (dctx->bytes) { /* a partial block is pending: finish it with implicit zero padding */ | ||
111 | u8 *tmp = dst + (GHASH_BLOCK_SIZE - dctx->bytes); | ||
112 | |||
113 | while (dctx->bytes--) | ||
114 | *tmp++ ^= 0; /* XOR with 0 leaves the byte unchanged */ | ||
115 | |||
116 | kernel_fpu_begin(); | ||
117 | clmul_ghash_mul(dst, &ctx->shash); | ||
118 | kernel_fpu_end(); | ||
119 | } | ||
120 | |||
121 | dctx->bytes = 0; /* the post-decrement loop above leaves the u32 counter wrapped, so reset it */ | ||
122 | } | ||
123 | |||
124 | static int ghash_final(struct shash_desc *desc, u8 *dst) | ||
125 | { | ||
126 | struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); | ||
127 | struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); | ||
128 | u8 *buf = dctx->buffer; | ||
129 | |||
130 | ghash_flush(ctx, dctx); | ||
131 | memcpy(dst, buf, GHASH_BLOCK_SIZE); | ||
132 | |||
133 | return 0; | ||
134 | } | ||
135 | |||
136 | static struct shash_alg ghash_alg = { | ||
137 | .digestsize = GHASH_DIGEST_SIZE, | ||
138 | .init = ghash_init, | ||
139 | .update = ghash_update, | ||
140 | .final = ghash_final, | ||
141 | .setkey = ghash_setkey, | ||
142 | .descsize = sizeof(struct ghash_desc_ctx), | ||
143 | .base = { | ||
144 | .cra_name = "__ghash", | ||
145 | .cra_driver_name = "__ghash-pclmulqdqni", | ||
146 | .cra_priority = 0, | ||
147 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
148 | .cra_blocksize = GHASH_BLOCK_SIZE, | ||
149 | .cra_ctxsize = sizeof(struct ghash_ctx), | ||
150 | .cra_module = THIS_MODULE, | ||
151 | .cra_list = LIST_HEAD_INIT(ghash_alg.base.cra_list), | ||
152 | }, | ||
153 | }; | ||
154 | |||
155 | static int ghash_async_init(struct ahash_request *req) | ||
156 | { | ||
157 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
158 | struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); | ||
159 | struct ahash_request *cryptd_req = ahash_request_ctx(req); | ||
160 | struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; | ||
161 | |||
162 | if (!irq_fpu_usable()) { | ||
163 | memcpy(cryptd_req, req, sizeof(*req)); | ||
164 | ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); | ||
165 | return crypto_ahash_init(cryptd_req); | ||
166 | } else { | ||
167 | struct shash_desc *desc = cryptd_shash_desc(cryptd_req); | ||
168 | struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); | ||
169 | |||
170 | desc->tfm = child; | ||
171 | desc->flags = req->base.flags; | ||
172 | return crypto_shash_init(desc); | ||
173 | } | ||
174 | } | ||
175 | |||
176 | static int ghash_async_update(struct ahash_request *req) | ||
177 | { | ||
178 | struct ahash_request *cryptd_req = ahash_request_ctx(req); | ||
179 | |||
180 | if (!irq_fpu_usable()) { | ||
181 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
182 | struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); | ||
183 | struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; | ||
184 | |||
185 | memcpy(cryptd_req, req, sizeof(*req)); | ||
186 | ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); | ||
187 | return crypto_ahash_update(cryptd_req); | ||
188 | } else { | ||
189 | struct shash_desc *desc = cryptd_shash_desc(cryptd_req); | ||
190 | return shash_ahash_update(req, desc); | ||
191 | } | ||
192 | } | ||
193 | |||
194 | static int ghash_async_final(struct ahash_request *req) | ||
195 | { | ||
196 | struct ahash_request *cryptd_req = ahash_request_ctx(req); | ||
197 | |||
198 | if (!irq_fpu_usable()) { | ||
199 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
200 | struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); | ||
201 | struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; | ||
202 | |||
203 | memcpy(cryptd_req, req, sizeof(*req)); | ||
204 | ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); | ||
205 | return crypto_ahash_final(cryptd_req); | ||
206 | } else { | ||
207 | struct shash_desc *desc = cryptd_shash_desc(cryptd_req); | ||
208 | return crypto_shash_final(desc, req->result); | ||
209 | } | ||
210 | } | ||
211 | |||
212 | static int ghash_async_digest(struct ahash_request *req) | ||
213 | { | ||
214 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
215 | struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); | ||
216 | struct ahash_request *cryptd_req = ahash_request_ctx(req); | ||
217 | struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; | ||
218 | |||
219 | if (!irq_fpu_usable()) { | ||
220 | memcpy(cryptd_req, req, sizeof(*req)); | ||
221 | ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); | ||
222 | return crypto_ahash_digest(cryptd_req); | ||
223 | } else { | ||
224 | struct shash_desc *desc = cryptd_shash_desc(cryptd_req); | ||
225 | struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); | ||
226 | |||
227 | desc->tfm = child; | ||
228 | desc->flags = req->base.flags; | ||
229 | return shash_ahash_digest(req, desc); | ||
230 | } | ||
231 | } | ||
232 | |||
233 | static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key, | ||
234 | unsigned int keylen) | ||
235 | { | ||
236 | struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); | ||
237 | struct crypto_ahash *child = &ctx->cryptd_tfm->base; | ||
238 | int err; | ||
239 | |||
240 | crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK); /* mirror the caller's request flags onto the cryptd child */ | ||
241 | crypto_ahash_set_flags(child, crypto_ahash_get_flags(tfm) | ||
242 | & CRYPTO_TFM_REQ_MASK); | ||
243 | err = crypto_ahash_setkey(child, key, keylen); | ||
244 | crypto_ahash_set_flags(tfm, crypto_ahash_get_flags(child) /* copy the child's result flags back to the caller's tfm */ | ||
245 | & CRYPTO_TFM_RES_MASK); | ||
246 | |||
247 | return err; /* propagate a child setkey failure instead of always reporting success */ | ||
248 | } | ||
249 | |||
250 | static int ghash_async_init_tfm(struct crypto_tfm *tfm) | ||
251 | { | ||
252 | struct cryptd_ahash *cryptd_tfm; | ||
253 | struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); | ||
254 | |||
255 | cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni", 0, 0); | ||
256 | if (IS_ERR(cryptd_tfm)) | ||
257 | return PTR_ERR(cryptd_tfm); | ||
258 | ctx->cryptd_tfm = cryptd_tfm; | ||
259 | crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), | ||
260 | sizeof(struct ahash_request) + | ||
261 | crypto_ahash_reqsize(&cryptd_tfm->base)); | ||
262 | |||
263 | return 0; | ||
264 | } | ||
265 | |||
266 | static void ghash_async_exit_tfm(struct crypto_tfm *tfm) | ||
267 | { | ||
268 | struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); | ||
269 | |||
270 | cryptd_free_ahash(ctx->cryptd_tfm); | ||
271 | } | ||
272 | |||
273 | static struct ahash_alg ghash_async_alg = { | ||
274 | .init = ghash_async_init, | ||
275 | .update = ghash_async_update, | ||
276 | .final = ghash_async_final, | ||
277 | .setkey = ghash_async_setkey, | ||
278 | .digest = ghash_async_digest, | ||
279 | .halg = { | ||
280 | .digestsize = GHASH_DIGEST_SIZE, | ||
281 | .base = { | ||
282 | .cra_name = "ghash", | ||
283 | .cra_driver_name = "ghash-clmulni", | ||
284 | .cra_priority = 400, | ||
285 | .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC, | ||
286 | .cra_blocksize = GHASH_BLOCK_SIZE, | ||
287 | .cra_type = &crypto_ahash_type, | ||
288 | .cra_module = THIS_MODULE, | ||
289 | .cra_list = LIST_HEAD_INIT(ghash_async_alg.halg.base.cra_list), | ||
290 | .cra_init = ghash_async_init_tfm, | ||
291 | .cra_exit = ghash_async_exit_tfm, | ||
292 | }, | ||
293 | }, | ||
294 | }; | ||
295 | |||
296 | static int __init ghash_pclmulqdqni_mod_init(void) | ||
297 | { | ||
298 | int err; | ||
299 | |||
300 | if (!cpu_has_pclmulqdq) { | ||
301 | printk(KERN_INFO "Intel PCLMULQDQ-NI instructions are not" | ||
302 | " detected.\n"); | ||
303 | return -ENODEV; | ||
304 | } | ||
305 | |||
306 | err = crypto_register_shash(&ghash_alg); | ||
307 | if (err) | ||
308 | goto err_out; | ||
309 | err = crypto_register_ahash(&ghash_async_alg); | ||
310 | if (err) | ||
311 | goto err_shash; | ||
312 | |||
313 | return 0; | ||
314 | |||
315 | err_shash: | ||
316 | crypto_unregister_shash(&ghash_alg); | ||
317 | err_out: | ||
318 | return err; | ||
319 | } | ||
320 | |||
321 | static void __exit ghash_pclmulqdqni_mod_exit(void) | ||
322 | { | ||
323 | crypto_unregister_ahash(&ghash_async_alg); | ||
324 | crypto_unregister_shash(&ghash_alg); | ||
325 | } | ||
326 | |||
327 | module_init(ghash_pclmulqdqni_mod_init); | ||
328 | module_exit(ghash_pclmulqdqni_mod_exit); | ||
329 | |||
330 | MODULE_LICENSE("GPL"); | ||
331 | MODULE_DESCRIPTION("GHASH Message Digest Algorithm, " | ||
332 | "accelerated by PCLMULQDQ-NI"); | ||
333 | MODULE_ALIAS("ghash"); | ||
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 581b0568fe19..4eefdca9832b 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S | |||
@@ -653,7 +653,7 @@ ia32_sys_call_table: | |||
653 | .quad compat_sys_writev | 653 | .quad compat_sys_writev |
654 | .quad sys_getsid | 654 | .quad sys_getsid |
655 | .quad sys_fdatasync | 655 | .quad sys_fdatasync |
656 | .quad sys32_sysctl /* sysctl */ | 656 | .quad compat_sys_sysctl /* sysctl */ |
657 | .quad sys_mlock /* 150 */ | 657 | .quad sys_mlock /* 150 */ |
658 | .quad sys_munlock | 658 | .quad sys_munlock |
659 | .quad sys_mlockall | 659 | .quad sys_mlockall |
@@ -841,4 +841,5 @@ ia32_sys_call_table: | |||
841 | .quad compat_sys_pwritev | 841 | .quad compat_sys_pwritev |
842 | .quad compat_sys_rt_tgsigqueueinfo /* 335 */ | 842 | .quad compat_sys_rt_tgsigqueueinfo /* 335 */ |
843 | .quad sys_perf_event_open | 843 | .quad sys_perf_event_open |
844 | .quad compat_sys_recvmmsg | ||
844 | ia32_syscall_end: | 845 | ia32_syscall_end: |
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 9f5527198825..df82c0e48ded 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c | |||
@@ -434,62 +434,6 @@ asmlinkage long sys32_rt_sigqueueinfo(int pid, int sig, | |||
434 | return ret; | 434 | return ret; |
435 | } | 435 | } |
436 | 436 | ||
437 | #ifdef CONFIG_SYSCTL_SYSCALL | ||
438 | struct sysctl_ia32 { | ||
439 | unsigned int name; | ||
440 | int nlen; | ||
441 | unsigned int oldval; | ||
442 | unsigned int oldlenp; | ||
443 | unsigned int newval; | ||
444 | unsigned int newlen; | ||
445 | unsigned int __unused[4]; | ||
446 | }; | ||
447 | |||
448 | |||
449 | asmlinkage long sys32_sysctl(struct sysctl_ia32 __user *args32) | ||
450 | { | ||
451 | struct sysctl_ia32 a32; | ||
452 | mm_segment_t old_fs = get_fs(); | ||
453 | void __user *oldvalp, *newvalp; | ||
454 | size_t oldlen; | ||
455 | int __user *namep; | ||
456 | long ret; | ||
457 | |||
458 | if (copy_from_user(&a32, args32, sizeof(a32))) | ||
459 | return -EFAULT; | ||
460 | |||
461 | /* | ||
462 | * We need to pre-validate these because we have to disable | ||
463 | * address checking before calling do_sysctl() because of | ||
464 | * OLDLEN but we can't run the risk of the user specifying bad | ||
465 | * addresses here. Well, since we're dealing with 32 bit | ||
466 | * addresses, we KNOW that access_ok() will always succeed, so | ||
467 | * this is an expensive NOP, but so what... | ||
468 | */ | ||
469 | namep = compat_ptr(a32.name); | ||
470 | oldvalp = compat_ptr(a32.oldval); | ||
471 | newvalp = compat_ptr(a32.newval); | ||
472 | |||
473 | if ((oldvalp && get_user(oldlen, (int __user *)compat_ptr(a32.oldlenp))) | ||
474 | || !access_ok(VERIFY_WRITE, namep, 0) | ||
475 | || !access_ok(VERIFY_WRITE, oldvalp, 0) | ||
476 | || !access_ok(VERIFY_WRITE, newvalp, 0)) | ||
477 | return -EFAULT; | ||
478 | |||
479 | set_fs(KERNEL_DS); | ||
480 | lock_kernel(); | ||
481 | ret = do_sysctl(namep, a32.nlen, oldvalp, (size_t __user *)&oldlen, | ||
482 | newvalp, (size_t) a32.newlen); | ||
483 | unlock_kernel(); | ||
484 | set_fs(old_fs); | ||
485 | |||
486 | if (oldvalp && put_user(oldlen, (int __user *)compat_ptr(a32.oldlenp))) | ||
487 | return -EFAULT; | ||
488 | |||
489 | return ret; | ||
490 | } | ||
491 | #endif | ||
492 | |||
493 | /* warning: next two assume little endian */ | 437 | /* warning: next two assume little endian */ |
494 | asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count, | 438 | asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count, |
495 | u32 poslo, u32 poshi) | 439 | u32 poslo, u32 poshi) |
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 4518dc500903..60d2b2db0bc5 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h | |||
@@ -118,7 +118,7 @@ extern void acpi_restore_state_mem(void); | |||
118 | extern unsigned long acpi_wakeup_address; | 118 | extern unsigned long acpi_wakeup_address; |
119 | 119 | ||
120 | /* early initialization routine */ | 120 | /* early initialization routine */ |
121 | extern void acpi_reserve_bootmem(void); | 121 | extern void acpi_reserve_wakeup_memory(void); |
122 | 122 | ||
123 | /* | 123 | /* |
124 | * Check if the CPU can handle C2 and deeper | 124 | * Check if the CPU can handle C2 and deeper |
@@ -158,6 +158,7 @@ struct bootnode; | |||
158 | 158 | ||
159 | #ifdef CONFIG_ACPI_NUMA | 159 | #ifdef CONFIG_ACPI_NUMA |
160 | extern int acpi_numa; | 160 | extern int acpi_numa; |
161 | extern int acpi_get_nodes(struct bootnode *physnodes); | ||
161 | extern int acpi_scan_nodes(unsigned long start, unsigned long end); | 162 | extern int acpi_scan_nodes(unsigned long start, unsigned long end); |
162 | #define NR_NODE_MEMBLKS (MAX_NUMNODES*2) | 163 | #define NR_NODE_MEMBLKS (MAX_NUMNODES*2) |
163 | extern void acpi_fake_nodes(const struct bootnode *fake_nodes, | 164 | extern void acpi_fake_nodes(const struct bootnode *fake_nodes, |
diff --git a/arch/x86/include/asm/cache.h b/arch/x86/include/asm/cache.h index 549860d3be8f..2f9047cfaaca 100644 --- a/arch/x86/include/asm/cache.h +++ b/arch/x86/include/asm/cache.h | |||
@@ -9,12 +9,13 @@ | |||
9 | 9 | ||
10 | #define __read_mostly __attribute__((__section__(".data.read_mostly"))) | 10 | #define __read_mostly __attribute__((__section__(".data.read_mostly"))) |
11 | 11 | ||
12 | #define INTERNODE_CACHE_SHIFT CONFIG_X86_INTERNODE_CACHE_SHIFT | ||
13 | #define INTERNODE_CACHE_BYTES (1 << INTERNODE_CACHE_SHIFT) | ||
14 | |||
12 | #ifdef CONFIG_X86_VSMP | 15 | #ifdef CONFIG_X86_VSMP |
13 | /* vSMP Internode cacheline shift */ | ||
14 | #define INTERNODE_CACHE_SHIFT (12) | ||
15 | #ifdef CONFIG_SMP | 16 | #ifdef CONFIG_SMP |
16 | #define __cacheline_aligned_in_smp \ | 17 | #define __cacheline_aligned_in_smp \ |
17 | __attribute__((__aligned__(1 << (INTERNODE_CACHE_SHIFT)))) \ | 18 | __attribute__((__aligned__(INTERNODE_CACHE_BYTES))) \ |
18 | __page_aligned_data | 19 | __page_aligned_data |
19 | #endif | 20 | #endif |
20 | #endif | 21 | #endif |
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index b54f6afe7ec4..634c40a739a6 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h | |||
@@ -12,6 +12,7 @@ static inline void flush_cache_range(struct vm_area_struct *vma, | |||
12 | unsigned long start, unsigned long end) { } | 12 | unsigned long start, unsigned long end) { } |
13 | static inline void flush_cache_page(struct vm_area_struct *vma, | 13 | static inline void flush_cache_page(struct vm_area_struct *vma, |
14 | unsigned long vmaddr, unsigned long pfn) { } | 14 | unsigned long vmaddr, unsigned long pfn) { } |
15 | #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 | ||
15 | static inline void flush_dcache_page(struct page *page) { } | 16 | static inline void flush_dcache_page(struct page *page) { } |
16 | static inline void flush_dcache_mmap_lock(struct address_space *mapping) { } | 17 | static inline void flush_dcache_mmap_lock(struct address_space *mapping) { } |
17 | static inline void flush_dcache_mmap_unlock(struct address_space *mapping) { } | 18 | static inline void flush_dcache_mmap_unlock(struct address_space *mapping) { } |
@@ -176,6 +177,7 @@ void clflush_cache_range(void *addr, unsigned int size); | |||
176 | #ifdef CONFIG_DEBUG_RODATA | 177 | #ifdef CONFIG_DEBUG_RODATA |
177 | void mark_rodata_ro(void); | 178 | void mark_rodata_ro(void); |
178 | extern const int rodata_test_data; | 179 | extern const int rodata_test_data; |
180 | extern int kernel_set_to_readonly; | ||
179 | void set_kernel_text_rw(void); | 181 | void set_kernel_text_rw(void); |
180 | void set_kernel_text_ro(void); | 182 | void set_kernel_text_ro(void); |
181 | #else | 183 | #else |
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 9cfc88b97742..613700f27a4a 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h | |||
@@ -248,6 +248,7 @@ extern const char * const x86_power_flags[32]; | |||
248 | #define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) | 248 | #define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) |
249 | #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) | 249 | #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) |
250 | #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) | 250 | #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) |
251 | #define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) | ||
251 | 252 | ||
252 | #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) | 253 | #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) |
253 | # define cpu_has_invlpg 1 | 254 | # define cpu_has_invlpg 1 |
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 40b4e614fe71..761249e396fe 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h | |||
@@ -61,6 +61,12 @@ struct e820map { | |||
61 | struct e820entry map[E820_X_MAX]; | 61 | struct e820entry map[E820_X_MAX]; |
62 | }; | 62 | }; |
63 | 63 | ||
64 | #define ISA_START_ADDRESS 0xa0000 | ||
65 | #define ISA_END_ADDRESS 0x100000 | ||
66 | |||
67 | #define BIOS_BEGIN 0x000a0000 | ||
68 | #define BIOS_END 0x00100000 | ||
69 | |||
64 | #ifdef __KERNEL__ | 70 | #ifdef __KERNEL__ |
65 | /* see comment in arch/x86/kernel/e820.c */ | 71 | /* see comment in arch/x86/kernel/e820.c */ |
66 | extern struct e820map e820; | 72 | extern struct e820map e820; |
@@ -126,15 +132,18 @@ extern void e820_reserve_resources(void); | |||
126 | extern void e820_reserve_resources_late(void); | 132 | extern void e820_reserve_resources_late(void); |
127 | extern void setup_memory_map(void); | 133 | extern void setup_memory_map(void); |
128 | extern char *default_machine_specific_memory_setup(void); | 134 | extern char *default_machine_specific_memory_setup(void); |
129 | #endif /* __KERNEL__ */ | ||
130 | #endif /* __ASSEMBLY__ */ | ||
131 | 135 | ||
132 | #define ISA_START_ADDRESS 0xa0000 | 136 | /* |
133 | #define ISA_END_ADDRESS 0x100000 | 137 | * Returns true iff the specified range [s,e) is completely contained inside |
134 | #define is_ISA_range(s, e) ((s) >= ISA_START_ADDRESS && (e) < ISA_END_ADDRESS) | 138 | * the ISA region. |
139 | */ | ||
140 | static inline bool is_ISA_range(u64 s, u64 e) | ||
141 | { | ||
142 | return s >= ISA_START_ADDRESS && e <= ISA_END_ADDRESS; | ||
143 | } | ||
135 | 144 | ||
136 | #define BIOS_BEGIN 0x000a0000 | 145 | #endif /* __KERNEL__ */ |
137 | #define BIOS_END 0x00100000 | 146 | #endif /* __ASSEMBLY__ */ |
138 | 147 | ||
139 | #ifdef __KERNEL__ | 148 | #ifdef __KERNEL__ |
140 | #include <linux/ioport.h> | 149 | #include <linux/ioport.h> |
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 456a304b8172..8a024babe5e6 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h | |||
@@ -157,19 +157,6 @@ do { \ | |||
157 | 157 | ||
158 | #define compat_elf_check_arch(x) elf_check_arch_ia32(x) | 158 | #define compat_elf_check_arch(x) elf_check_arch_ia32(x) |
159 | 159 | ||
160 | static inline void start_ia32_thread(struct pt_regs *regs, u32 ip, u32 sp) | ||
161 | { | ||
162 | loadsegment(fs, 0); | ||
163 | loadsegment(ds, __USER32_DS); | ||
164 | loadsegment(es, __USER32_DS); | ||
165 | load_gs_index(0); | ||
166 | regs->ip = ip; | ||
167 | regs->sp = sp; | ||
168 | regs->flags = X86_EFLAGS_IF; | ||
169 | regs->cs = __USER32_CS; | ||
170 | regs->ss = __USER32_DS; | ||
171 | } | ||
172 | |||
173 | static inline void elf_common_init(struct thread_struct *t, | 160 | static inline void elf_common_init(struct thread_struct *t, |
174 | struct pt_regs *regs, const u16 ds) | 161 | struct pt_regs *regs, const u16 ds) |
175 | { | 162 | { |
@@ -191,11 +178,8 @@ do { \ | |||
191 | #define COMPAT_ELF_PLAT_INIT(regs, load_addr) \ | 178 | #define COMPAT_ELF_PLAT_INIT(regs, load_addr) \ |
192 | elf_common_init(&current->thread, regs, __USER_DS) | 179 | elf_common_init(&current->thread, regs, __USER_DS) |
193 | 180 | ||
194 | #define compat_start_thread(regs, ip, sp) \ | 181 | void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp); |
195 | do { \ | 182 | #define compat_start_thread start_thread_ia32 |
196 | start_ia32_thread(regs, ip, sp); \ | ||
197 | set_fs(USER_DS); \ | ||
198 | } while (0) | ||
199 | 183 | ||
200 | #define COMPAT_SET_PERSONALITY(ex) \ | 184 | #define COMPAT_SET_PERSONALITY(ex) \ |
201 | do { \ | 185 | do { \ |
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index f5693c81a1db..8e8ec663a98f 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h | |||
@@ -34,7 +34,7 @@ BUILD_INTERRUPT3(invalidate_interrupt7,INVALIDATE_TLB_VECTOR_START+7, | |||
34 | smp_invalidate_interrupt) | 34 | smp_invalidate_interrupt) |
35 | #endif | 35 | #endif |
36 | 36 | ||
37 | BUILD_INTERRUPT(generic_interrupt, GENERIC_INTERRUPT_VECTOR) | 37 | BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) |
38 | 38 | ||
39 | /* | 39 | /* |
40 | * every pentium local APIC has two 'local interrupts', with a | 40 | * every pentium local APIC has two 'local interrupts', with a |
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 108eb6fd1ae7..0f8576427cfe 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h | |||
@@ -12,7 +12,7 @@ typedef struct { | |||
12 | unsigned int apic_timer_irqs; /* arch dependent */ | 12 | unsigned int apic_timer_irqs; /* arch dependent */ |
13 | unsigned int irq_spurious_count; | 13 | unsigned int irq_spurious_count; |
14 | #endif | 14 | #endif |
15 | unsigned int generic_irqs; /* arch dependent */ | 15 | unsigned int x86_platform_ipis; /* arch dependent */ |
16 | unsigned int apic_perf_irqs; | 16 | unsigned int apic_perf_irqs; |
17 | unsigned int apic_pending_irqs; | 17 | unsigned int apic_pending_irqs; |
18 | #ifdef CONFIG_SMP | 18 | #ifdef CONFIG_SMP |
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index 1c22cb05ad6a..5d89fd2a3690 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h | |||
@@ -65,11 +65,12 @@ | |||
65 | /* hpet memory map physical address */ | 65 | /* hpet memory map physical address */ |
66 | extern unsigned long hpet_address; | 66 | extern unsigned long hpet_address; |
67 | extern unsigned long force_hpet_address; | 67 | extern unsigned long force_hpet_address; |
68 | extern u8 hpet_blockid; | ||
68 | extern int hpet_force_user; | 69 | extern int hpet_force_user; |
69 | extern int is_hpet_enabled(void); | 70 | extern int is_hpet_enabled(void); |
70 | extern int hpet_enable(void); | 71 | extern int hpet_enable(void); |
71 | extern void hpet_disable(void); | 72 | extern void hpet_disable(void); |
72 | extern unsigned long hpet_readl(unsigned long a); | 73 | extern unsigned int hpet_readl(unsigned int a); |
73 | extern void force_hpet_resume(void); | 74 | extern void force_hpet_resume(void); |
74 | 75 | ||
75 | extern void hpet_msi_unmask(unsigned int irq); | 76 | extern void hpet_msi_unmask(unsigned int irq); |
@@ -78,9 +79,9 @@ extern void hpet_msi_write(unsigned int irq, struct msi_msg *msg); | |||
78 | extern void hpet_msi_read(unsigned int irq, struct msi_msg *msg); | 79 | extern void hpet_msi_read(unsigned int irq, struct msi_msg *msg); |
79 | 80 | ||
80 | #ifdef CONFIG_PCI_MSI | 81 | #ifdef CONFIG_PCI_MSI |
81 | extern int arch_setup_hpet_msi(unsigned int irq); | 82 | extern int arch_setup_hpet_msi(unsigned int irq, unsigned int id); |
82 | #else | 83 | #else |
83 | static inline int arch_setup_hpet_msi(unsigned int irq) | 84 | static inline int arch_setup_hpet_msi(unsigned int irq, unsigned int id) |
84 | { | 85 | { |
85 | return -EINVAL; | 86 | return -EINVAL; |
86 | } | 87 | } |
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 6e124269fd4b..08c48a81841f 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h | |||
@@ -27,7 +27,7 @@ | |||
27 | 27 | ||
28 | /* Interrupt handlers registered during init_IRQ */ | 28 | /* Interrupt handlers registered during init_IRQ */ |
29 | extern void apic_timer_interrupt(void); | 29 | extern void apic_timer_interrupt(void); |
30 | extern void generic_interrupt(void); | 30 | extern void x86_platform_ipi(void); |
31 | extern void error_interrupt(void); | 31 | extern void error_interrupt(void); |
32 | extern void perf_pending_interrupt(void); | 32 | extern void perf_pending_interrupt(void); |
33 | 33 | ||
@@ -119,7 +119,7 @@ extern void eisa_set_level_irq(unsigned int irq); | |||
119 | /* SMP */ | 119 | /* SMP */ |
120 | extern void smp_apic_timer_interrupt(struct pt_regs *); | 120 | extern void smp_apic_timer_interrupt(struct pt_regs *); |
121 | extern void smp_spurious_interrupt(struct pt_regs *); | 121 | extern void smp_spurious_interrupt(struct pt_regs *); |
122 | extern void smp_generic_interrupt(struct pt_regs *); | 122 | extern void smp_x86_platform_ipi(struct pt_regs *); |
123 | extern void smp_error_interrupt(struct pt_regs *); | 123 | extern void smp_error_interrupt(struct pt_regs *); |
124 | #ifdef CONFIG_X86_IO_APIC | 124 | #ifdef CONFIG_X86_IO_APIC |
125 | extern asmlinkage void smp_irq_move_cleanup_interrupt(void); | 125 | extern asmlinkage void smp_irq_move_cleanup_interrupt(void); |
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 0b20bbb758f2..ebfb8a9e11f7 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h | |||
@@ -10,6 +10,8 @@ | |||
10 | #ifndef _ASM_X86_I387_H | 10 | #ifndef _ASM_X86_I387_H |
11 | #define _ASM_X86_I387_H | 11 | #define _ASM_X86_I387_H |
12 | 12 | ||
13 | #ifndef __ASSEMBLY__ | ||
14 | |||
13 | #include <linux/sched.h> | 15 | #include <linux/sched.h> |
14 | #include <linux/kernel_stat.h> | 16 | #include <linux/kernel_stat.h> |
15 | #include <linux/regset.h> | 17 | #include <linux/regset.h> |
@@ -411,4 +413,9 @@ static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) | |||
411 | } | 413 | } |
412 | } | 414 | } |
413 | 415 | ||
416 | #endif /* __ASSEMBLY__ */ | ||
417 | |||
418 | #define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 | ||
419 | #define PSHUFB_XMM5_XMM6 .byte 0x66, 0x0f, 0x38, 0x00, 0xf5 | ||
420 | |||
414 | #endif /* _ASM_X86_I387_H */ | 421 | #endif /* _ASM_X86_I387_H */ |
diff --git a/arch/x86/include/asm/inst.h b/arch/x86/include/asm/inst.h new file mode 100644 index 000000000000..14cf526091f9 --- /dev/null +++ b/arch/x86/include/asm/inst.h | |||
@@ -0,0 +1,150 @@ | |||
1 | /* | ||
2 | * Generate .byte code for some instructions not supported by old | ||
3 | * binutils. | ||
4 | */ | ||
5 | #ifndef X86_ASM_INST_H | ||
6 | #define X86_ASM_INST_H | ||
7 | |||
8 | #ifdef __ASSEMBLY__ | ||
9 | |||
10 | .macro XMM_NUM opd xmm | ||
11 | .ifc \xmm,%xmm0 | ||
12 | \opd = 0 | ||
13 | .endif | ||
14 | .ifc \xmm,%xmm1 | ||
15 | \opd = 1 | ||
16 | .endif | ||
17 | .ifc \xmm,%xmm2 | ||
18 | \opd = 2 | ||
19 | .endif | ||
20 | .ifc \xmm,%xmm3 | ||
21 | \opd = 3 | ||
22 | .endif | ||
23 | .ifc \xmm,%xmm4 | ||
24 | \opd = 4 | ||
25 | .endif | ||
26 | .ifc \xmm,%xmm5 | ||
27 | \opd = 5 | ||
28 | .endif | ||
29 | .ifc \xmm,%xmm6 | ||
30 | \opd = 6 | ||
31 | .endif | ||
32 | .ifc \xmm,%xmm7 | ||
33 | \opd = 7 | ||
34 | .endif | ||
35 | .ifc \xmm,%xmm8 | ||
36 | \opd = 8 | ||
37 | .endif | ||
38 | .ifc \xmm,%xmm9 | ||
39 | \opd = 9 | ||
40 | .endif | ||
41 | .ifc \xmm,%xmm10 | ||
42 | \opd = 10 | ||
43 | .endif | ||
44 | .ifc \xmm,%xmm11 | ||
45 | \opd = 11 | ||
46 | .endif | ||
47 | .ifc \xmm,%xmm12 | ||
48 | \opd = 12 | ||
49 | .endif | ||
50 | .ifc \xmm,%xmm13 | ||
51 | \opd = 13 | ||
52 | .endif | ||
53 | .ifc \xmm,%xmm14 | ||
54 | \opd = 14 | ||
55 | .endif | ||
56 | .ifc \xmm,%xmm15 | ||
57 | \opd = 15 | ||
58 | .endif | ||
59 | .endm | ||
60 | |||
61 | .macro PFX_OPD_SIZE | ||
62 | .byte 0x66 | ||
63 | .endm | ||
64 | |||
65 | .macro PFX_REX opd1 opd2 | ||
66 | .if (\opd1 | \opd2) & 8 | ||
67 | .byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1) | ||
68 | .endif | ||
69 | .endm | ||
70 | |||
71 | .macro MODRM mod opd1 opd2 | ||
72 | .byte \mod | (\opd1 & 7) | ((\opd2 & 7) << 3) | ||
73 | .endm | ||
74 | |||
75 | .macro PSHUFB_XMM xmm1 xmm2 | ||
76 | XMM_NUM pshufb_opd1 \xmm1 | ||
77 | XMM_NUM pshufb_opd2 \xmm2 | ||
78 | PFX_OPD_SIZE | ||
79 | PFX_REX pshufb_opd1 pshufb_opd2 | ||
80 | .byte 0x0f, 0x38, 0x00 | ||
81 | MODRM 0xc0 pshufb_opd1 pshufb_opd2 | ||
82 | .endm | ||
83 | |||
84 | .macro PCLMULQDQ imm8 xmm1 xmm2 | ||
85 | XMM_NUM clmul_opd1 \xmm1 | ||
86 | XMM_NUM clmul_opd2 \xmm2 | ||
87 | PFX_OPD_SIZE | ||
88 | PFX_REX clmul_opd1 clmul_opd2 | ||
89 | .byte 0x0f, 0x3a, 0x44 | ||
90 | MODRM 0xc0 clmul_opd1 clmul_opd2 | ||
91 | .byte \imm8 | ||
92 | .endm | ||
93 | |||
94 | .macro AESKEYGENASSIST rcon xmm1 xmm2 | ||
95 | XMM_NUM aeskeygen_opd1 \xmm1 | ||
96 | XMM_NUM aeskeygen_opd2 \xmm2 | ||
97 | PFX_OPD_SIZE | ||
98 | PFX_REX aeskeygen_opd1 aeskeygen_opd2 | ||
99 | .byte 0x0f, 0x3a, 0xdf | ||
100 | MODRM 0xc0 aeskeygen_opd1 aeskeygen_opd2 | ||
101 | .byte \rcon | ||
102 | .endm | ||
103 | |||
104 | .macro AESIMC xmm1 xmm2 | ||
105 | XMM_NUM aesimc_opd1 \xmm1 | ||
106 | XMM_NUM aesimc_opd2 \xmm2 | ||
107 | PFX_OPD_SIZE | ||
108 | PFX_REX aesimc_opd1 aesimc_opd2 | ||
109 | .byte 0x0f, 0x38, 0xdb | ||
110 | MODRM 0xc0 aesimc_opd1 aesimc_opd2 | ||
111 | .endm | ||
112 | |||
113 | .macro AESENC xmm1 xmm2 | ||
114 | XMM_NUM aesenc_opd1 \xmm1 | ||
115 | XMM_NUM aesenc_opd2 \xmm2 | ||
116 | PFX_OPD_SIZE | ||
117 | PFX_REX aesenc_opd1 aesenc_opd2 | ||
118 | .byte 0x0f, 0x38, 0xdc | ||
119 | MODRM 0xc0 aesenc_opd1 aesenc_opd2 | ||
120 | .endm | ||
121 | |||
122 | .macro AESENCLAST xmm1 xmm2 | ||
123 | XMM_NUM aesenclast_opd1 \xmm1 | ||
124 | XMM_NUM aesenclast_opd2 \xmm2 | ||
125 | PFX_OPD_SIZE | ||
126 | PFX_REX aesenclast_opd1 aesenclast_opd2 | ||
127 | .byte 0x0f, 0x38, 0xdd | ||
128 | MODRM 0xc0 aesenclast_opd1 aesenclast_opd2 | ||
129 | .endm | ||
130 | |||
131 | .macro AESDEC xmm1 xmm2 | ||
132 | XMM_NUM aesdec_opd1 \xmm1 | ||
133 | XMM_NUM aesdec_opd2 \xmm2 | ||
134 | PFX_OPD_SIZE | ||
135 | PFX_REX aesdec_opd1 aesdec_opd2 | ||
136 | .byte 0x0f, 0x38, 0xde | ||
137 | MODRM 0xc0 aesdec_opd1 aesdec_opd2 | ||
138 | .endm | ||
139 | |||
140 | .macro AESDECLAST xmm1 xmm2 | ||
141 | XMM_NUM aesdeclast_opd1 \xmm1 | ||
142 | XMM_NUM aesdeclast_opd2 \xmm2 | ||
143 | PFX_OPD_SIZE | ||
144 | PFX_REX aesdeclast_opd1 aesdeclast_opd2 | ||
145 | .byte 0x0f, 0x38, 0xdf | ||
146 | MODRM 0xc0 aesdeclast_opd1 aesdeclast_opd2 | ||
147 | .endm | ||
148 | #endif | ||
149 | |||
150 | #endif | ||
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index ffd700ff5dcb..5458380b6ef8 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h | |||
@@ -37,7 +37,7 @@ extern void fixup_irqs(void); | |||
37 | extern void irq_force_complete_move(int); | 37 | extern void irq_force_complete_move(int); |
38 | #endif | 38 | #endif |
39 | 39 | ||
40 | extern void (*generic_interrupt_extension)(void); | 40 | extern void (*x86_platform_ipi_callback)(void); |
41 | extern void native_init_IRQ(void); | 41 | extern void native_init_IRQ(void); |
42 | extern bool handle_irq(unsigned irq, struct pt_regs *regs); | 42 | extern bool handle_irq(unsigned irq, struct pt_regs *regs); |
43 | 43 | ||
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 5b21f0ec3df2..6a635bd39867 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h | |||
@@ -106,7 +106,7 @@ | |||
106 | /* | 106 | /* |
107 | * Generic system vector for platform specific use | 107 | * Generic system vector for platform specific use |
108 | */ | 108 | */ |
109 | #define GENERIC_INTERRUPT_VECTOR 0xed | 109 | #define X86_PLATFORM_IPI_VECTOR 0xed |
110 | 110 | ||
111 | /* | 111 | /* |
112 | * Performance monitoring pending work vector: | 112 | * Performance monitoring pending work vector: |
diff --git a/arch/x86/include/asm/k8.h b/arch/x86/include/asm/k8.h index c2d1f3b58e5f..f70e60071fe8 100644 --- a/arch/x86/include/asm/k8.h +++ b/arch/x86/include/asm/k8.h | |||
@@ -4,13 +4,16 @@ | |||
4 | #include <linux/pci.h> | 4 | #include <linux/pci.h> |
5 | 5 | ||
6 | extern struct pci_device_id k8_nb_ids[]; | 6 | extern struct pci_device_id k8_nb_ids[]; |
7 | struct bootnode; | ||
7 | 8 | ||
8 | extern int early_is_k8_nb(u32 value); | 9 | extern int early_is_k8_nb(u32 value); |
9 | extern struct pci_dev **k8_northbridges; | 10 | extern struct pci_dev **k8_northbridges; |
10 | extern int num_k8_northbridges; | 11 | extern int num_k8_northbridges; |
11 | extern int cache_k8_northbridges(void); | 12 | extern int cache_k8_northbridges(void); |
12 | extern void k8_flush_garts(void); | 13 | extern void k8_flush_garts(void); |
13 | extern int k8_scan_nodes(unsigned long start, unsigned long end); | 14 | extern int k8_get_nodes(struct bootnode *nodes); |
15 | extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn); | ||
16 | extern int k8_scan_nodes(void); | ||
14 | 17 | ||
15 | #ifdef CONFIG_K8_NB | 18 | #ifdef CONFIG_K8_NB |
16 | static inline struct pci_dev *node_to_k8_nb_misc(int node) | 19 | static inline struct pci_dev *node_to_k8_nb_misc(int node) |
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index 4a5fe914dc59..950df434763f 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h | |||
@@ -19,6 +19,8 @@ | |||
19 | #define __KVM_HAVE_MSIX | 19 | #define __KVM_HAVE_MSIX |
20 | #define __KVM_HAVE_MCE | 20 | #define __KVM_HAVE_MCE |
21 | #define __KVM_HAVE_PIT_STATE2 | 21 | #define __KVM_HAVE_PIT_STATE2 |
22 | #define __KVM_HAVE_XEN_HVM | ||
23 | #define __KVM_HAVE_VCPU_EVENTS | ||
22 | 24 | ||
23 | /* Architectural interrupt line count. */ | 25 | /* Architectural interrupt line count. */ |
24 | #define KVM_NR_INTERRUPTS 256 | 26 | #define KVM_NR_INTERRUPTS 256 |
@@ -79,6 +81,7 @@ struct kvm_ioapic_state { | |||
79 | #define KVM_IRQCHIP_PIC_MASTER 0 | 81 | #define KVM_IRQCHIP_PIC_MASTER 0 |
80 | #define KVM_IRQCHIP_PIC_SLAVE 1 | 82 | #define KVM_IRQCHIP_PIC_SLAVE 1 |
81 | #define KVM_IRQCHIP_IOAPIC 2 | 83 | #define KVM_IRQCHIP_IOAPIC 2 |
84 | #define KVM_NR_IRQCHIPS 3 | ||
82 | 85 | ||
83 | /* for KVM_GET_REGS and KVM_SET_REGS */ | 86 | /* for KVM_GET_REGS and KVM_SET_REGS */ |
84 | struct kvm_regs { | 87 | struct kvm_regs { |
@@ -250,4 +253,31 @@ struct kvm_reinject_control { | |||
250 | __u8 pit_reinject; | 253 | __u8 pit_reinject; |
251 | __u8 reserved[31]; | 254 | __u8 reserved[31]; |
252 | }; | 255 | }; |
256 | |||
257 | /* for KVM_GET/SET_VCPU_EVENTS */ | ||
258 | struct kvm_vcpu_events { | ||
259 | struct { | ||
260 | __u8 injected; | ||
261 | __u8 nr; | ||
262 | __u8 has_error_code; | ||
263 | __u8 pad; | ||
264 | __u32 error_code; | ||
265 | } exception; | ||
266 | struct { | ||
267 | __u8 injected; | ||
268 | __u8 nr; | ||
269 | __u8 soft; | ||
270 | __u8 pad; | ||
271 | } interrupt; | ||
272 | struct { | ||
273 | __u8 injected; | ||
274 | __u8 pending; | ||
275 | __u8 masked; | ||
276 | __u8 pad; | ||
277 | } nmi; | ||
278 | __u32 sipi_vector; | ||
279 | __u32 flags; | ||
280 | __u32 reserved[10]; | ||
281 | }; | ||
282 | |||
253 | #endif /* _ASM_X86_KVM_H */ | 283 | #endif /* _ASM_X86_KVM_H */ |
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index b7ed2c423116..7c18e1230f54 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h | |||
@@ -129,7 +129,7 @@ struct decode_cache { | |||
129 | u8 seg_override; | 129 | u8 seg_override; |
130 | unsigned int d; | 130 | unsigned int d; |
131 | unsigned long regs[NR_VCPU_REGS]; | 131 | unsigned long regs[NR_VCPU_REGS]; |
132 | unsigned long eip; | 132 | unsigned long eip, eip_orig; |
133 | /* modrm */ | 133 | /* modrm */ |
134 | u8 modrm; | 134 | u8 modrm; |
135 | u8 modrm_mod; | 135 | u8 modrm_mod; |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d83892226f73..4f865e8b8540 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -354,7 +354,6 @@ struct kvm_vcpu_arch { | |||
354 | unsigned int time_offset; | 354 | unsigned int time_offset; |
355 | struct page *time_page; | 355 | struct page *time_page; |
356 | 356 | ||
357 | bool singlestep; /* guest is single stepped by KVM */ | ||
358 | bool nmi_pending; | 357 | bool nmi_pending; |
359 | bool nmi_injected; | 358 | bool nmi_injected; |
360 | 359 | ||
@@ -371,6 +370,10 @@ struct kvm_vcpu_arch { | |||
371 | u64 mcg_status; | 370 | u64 mcg_status; |
372 | u64 mcg_ctl; | 371 | u64 mcg_ctl; |
373 | u64 *mce_banks; | 372 | u64 *mce_banks; |
373 | |||
374 | /* used for guest single stepping over the given code position */ | ||
375 | u16 singlestep_cs; | ||
376 | unsigned long singlestep_rip; | ||
374 | }; | 377 | }; |
375 | 378 | ||
376 | struct kvm_mem_alias { | 379 | struct kvm_mem_alias { |
@@ -397,7 +400,6 @@ struct kvm_arch{ | |||
397 | struct kvm_pic *vpic; | 400 | struct kvm_pic *vpic; |
398 | struct kvm_ioapic *vioapic; | 401 | struct kvm_ioapic *vioapic; |
399 | struct kvm_pit *vpit; | 402 | struct kvm_pit *vpit; |
400 | struct hlist_head irq_ack_notifier_list; | ||
401 | int vapics_in_nmi_mode; | 403 | int vapics_in_nmi_mode; |
402 | 404 | ||
403 | unsigned int tss_addr; | 405 | unsigned int tss_addr; |
@@ -410,8 +412,10 @@ struct kvm_arch{ | |||
410 | gpa_t ept_identity_map_addr; | 412 | gpa_t ept_identity_map_addr; |
411 | 413 | ||
412 | unsigned long irq_sources_bitmap; | 414 | unsigned long irq_sources_bitmap; |
413 | unsigned long irq_states[KVM_IOAPIC_NUM_PINS]; | ||
414 | u64 vm_init_tsc; | 415 | u64 vm_init_tsc; |
416 | s64 kvmclock_offset; | ||
417 | |||
418 | struct kvm_xen_hvm_config xen_hvm_config; | ||
415 | }; | 419 | }; |
416 | 420 | ||
417 | struct kvm_vm_stat { | 421 | struct kvm_vm_stat { |
@@ -461,7 +465,7 @@ struct descriptor_table { | |||
461 | struct kvm_x86_ops { | 465 | struct kvm_x86_ops { |
462 | int (*cpu_has_kvm_support)(void); /* __init */ | 466 | int (*cpu_has_kvm_support)(void); /* __init */ |
463 | int (*disabled_by_bios)(void); /* __init */ | 467 | int (*disabled_by_bios)(void); /* __init */ |
464 | void (*hardware_enable)(void *dummy); /* __init */ | 468 | int (*hardware_enable)(void *dummy); |
465 | void (*hardware_disable)(void *dummy); | 469 | void (*hardware_disable)(void *dummy); |
466 | void (*check_processor_compatibility)(void *rtn); | 470 | void (*check_processor_compatibility)(void *rtn); |
467 | int (*hardware_setup)(void); /* __init */ | 471 | int (*hardware_setup)(void); /* __init */ |
@@ -477,8 +481,8 @@ struct kvm_x86_ops { | |||
477 | void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); | 481 | void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); |
478 | void (*vcpu_put)(struct kvm_vcpu *vcpu); | 482 | void (*vcpu_put)(struct kvm_vcpu *vcpu); |
479 | 483 | ||
480 | int (*set_guest_debug)(struct kvm_vcpu *vcpu, | 484 | void (*set_guest_debug)(struct kvm_vcpu *vcpu, |
481 | struct kvm_guest_debug *dbg); | 485 | struct kvm_guest_debug *dbg); |
482 | int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); | 486 | int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); |
483 | int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); | 487 | int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); |
484 | u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); | 488 | u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); |
@@ -506,8 +510,8 @@ struct kvm_x86_ops { | |||
506 | 510 | ||
507 | void (*tlb_flush)(struct kvm_vcpu *vcpu); | 511 | void (*tlb_flush)(struct kvm_vcpu *vcpu); |
508 | 512 | ||
509 | void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run); | 513 | void (*run)(struct kvm_vcpu *vcpu); |
510 | int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu); | 514 | int (*handle_exit)(struct kvm_vcpu *vcpu); |
511 | void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); | 515 | void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); |
512 | void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); | 516 | void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); |
513 | u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); | 517 | u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); |
@@ -519,6 +523,8 @@ struct kvm_x86_ops { | |||
519 | bool has_error_code, u32 error_code); | 523 | bool has_error_code, u32 error_code); |
520 | int (*interrupt_allowed)(struct kvm_vcpu *vcpu); | 524 | int (*interrupt_allowed)(struct kvm_vcpu *vcpu); |
521 | int (*nmi_allowed)(struct kvm_vcpu *vcpu); | 525 | int (*nmi_allowed)(struct kvm_vcpu *vcpu); |
526 | bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); | ||
527 | void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked); | ||
522 | void (*enable_nmi_window)(struct kvm_vcpu *vcpu); | 528 | void (*enable_nmi_window)(struct kvm_vcpu *vcpu); |
523 | void (*enable_irq_window)(struct kvm_vcpu *vcpu); | 529 | void (*enable_irq_window)(struct kvm_vcpu *vcpu); |
524 | void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); | 530 | void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); |
@@ -568,7 +574,7 @@ enum emulation_result { | |||
568 | #define EMULTYPE_NO_DECODE (1 << 0) | 574 | #define EMULTYPE_NO_DECODE (1 << 0) |
569 | #define EMULTYPE_TRAP_UD (1 << 1) | 575 | #define EMULTYPE_TRAP_UD (1 << 1) |
570 | #define EMULTYPE_SKIP (1 << 2) | 576 | #define EMULTYPE_SKIP (1 << 2) |
571 | int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run, | 577 | int emulate_instruction(struct kvm_vcpu *vcpu, |
572 | unsigned long cr2, u16 error_code, int emulation_type); | 578 | unsigned long cr2, u16 error_code, int emulation_type); |
573 | void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); | 579 | void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); |
574 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); | 580 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); |
@@ -585,9 +591,9 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); | |||
585 | 591 | ||
586 | struct x86_emulate_ctxt; | 592 | struct x86_emulate_ctxt; |
587 | 593 | ||
588 | int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | 594 | int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, |
589 | int size, unsigned port); | 595 | int size, unsigned port); |
590 | int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | 596 | int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, |
591 | int size, unsigned long count, int down, | 597 | int size, unsigned long count, int down, |
592 | gva_t address, int rep, unsigned port); | 598 | gva_t address, int rep, unsigned port); |
593 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); | 599 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); |
@@ -616,6 +622,9 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); | |||
616 | int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); | 622 | int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); |
617 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data); | 623 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data); |
618 | 624 | ||
625 | unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu); | ||
626 | void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); | ||
627 | |||
619 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); | 628 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); |
620 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); | 629 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); |
621 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2, | 630 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2, |
@@ -802,4 +811,7 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); | |||
802 | int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); | 811 | int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); |
803 | int kvm_cpu_get_interrupt(struct kvm_vcpu *v); | 812 | int kvm_cpu_get_interrupt(struct kvm_vcpu *v); |
804 | 813 | ||
814 | void kvm_define_shared_msr(unsigned index, u32 msr); | ||
815 | void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); | ||
816 | |||
805 | #endif /* _ASM_X86_KVM_HOST_H */ | 817 | #endif /* _ASM_X86_KVM_HOST_H */ |
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index ef51b501e22a..c24ca9a56458 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h | |||
@@ -12,6 +12,8 @@ struct device; | |||
12 | enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND }; | 12 | enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND }; |
13 | 13 | ||
14 | struct microcode_ops { | 14 | struct microcode_ops { |
15 | void (*init)(struct device *device); | ||
16 | void (*fini)(void); | ||
15 | enum ucode_state (*request_microcode_user) (int cpu, | 17 | enum ucode_state (*request_microcode_user) (int cpu, |
16 | const void __user *buf, size_t size); | 18 | const void __user *buf, size_t size); |
17 | 19 | ||
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 61d90b1331c3..d8bf23a88d05 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h | |||
@@ -71,12 +71,7 @@ static inline void early_get_smp_config(void) | |||
71 | 71 | ||
72 | static inline void find_smp_config(void) | 72 | static inline void find_smp_config(void) |
73 | { | 73 | { |
74 | x86_init.mpparse.find_smp_config(1); | 74 | x86_init.mpparse.find_smp_config(); |
75 | } | ||
76 | |||
77 | static inline void early_find_smp_config(void) | ||
78 | { | ||
79 | x86_init.mpparse.find_smp_config(0); | ||
80 | } | 75 | } |
81 | 76 | ||
82 | #ifdef CONFIG_X86_MPPARSE | 77 | #ifdef CONFIG_X86_MPPARSE |
@@ -89,7 +84,7 @@ extern void default_mpc_oem_bus_info(struct mpc_bus *m, char *str); | |||
89 | # else | 84 | # else |
90 | # define default_mpc_oem_bus_info NULL | 85 | # define default_mpc_oem_bus_info NULL |
91 | # endif | 86 | # endif |
92 | extern void default_find_smp_config(unsigned int reserve); | 87 | extern void default_find_smp_config(void); |
93 | extern void default_get_smp_config(unsigned int early); | 88 | extern void default_get_smp_config(unsigned int early); |
94 | #else | 89 | #else |
95 | static inline void early_reserve_e820_mpc_new(void) { } | 90 | static inline void early_reserve_e820_mpc_new(void) { } |
@@ -97,7 +92,7 @@ static inline void early_reserve_e820_mpc_new(void) { } | |||
97 | #define default_mpc_apic_id NULL | 92 | #define default_mpc_apic_id NULL |
98 | #define default_smp_read_mpc_oem NULL | 93 | #define default_smp_read_mpc_oem NULL |
99 | #define default_mpc_oem_bus_info NULL | 94 | #define default_mpc_oem_bus_info NULL |
100 | #define default_find_smp_config x86_init_uint_noop | 95 | #define default_find_smp_config x86_init_noop |
101 | #define default_get_smp_config x86_init_uint_noop | 96 | #define default_get_smp_config x86_init_uint_noop |
102 | #endif | 97 | #endif |
103 | 98 | ||
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index 6473f5ccff85..642fe34b36a2 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h | |||
@@ -49,7 +49,8 @@ extern unsigned long max_pfn_mapped; | |||
49 | extern unsigned long init_memory_mapping(unsigned long start, | 49 | extern unsigned long init_memory_mapping(unsigned long start, |
50 | unsigned long end); | 50 | unsigned long end); |
51 | 51 | ||
52 | extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn); | 52 | extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn, |
53 | int acpi, int k8); | ||
53 | extern void free_initmem(void); | 54 | extern void free_initmem(void); |
54 | 55 | ||
55 | #endif /* !__ASSEMBLY__ */ | 56 | #endif /* !__ASSEMBLY__ */ |
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index af6fd360ab35..a34c785c5a63 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h | |||
@@ -16,6 +16,8 @@ | |||
16 | 16 | ||
17 | #ifndef __ASSEMBLY__ | 17 | #ifndef __ASSEMBLY__ |
18 | 18 | ||
19 | #include <asm/x86_init.h> | ||
20 | |||
19 | /* | 21 | /* |
20 | * ZERO_PAGE is a global shared page that is always zero: used | 22 | * ZERO_PAGE is a global shared page that is always zero: used |
21 | * for zero-mapped memory areas etc.. | 23 | * for zero-mapped memory areas etc.. |
@@ -270,9 +272,9 @@ static inline int is_new_memtype_allowed(u64 paddr, unsigned long size, | |||
270 | unsigned long new_flags) | 272 | unsigned long new_flags) |
271 | { | 273 | { |
272 | /* | 274 | /* |
273 | * PAT type is always WB for ISA. So no need to check. | 275 | * PAT type is always WB for untracked ranges, so no need to check. |
274 | */ | 276 | */ |
275 | if (is_ISA_range(paddr, paddr + size - 1)) | 277 | if (x86_platform.is_untracked_pat_range(paddr, paddr + size)) |
276 | return 1; | 278 | return 1; |
277 | 279 | ||
278 | /* | 280 | /* |
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 621f56d73121..4009f6534f52 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h | |||
@@ -5,18 +5,19 @@ | |||
5 | 5 | ||
6 | /* misc architecture specific prototypes */ | 6 | /* misc architecture specific prototypes */ |
7 | 7 | ||
8 | extern void early_idt_handler(void); | 8 | void early_idt_handler(void); |
9 | 9 | ||
10 | extern void system_call(void); | 10 | void system_call(void); |
11 | extern void syscall_init(void); | 11 | void syscall_init(void); |
12 | 12 | ||
13 | extern void ia32_syscall(void); | 13 | void ia32_syscall(void); |
14 | extern void ia32_cstar_target(void); | 14 | void ia32_cstar_target(void); |
15 | extern void ia32_sysenter_target(void); | 15 | void ia32_sysenter_target(void); |
16 | 16 | ||
17 | extern void syscall32_cpu_init(void); | 17 | void syscall32_cpu_init(void); |
18 | 18 | ||
19 | extern void check_efer(void); | 19 | void x86_configure_nx(void); |
20 | void x86_report_nx(void); | ||
20 | 21 | ||
21 | extern int reboot_force; | 22 | extern int reboot_force; |
22 | 23 | ||
diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h index 1b7ee5d673c2..0a5242428659 100644 --- a/arch/x86/include/asm/sections.h +++ b/arch/x86/include/asm/sections.h | |||
@@ -2,7 +2,13 @@ | |||
2 | #define _ASM_X86_SECTIONS_H | 2 | #define _ASM_X86_SECTIONS_H |
3 | 3 | ||
4 | #include <asm-generic/sections.h> | 4 | #include <asm-generic/sections.h> |
5 | #include <asm/uaccess.h> | ||
5 | 6 | ||
6 | extern char __brk_base[], __brk_limit[]; | 7 | extern char __brk_base[], __brk_limit[]; |
8 | extern struct exception_table_entry __stop___ex_table[]; | ||
9 | |||
10 | #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) | ||
11 | extern char __end_rodata_hpage_align[]; | ||
12 | #endif | ||
7 | 13 | ||
8 | #endif /* _ASM_X86_SECTIONS_H */ | 14 | #endif /* _ASM_X86_SECTIONS_H */ |
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 85574b7c1bc1..1fecb7e61130 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h | |||
@@ -57,7 +57,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area { | |||
57 | u16 intercept_dr_write; | 57 | u16 intercept_dr_write; |
58 | u32 intercept_exceptions; | 58 | u32 intercept_exceptions; |
59 | u64 intercept; | 59 | u64 intercept; |
60 | u8 reserved_1[44]; | 60 | u8 reserved_1[42]; |
61 | u16 pause_filter_count; | ||
61 | u64 iopm_base_pa; | 62 | u64 iopm_base_pa; |
62 | u64 msrpm_base_pa; | 63 | u64 msrpm_base_pa; |
63 | u64 tsc_offset; | 64 | u64 tsc_offset; |
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 72a6dcd1299b..9af9decb38c3 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h | |||
@@ -51,11 +51,6 @@ asmlinkage long sys32_sched_rr_get_interval(compat_pid_t, | |||
51 | asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *, compat_size_t); | 51 | asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *, compat_size_t); |
52 | asmlinkage long sys32_rt_sigqueueinfo(int, int, compat_siginfo_t __user *); | 52 | asmlinkage long sys32_rt_sigqueueinfo(int, int, compat_siginfo_t __user *); |
53 | 53 | ||
54 | #ifdef CONFIG_SYSCTL_SYSCALL | ||
55 | struct sysctl_ia32; | ||
56 | asmlinkage long sys32_sysctl(struct sysctl_ia32 __user *); | ||
57 | #endif | ||
58 | |||
59 | asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32); | 54 | asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32); |
60 | asmlinkage long sys32_pwrite(unsigned int, char __user *, u32, u32, u32); | 55 | asmlinkage long sys32_pwrite(unsigned int, char __user *, u32, u32, u32); |
61 | 56 | ||
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index d27d0a2fec4c..375c917c37d2 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h | |||
@@ -83,6 +83,7 @@ struct thread_info { | |||
83 | #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ | 83 | #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ |
84 | #define TIF_SECCOMP 8 /* secure computing */ | 84 | #define TIF_SECCOMP 8 /* secure computing */ |
85 | #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ | 85 | #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ |
86 | #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ | ||
86 | #define TIF_NOTSC 16 /* TSC is not accessible in userland */ | 87 | #define TIF_NOTSC 16 /* TSC is not accessible in userland */ |
87 | #define TIF_IA32 17 /* 32bit process */ | 88 | #define TIF_IA32 17 /* 32bit process */ |
88 | #define TIF_FORK 18 /* ret_from_fork */ | 89 | #define TIF_FORK 18 /* ret_from_fork */ |
@@ -107,6 +108,7 @@ struct thread_info { | |||
107 | #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) | 108 | #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) |
108 | #define _TIF_SECCOMP (1 << TIF_SECCOMP) | 109 | #define _TIF_SECCOMP (1 << TIF_SECCOMP) |
109 | #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) | 110 | #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) |
111 | #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) | ||
110 | #define _TIF_NOTSC (1 << TIF_NOTSC) | 112 | #define _TIF_NOTSC (1 << TIF_NOTSC) |
111 | #define _TIF_IA32 (1 << TIF_IA32) | 113 | #define _TIF_IA32 (1 << TIF_IA32) |
112 | #define _TIF_FORK (1 << TIF_FORK) | 114 | #define _TIF_FORK (1 << TIF_FORK) |
@@ -142,13 +144,14 @@ struct thread_info { | |||
142 | 144 | ||
143 | /* Only used for 64 bit */ | 145 | /* Only used for 64 bit */ |
144 | #define _TIF_DO_NOTIFY_MASK \ | 146 | #define _TIF_DO_NOTIFY_MASK \ |
145 | (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME) | 147 | (_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME | \ |
148 | _TIF_USER_RETURN_NOTIFY) | ||
146 | 149 | ||
147 | /* flags to check in __switch_to() */ | 150 | /* flags to check in __switch_to() */ |
148 | #define _TIF_WORK_CTXSW \ | 151 | #define _TIF_WORK_CTXSW \ |
149 | (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC) | 152 | (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC) |
150 | 153 | ||
151 | #define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW | 154 | #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) |
152 | #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) | 155 | #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) |
153 | 156 | ||
154 | #define PREEMPT_ACTIVE 0x10000000 | 157 | #define PREEMPT_ACTIVE 0x10000000 |
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index 6fb3c209a7e3..3baf379fa840 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h | |||
@@ -342,10 +342,11 @@ | |||
342 | #define __NR_pwritev 334 | 342 | #define __NR_pwritev 334 |
343 | #define __NR_rt_tgsigqueueinfo 335 | 343 | #define __NR_rt_tgsigqueueinfo 335 |
344 | #define __NR_perf_event_open 336 | 344 | #define __NR_perf_event_open 336 |
345 | #define __NR_recvmmsg 337 | ||
345 | 346 | ||
346 | #ifdef __KERNEL__ | 347 | #ifdef __KERNEL__ |
347 | 348 | ||
348 | #define NR_syscalls 337 | 349 | #define NR_syscalls 338 |
349 | 350 | ||
350 | #define __ARCH_WANT_IPC_PARSE_VERSION | 351 | #define __ARCH_WANT_IPC_PARSE_VERSION |
351 | #define __ARCH_WANT_OLD_READDIR | 352 | #define __ARCH_WANT_OLD_READDIR |
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 8d3ad0adbc68..4843f7ba754a 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h | |||
@@ -661,6 +661,8 @@ __SYSCALL(__NR_pwritev, sys_pwritev) | |||
661 | __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) | 661 | __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) |
662 | #define __NR_perf_event_open 298 | 662 | #define __NR_perf_event_open 298 |
663 | __SYSCALL(__NR_perf_event_open, sys_perf_event_open) | 663 | __SYSCALL(__NR_perf_event_open, sys_perf_event_open) |
664 | #define __NR_recvmmsg 299 | ||
665 | __SYSCALL(__NR_recvmmsg, sys_recvmmsg) | ||
664 | 666 | ||
665 | #ifndef __NO_STUBS | 667 | #ifndef __NO_STUBS |
666 | #define __ARCH_WANT_OLD_READDIR | 668 | #define __ARCH_WANT_OLD_READDIR |
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 272514c2d456..2b4945419a84 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
@@ -56,6 +56,7 @@ | |||
56 | #define SECONDARY_EXEC_ENABLE_VPID 0x00000020 | 56 | #define SECONDARY_EXEC_ENABLE_VPID 0x00000020 |
57 | #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 | 57 | #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 |
58 | #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 | 58 | #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 |
59 | #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 | ||
59 | 60 | ||
60 | 61 | ||
61 | #define PIN_BASED_EXT_INTR_MASK 0x00000001 | 62 | #define PIN_BASED_EXT_INTR_MASK 0x00000001 |
@@ -144,6 +145,8 @@ enum vmcs_field { | |||
144 | VM_ENTRY_INSTRUCTION_LEN = 0x0000401a, | 145 | VM_ENTRY_INSTRUCTION_LEN = 0x0000401a, |
145 | TPR_THRESHOLD = 0x0000401c, | 146 | TPR_THRESHOLD = 0x0000401c, |
146 | SECONDARY_VM_EXEC_CONTROL = 0x0000401e, | 147 | SECONDARY_VM_EXEC_CONTROL = 0x0000401e, |
148 | PLE_GAP = 0x00004020, | ||
149 | PLE_WINDOW = 0x00004022, | ||
147 | VM_INSTRUCTION_ERROR = 0x00004400, | 150 | VM_INSTRUCTION_ERROR = 0x00004400, |
148 | VM_EXIT_REASON = 0x00004402, | 151 | VM_EXIT_REASON = 0x00004402, |
149 | VM_EXIT_INTR_INFO = 0x00004404, | 152 | VM_EXIT_INTR_INFO = 0x00004404, |
@@ -248,6 +251,7 @@ enum vmcs_field { | |||
248 | #define EXIT_REASON_MSR_READ 31 | 251 | #define EXIT_REASON_MSR_READ 31 |
249 | #define EXIT_REASON_MSR_WRITE 32 | 252 | #define EXIT_REASON_MSR_WRITE 32 |
250 | #define EXIT_REASON_MWAIT_INSTRUCTION 36 | 253 | #define EXIT_REASON_MWAIT_INSTRUCTION 36 |
254 | #define EXIT_REASON_PAUSE_INSTRUCTION 40 | ||
251 | #define EXIT_REASON_MCE_DURING_VMENTRY 41 | 255 | #define EXIT_REASON_MCE_DURING_VMENTRY 41 |
252 | #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 | 256 | #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 |
253 | #define EXIT_REASON_APIC_ACCESS 44 | 257 | #define EXIT_REASON_APIC_ACCESS 44 |
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index d8e71459f025..ea0e8ea15e15 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h | |||
@@ -26,7 +26,7 @@ struct x86_init_mpparse { | |||
26 | void (*smp_read_mpc_oem)(struct mpc_table *mpc); | 26 | void (*smp_read_mpc_oem)(struct mpc_table *mpc); |
27 | void (*mpc_oem_pci_bus)(struct mpc_bus *m); | 27 | void (*mpc_oem_pci_bus)(struct mpc_bus *m); |
28 | void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name); | 28 | void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name); |
29 | void (*find_smp_config)(unsigned int reserve); | 29 | void (*find_smp_config)(void); |
30 | void (*get_smp_config)(unsigned int early); | 30 | void (*get_smp_config)(unsigned int early); |
31 | }; | 31 | }; |
32 | 32 | ||
@@ -125,12 +125,14 @@ struct x86_cpuinit_ops { | |||
125 | * @calibrate_tsc: calibrate TSC | 125 | * @calibrate_tsc: calibrate TSC |
126 | * @get_wallclock: get time from HW clock like RTC etc. | 126 | * @get_wallclock: get time from HW clock like RTC etc. |
127 | * @set_wallclock: set time back to HW clock | 127 | * @set_wallclock: set time back to HW clock |
128 | * @is_untracked_pat_range exclude from PAT logic | ||
128 | */ | 129 | */ |
129 | struct x86_platform_ops { | 130 | struct x86_platform_ops { |
130 | unsigned long (*calibrate_tsc)(void); | 131 | unsigned long (*calibrate_tsc)(void); |
131 | unsigned long (*get_wallclock)(void); | 132 | unsigned long (*get_wallclock)(void); |
132 | int (*set_wallclock)(unsigned long nowtime); | 133 | int (*set_wallclock)(unsigned long nowtime); |
133 | void (*iommu_shutdown)(void); | 134 | void (*iommu_shutdown)(void); |
135 | bool (*is_untracked_pat_range)(u64 start, u64 end); | ||
134 | }; | 136 | }; |
135 | 137 | ||
136 | extern struct x86_init_ops x86_init; | 138 | extern struct x86_init_ops x86_init; |
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 67e929b89875..87eee07da21f 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
@@ -624,6 +624,7 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table) | |||
624 | } | 624 | } |
625 | 625 | ||
626 | hpet_address = hpet_tbl->address.address; | 626 | hpet_address = hpet_tbl->address.address; |
627 | hpet_blockid = hpet_tbl->sequence; | ||
627 | 628 | ||
628 | /* | 629 | /* |
629 | * Some broken BIOSes advertise HPET at 0x0. We really do not | 630 | * Some broken BIOSes advertise HPET at 0x0. We really do not |
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index ca93638ba430..82e508677b91 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c | |||
@@ -78,12 +78,9 @@ int acpi_save_state_mem(void) | |||
78 | #ifndef CONFIG_64BIT | 78 | #ifndef CONFIG_64BIT |
79 | store_gdt((struct desc_ptr *)&header->pmode_gdt); | 79 | store_gdt((struct desc_ptr *)&header->pmode_gdt); |
80 | 80 | ||
81 | header->pmode_efer_low = nx_enabled; | 81 | if (rdmsr_safe(MSR_EFER, &header->pmode_efer_low, |
82 | if (header->pmode_efer_low & 1) { | 82 | &header->pmode_efer_high)) |
83 | /* This is strange, why not save efer, always? */ | 83 | header->pmode_efer_low = header->pmode_efer_high = 0; |
84 | rdmsr(MSR_EFER, header->pmode_efer_low, | ||
85 | header->pmode_efer_high); | ||
86 | } | ||
87 | #endif /* !CONFIG_64BIT */ | 84 | #endif /* !CONFIG_64BIT */ |
88 | 85 | ||
89 | header->pmode_cr0 = read_cr0(); | 86 | header->pmode_cr0 = read_cr0(); |
@@ -119,29 +116,32 @@ void acpi_restore_state_mem(void) | |||
119 | 116 | ||
120 | 117 | ||
121 | /** | 118 | /** |
122 | * acpi_reserve_bootmem - do _very_ early ACPI initialisation | 119 | * acpi_reserve_wakeup_memory - do _very_ early ACPI initialisation |
123 | * | 120 | * |
124 | * We allocate a page from the first 1MB of memory for the wakeup | 121 | * We allocate a page from the first 1MB of memory for the wakeup |
125 | * routine for when we come back from a sleep state. The | 122 | * routine for when we come back from a sleep state. The |
126 | * runtime allocator allows specification of <16MB pages, but not | 123 | * runtime allocator allows specification of <16MB pages, but not |
127 | * <1MB pages. | 124 | * <1MB pages. |
128 | */ | 125 | */ |
129 | void __init acpi_reserve_bootmem(void) | 126 | void __init acpi_reserve_wakeup_memory(void) |
130 | { | 127 | { |
128 | unsigned long mem; | ||
129 | |||
131 | if ((&wakeup_code_end - &wakeup_code_start) > WAKEUP_SIZE) { | 130 | if ((&wakeup_code_end - &wakeup_code_start) > WAKEUP_SIZE) { |
132 | printk(KERN_ERR | 131 | printk(KERN_ERR |
133 | "ACPI: Wakeup code way too big, S3 disabled.\n"); | 132 | "ACPI: Wakeup code way too big, S3 disabled.\n"); |
134 | return; | 133 | return; |
135 | } | 134 | } |
136 | 135 | ||
137 | acpi_realmode = (unsigned long)alloc_bootmem_low(WAKEUP_SIZE); | 136 | mem = find_e820_area(0, 1<<20, WAKEUP_SIZE, PAGE_SIZE); |
138 | 137 | ||
139 | if (!acpi_realmode) { | 138 | if (mem == -1L) { |
140 | printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n"); | 139 | printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n"); |
141 | return; | 140 | return; |
142 | } | 141 | } |
143 | 142 | acpi_realmode = (unsigned long) phys_to_virt(mem); | |
144 | acpi_wakeup_address = virt_to_phys((void *)acpi_realmode); | 143 | acpi_wakeup_address = mem; |
144 | reserve_early(mem, mem + WAKEUP_SIZE, "ACPI WAKEUP"); | ||
145 | } | 145 | } |
146 | 146 | ||
147 | 147 | ||
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index ad8c75b9e453..efb2b9cd132c 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -647,7 +647,7 @@ static int __init calibrate_APIC_clock(void) | |||
647 | calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; | 647 | calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; |
648 | 648 | ||
649 | apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); | 649 | apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); |
650 | apic_printk(APIC_VERBOSE, "..... mult: %ld\n", lapic_clockevent.mult); | 650 | apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult); |
651 | apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", | 651 | apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", |
652 | calibration_result); | 652 | calibration_result); |
653 | 653 | ||
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index c0b4468683f9..d5d498fbee4b 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
@@ -3267,7 +3267,8 @@ void destroy_irq(unsigned int irq) | |||
3267 | * MSI message composition | 3267 | * MSI message composition |
3268 | */ | 3268 | */ |
3269 | #ifdef CONFIG_PCI_MSI | 3269 | #ifdef CONFIG_PCI_MSI |
3270 | static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) | 3270 | static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, |
3271 | struct msi_msg *msg, u8 hpet_id) | ||
3271 | { | 3272 | { |
3272 | struct irq_cfg *cfg; | 3273 | struct irq_cfg *cfg; |
3273 | int err; | 3274 | int err; |
@@ -3301,7 +3302,10 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms | |||
3301 | irte.dest_id = IRTE_DEST(dest); | 3302 | irte.dest_id = IRTE_DEST(dest); |
3302 | 3303 | ||
3303 | /* Set source-id of interrupt request */ | 3304 | /* Set source-id of interrupt request */ |
3304 | set_msi_sid(&irte, pdev); | 3305 | if (pdev) |
3306 | set_msi_sid(&irte, pdev); | ||
3307 | else | ||
3308 | set_hpet_sid(&irte, hpet_id); | ||
3305 | 3309 | ||
3306 | modify_irte(irq, &irte); | 3310 | modify_irte(irq, &irte); |
3307 | 3311 | ||
@@ -3466,7 +3470,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) | |||
3466 | int ret; | 3470 | int ret; |
3467 | struct msi_msg msg; | 3471 | struct msi_msg msg; |
3468 | 3472 | ||
3469 | ret = msi_compose_msg(dev, irq, &msg); | 3473 | ret = msi_compose_msg(dev, irq, &msg, -1); |
3470 | if (ret < 0) | 3474 | if (ret < 0) |
3471 | return ret; | 3475 | return ret; |
3472 | 3476 | ||
@@ -3599,7 +3603,7 @@ int arch_setup_dmar_msi(unsigned int irq) | |||
3599 | int ret; | 3603 | int ret; |
3600 | struct msi_msg msg; | 3604 | struct msi_msg msg; |
3601 | 3605 | ||
3602 | ret = msi_compose_msg(NULL, irq, &msg); | 3606 | ret = msi_compose_msg(NULL, irq, &msg, -1); |
3603 | if (ret < 0) | 3607 | if (ret < 0) |
3604 | return ret; | 3608 | return ret; |
3605 | dmar_msi_write(irq, &msg); | 3609 | dmar_msi_write(irq, &msg); |
@@ -3639,6 +3643,19 @@ static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) | |||
3639 | 3643 | ||
3640 | #endif /* CONFIG_SMP */ | 3644 | #endif /* CONFIG_SMP */ |
3641 | 3645 | ||
3646 | static struct irq_chip ir_hpet_msi_type = { | ||
3647 | .name = "IR-HPET_MSI", | ||
3648 | .unmask = hpet_msi_unmask, | ||
3649 | .mask = hpet_msi_mask, | ||
3650 | #ifdef CONFIG_INTR_REMAP | ||
3651 | .ack = ir_ack_apic_edge, | ||
3652 | #ifdef CONFIG_SMP | ||
3653 | .set_affinity = ir_set_msi_irq_affinity, | ||
3654 | #endif | ||
3655 | #endif | ||
3656 | .retrigger = ioapic_retrigger_irq, | ||
3657 | }; | ||
3658 | |||
3642 | static struct irq_chip hpet_msi_type = { | 3659 | static struct irq_chip hpet_msi_type = { |
3643 | .name = "HPET_MSI", | 3660 | .name = "HPET_MSI", |
3644 | .unmask = hpet_msi_unmask, | 3661 | .unmask = hpet_msi_unmask, |
@@ -3650,20 +3667,36 @@ static struct irq_chip hpet_msi_type = { | |||
3650 | .retrigger = ioapic_retrigger_irq, | 3667 | .retrigger = ioapic_retrigger_irq, |
3651 | }; | 3668 | }; |
3652 | 3669 | ||
3653 | int arch_setup_hpet_msi(unsigned int irq) | 3670 | int arch_setup_hpet_msi(unsigned int irq, unsigned int id) |
3654 | { | 3671 | { |
3655 | int ret; | 3672 | int ret; |
3656 | struct msi_msg msg; | 3673 | struct msi_msg msg; |
3657 | struct irq_desc *desc = irq_to_desc(irq); | 3674 | struct irq_desc *desc = irq_to_desc(irq); |
3658 | 3675 | ||
3659 | ret = msi_compose_msg(NULL, irq, &msg); | 3676 | if (intr_remapping_enabled) { |
3677 | struct intel_iommu *iommu = map_hpet_to_ir(id); | ||
3678 | int index; | ||
3679 | |||
3680 | if (!iommu) | ||
3681 | return -1; | ||
3682 | |||
3683 | index = alloc_irte(iommu, irq, 1); | ||
3684 | if (index < 0) | ||
3685 | return -1; | ||
3686 | } | ||
3687 | |||
3688 | ret = msi_compose_msg(NULL, irq, &msg, id); | ||
3660 | if (ret < 0) | 3689 | if (ret < 0) |
3661 | return ret; | 3690 | return ret; |
3662 | 3691 | ||
3663 | hpet_msi_write(irq, &msg); | 3692 | hpet_msi_write(irq, &msg); |
3664 | desc->status |= IRQ_MOVE_PCNTXT; | 3693 | desc->status |= IRQ_MOVE_PCNTXT; |
3665 | set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq, | 3694 | if (irq_remapped(irq)) |
3666 | "edge"); | 3695 | set_irq_chip_and_handler_name(irq, &ir_hpet_msi_type, |
3696 | handle_edge_irq, "edge"); | ||
3697 | else | ||
3698 | set_irq_chip_and_handler_name(irq, &hpet_msi_type, | ||
3699 | handle_edge_irq, "edge"); | ||
3667 | 3700 | ||
3668 | return 0; | 3701 | return 0; |
3669 | } | 3702 | } |
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index 07cdbdcd7a92..98c4665f251c 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c | |||
@@ -264,11 +264,6 @@ static void __init smp_read_mpc_oem(struct mpc_table *mpc) | |||
264 | static __init void early_check_numaq(void) | 264 | static __init void early_check_numaq(void) |
265 | { | 265 | { |
266 | /* | 266 | /* |
267 | * Find possible boot-time SMP configuration: | ||
268 | */ | ||
269 | early_find_smp_config(); | ||
270 | |||
271 | /* | ||
272 | * get boot-time SMP configuration: | 267 | * get boot-time SMP configuration: |
273 | */ | 268 | */ |
274 | if (smp_found_config) | 269 | if (smp_found_config) |
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 130c4b934877..b684bb303cbf 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c | |||
@@ -30,10 +30,22 @@ | |||
30 | #include <asm/apic.h> | 30 | #include <asm/apic.h> |
31 | #include <asm/ipi.h> | 31 | #include <asm/ipi.h> |
32 | #include <asm/smp.h> | 32 | #include <asm/smp.h> |
33 | #include <asm/x86_init.h> | ||
33 | 34 | ||
34 | DEFINE_PER_CPU(int, x2apic_extra_bits); | 35 | DEFINE_PER_CPU(int, x2apic_extra_bits); |
35 | 36 | ||
36 | static enum uv_system_type uv_system_type; | 37 | static enum uv_system_type uv_system_type; |
38 | static u64 gru_start_paddr, gru_end_paddr; | ||
39 | |||
40 | static inline bool is_GRU_range(u64 start, u64 end) | ||
41 | { | ||
42 | return start >= gru_start_paddr && end <= gru_end_paddr; | ||
43 | } | ||
44 | |||
45 | static bool uv_is_untracked_pat_range(u64 start, u64 end) | ||
46 | { | ||
47 | return is_ISA_range(start, end) || is_GRU_range(start, end); | ||
48 | } | ||
37 | 49 | ||
38 | static int early_get_nodeid(void) | 50 | static int early_get_nodeid(void) |
39 | { | 51 | { |
@@ -49,6 +61,7 @@ static int early_get_nodeid(void) | |||
49 | static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | 61 | static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) |
50 | { | 62 | { |
51 | if (!strcmp(oem_id, "SGI")) { | 63 | if (!strcmp(oem_id, "SGI")) { |
64 | x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; | ||
52 | if (!strcmp(oem_table_id, "UVL")) | 65 | if (!strcmp(oem_table_id, "UVL")) |
53 | uv_system_type = UV_LEGACY_APIC; | 66 | uv_system_type = UV_LEGACY_APIC; |
54 | else if (!strcmp(oem_table_id, "UVX")) | 67 | else if (!strcmp(oem_table_id, "UVX")) |
@@ -385,8 +398,12 @@ static __init void map_gru_high(int max_pnode) | |||
385 | int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT; | 398 | int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT; |
386 | 399 | ||
387 | gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR); | 400 | gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR); |
388 | if (gru.s.enable) | 401 | if (gru.s.enable) { |
389 | map_high("GRU", gru.s.base, shift, max_pnode, map_wb); | 402 | map_high("GRU", gru.s.base, shift, max_pnode, map_wb); |
403 | gru_start_paddr = ((u64)gru.s.base << shift); | ||
404 | gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1); | ||
405 | |||
406 | } | ||
390 | } | 407 | } |
391 | 408 | ||
392 | static __init void map_mmr_high(int max_pnode) | 409 | static __init void map_mmr_high(int max_pnode) |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a4ec8b647544..c1afa990a6c8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -1136,7 +1136,7 @@ void __cpuinit cpu_init(void) | |||
1136 | wrmsrl(MSR_KERNEL_GS_BASE, 0); | 1136 | wrmsrl(MSR_KERNEL_GS_BASE, 0); |
1137 | barrier(); | 1137 | barrier(); |
1138 | 1138 | ||
1139 | check_efer(); | 1139 | x86_configure_nx(); |
1140 | if (cpu != 0) | 1140 | if (cpu != 0) |
1141 | enable_x2apic(); | 1141 | enable_x2apic(); |
1142 | 1142 | ||
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 40e1835b35e8..c900b73f9224 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -263,8 +263,12 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) | |||
263 | /* Don't do the funky fallback heuristics the AMD version employs | 263 | /* Don't do the funky fallback heuristics the AMD version employs |
264 | for now. */ | 264 | for now. */ |
265 | node = apicid_to_node[apicid]; | 265 | node = apicid_to_node[apicid]; |
266 | if (node == NUMA_NO_NODE || !node_online(node)) | 266 | if (node == NUMA_NO_NODE) |
267 | node = first_node(node_online_map); | 267 | node = first_node(node_online_map); |
268 | else if (!node_online(node)) { | ||
269 | /* reuse the value from init_cpu_to_node() */ | ||
270 | node = cpu_to_node(cpu); | ||
271 | } | ||
268 | numa_set_node(cpu, node); | 272 | numa_set_node(cpu, node); |
269 | 273 | ||
270 | printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node); | 274 | printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node); |
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 73c86db5acbe..09b1698e0466 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c | |||
@@ -170,6 +170,41 @@ static int __init cmp_range(const void *x1, const void *x2) | |||
170 | return start1 - start2; | 170 | return start1 - start2; |
171 | } | 171 | } |
172 | 172 | ||
173 | static int __init clean_sort_range(struct res_range *range, int az) | ||
174 | { | ||
175 | int i, j, k = az - 1, nr_range = 0; | ||
176 | |||
177 | for (i = 0; i < k; i++) { | ||
178 | if (range[i].end) | ||
179 | continue; | ||
180 | for (j = k; j > i; j--) { | ||
181 | if (range[j].end) { | ||
182 | k = j; | ||
183 | break; | ||
184 | } | ||
185 | } | ||
186 | if (j == i) | ||
187 | break; | ||
188 | range[i].start = range[k].start; | ||
189 | range[i].end = range[k].end; | ||
190 | range[k].start = 0; | ||
191 | range[k].end = 0; | ||
192 | k--; | ||
193 | } | ||
194 | /* count it */ | ||
195 | for (i = 0; i < az; i++) { | ||
196 | if (!range[i].end) { | ||
197 | nr_range = i; | ||
198 | break; | ||
199 | } | ||
200 | } | ||
201 | |||
202 | /* sort them */ | ||
203 | sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); | ||
204 | |||
205 | return nr_range; | ||
206 | } | ||
207 | |||
173 | #define BIOS_BUG_MSG KERN_WARNING \ | 208 | #define BIOS_BUG_MSG KERN_WARNING \ |
174 | "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" | 209 | "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" |
175 | 210 | ||
@@ -223,22 +258,18 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | |||
223 | subtract_range(range, extra_remove_base, | 258 | subtract_range(range, extra_remove_base, |
224 | extra_remove_base + extra_remove_size - 1); | 259 | extra_remove_base + extra_remove_size - 1); |
225 | 260 | ||
226 | /* get new range num */ | ||
227 | nr_range = 0; | ||
228 | for (i = 0; i < RANGE_NUM; i++) { | ||
229 | if (!range[i].end) | ||
230 | continue; | ||
231 | nr_range++; | ||
232 | } | ||
233 | if (debug_print) { | 261 | if (debug_print) { |
234 | printk(KERN_DEBUG "After UC checking\n"); | 262 | printk(KERN_DEBUG "After UC checking\n"); |
235 | for (i = 0; i < nr_range; i++) | 263 | for (i = 0; i < RANGE_NUM; i++) { |
264 | if (!range[i].end) | ||
265 | continue; | ||
236 | printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", | 266 | printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", |
237 | range[i].start, range[i].end + 1); | 267 | range[i].start, range[i].end + 1); |
268 | } | ||
238 | } | 269 | } |
239 | 270 | ||
240 | /* sort the ranges */ | 271 | /* sort the ranges */ |
241 | sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); | 272 | nr_range = clean_sort_range(range, RANGE_NUM); |
242 | if (debug_print) { | 273 | if (debug_print) { |
243 | printk(KERN_DEBUG "After sorting\n"); | 274 | printk(KERN_DEBUG "After sorting\n"); |
244 | for (i = 0; i < nr_range; i++) | 275 | for (i = 0; i < nr_range; i++) |
@@ -689,8 +720,6 @@ static int __init mtrr_need_cleanup(void) | |||
689 | continue; | 720 | continue; |
690 | if (!size) | 721 | if (!size) |
691 | type = MTRR_NUM_TYPES; | 722 | type = MTRR_NUM_TYPES; |
692 | if (type == MTRR_TYPE_WRPROT) | ||
693 | type = MTRR_TYPE_UNCACHABLE; | ||
694 | num[type]++; | 723 | num[type]++; |
695 | } | 724 | } |
696 | 725 | ||
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 4deb8fc849dd..63bca794c8f9 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -977,8 +977,8 @@ apicinterrupt UV_BAU_MESSAGE \ | |||
977 | #endif | 977 | #endif |
978 | apicinterrupt LOCAL_TIMER_VECTOR \ | 978 | apicinterrupt LOCAL_TIMER_VECTOR \ |
979 | apic_timer_interrupt smp_apic_timer_interrupt | 979 | apic_timer_interrupt smp_apic_timer_interrupt |
980 | apicinterrupt GENERIC_INTERRUPT_VECTOR \ | 980 | apicinterrupt X86_PLATFORM_IPI_VECTOR \ |
981 | generic_interrupt smp_generic_interrupt | 981 | x86_platform_ipi smp_x86_platform_ipi |
982 | 982 | ||
983 | #ifdef CONFIG_SMP | 983 | #ifdef CONFIG_SMP |
984 | apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \ | 984 | apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \ |
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 5a1b9758fd62..309689245431 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
@@ -189,9 +189,26 @@ static void wait_for_nmi(void) | |||
189 | nmi_wait_count++; | 189 | nmi_wait_count++; |
190 | } | 190 | } |
191 | 191 | ||
192 | static inline int | ||
193 | within(unsigned long addr, unsigned long start, unsigned long end) | ||
194 | { | ||
195 | return addr >= start && addr < end; | ||
196 | } | ||
197 | |||
192 | static int | 198 | static int |
193 | do_ftrace_mod_code(unsigned long ip, void *new_code) | 199 | do_ftrace_mod_code(unsigned long ip, void *new_code) |
194 | { | 200 | { |
201 | /* | ||
202 | * On x86_64, kernel text mappings are mapped read-only with | ||
203 | * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead | ||
204 | * of the kernel text mapping to modify the kernel text. | ||
205 | * | ||
206 | * For 32bit kernels, these mappings are same and we can use | ||
207 | * kernel identity mapping to modify code. | ||
208 | */ | ||
209 | if (within(ip, (unsigned long)_text, (unsigned long)_etext)) | ||
210 | ip = (unsigned long)__va(__pa(ip)); | ||
211 | |||
195 | mod_code_ip = (void *)ip; | 212 | mod_code_ip = (void *)ip; |
196 | mod_code_newcode = new_code; | 213 | mod_code_newcode = new_code; |
197 | 214 | ||
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 050c278481b1..7fd318bac59c 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -18,6 +18,8 @@ | |||
18 | #include <asm/asm-offsets.h> | 18 | #include <asm/asm-offsets.h> |
19 | #include <asm/setup.h> | 19 | #include <asm/setup.h> |
20 | #include <asm/processor-flags.h> | 20 | #include <asm/processor-flags.h> |
21 | #include <asm/msr-index.h> | ||
22 | #include <asm/cpufeature.h> | ||
21 | #include <asm/percpu.h> | 23 | #include <asm/percpu.h> |
22 | 24 | ||
23 | /* Physical address */ | 25 | /* Physical address */ |
@@ -297,25 +299,27 @@ ENTRY(startup_32_smp) | |||
297 | orl %edx,%eax | 299 | orl %edx,%eax |
298 | movl %eax,%cr4 | 300 | movl %eax,%cr4 |
299 | 301 | ||
300 | btl $5, %eax # check if PAE is enabled | 302 | testb $X86_CR4_PAE, %al # check if PAE is enabled |
301 | jnc 6f | 303 | jz 6f |
302 | 304 | ||
303 | /* Check if extended functions are implemented */ | 305 | /* Check if extended functions are implemented */ |
304 | movl $0x80000000, %eax | 306 | movl $0x80000000, %eax |
305 | cpuid | 307 | cpuid |
306 | cmpl $0x80000000, %eax | 308 | /* Value must be in the range 0x80000001 to 0x8000ffff */ |
307 | jbe 6f | 309 | subl $0x80000001, %eax |
310 | cmpl $(0x8000ffff-0x80000001), %eax | ||
311 | ja 6f | ||
308 | mov $0x80000001, %eax | 312 | mov $0x80000001, %eax |
309 | cpuid | 313 | cpuid |
310 | /* Execute Disable bit supported? */ | 314 | /* Execute Disable bit supported? */ |
311 | btl $20, %edx | 315 | btl $(X86_FEATURE_NX & 31), %edx |
312 | jnc 6f | 316 | jnc 6f |
313 | 317 | ||
314 | /* Setup EFER (Extended Feature Enable Register) */ | 318 | /* Setup EFER (Extended Feature Enable Register) */ |
315 | movl $0xc0000080, %ecx | 319 | movl $MSR_EFER, %ecx |
316 | rdmsr | 320 | rdmsr |
317 | 321 | ||
318 | btsl $11, %eax | 322 | btsl $_EFER_NX, %eax |
319 | /* Make changes effective */ | 323 | /* Make changes effective */ |
320 | wrmsr | 324 | wrmsr |
321 | 325 | ||
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 22db86a37643..2d8b5035371c 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -262,11 +262,11 @@ ENTRY(secondary_startup_64) | |||
262 | .quad x86_64_start_kernel | 262 | .quad x86_64_start_kernel |
263 | ENTRY(initial_gs) | 263 | ENTRY(initial_gs) |
264 | .quad INIT_PER_CPU_VAR(irq_stack_union) | 264 | .quad INIT_PER_CPU_VAR(irq_stack_union) |
265 | __FINITDATA | ||
266 | 265 | ||
267 | ENTRY(stack_start) | 266 | ENTRY(stack_start) |
268 | .quad init_thread_union+THREAD_SIZE-8 | 267 | .quad init_thread_union+THREAD_SIZE-8 |
269 | .word 0 | 268 | .word 0 |
269 | __FINITDATA | ||
270 | 270 | ||
271 | bad_address: | 271 | bad_address: |
272 | jmp bad_address | 272 | jmp bad_address |
@@ -340,6 +340,7 @@ ENTRY(name) | |||
340 | i = i + 1 ; \ | 340 | i = i + 1 ; \ |
341 | .endr | 341 | .endr |
342 | 342 | ||
343 | .data | ||
343 | /* | 344 | /* |
344 | * This default setting generates an ident mapping at address 0x100000 | 345 | * This default setting generates an ident mapping at address 0x100000 |
345 | * and a mapping for the kernel that precisely maps virtual address | 346 | * and a mapping for the kernel that precisely maps virtual address |
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index dedc2bddf7a5..ba6e65884603 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -33,6 +33,7 @@ | |||
33 | * HPET address is set in acpi/boot.c, when an ACPI entry exists | 33 | * HPET address is set in acpi/boot.c, when an ACPI entry exists |
34 | */ | 34 | */ |
35 | unsigned long hpet_address; | 35 | unsigned long hpet_address; |
36 | u8 hpet_blockid; /* OS timer block num */ | ||
36 | #ifdef CONFIG_PCI_MSI | 37 | #ifdef CONFIG_PCI_MSI |
37 | static unsigned long hpet_num_timers; | 38 | static unsigned long hpet_num_timers; |
38 | #endif | 39 | #endif |
@@ -47,12 +48,12 @@ struct hpet_dev { | |||
47 | char name[10]; | 48 | char name[10]; |
48 | }; | 49 | }; |
49 | 50 | ||
50 | unsigned long hpet_readl(unsigned long a) | 51 | inline unsigned int hpet_readl(unsigned int a) |
51 | { | 52 | { |
52 | return readl(hpet_virt_address + a); | 53 | return readl(hpet_virt_address + a); |
53 | } | 54 | } |
54 | 55 | ||
55 | static inline void hpet_writel(unsigned long d, unsigned long a) | 56 | static inline void hpet_writel(unsigned int d, unsigned int a) |
56 | { | 57 | { |
57 | writel(d, hpet_virt_address + a); | 58 | writel(d, hpet_virt_address + a); |
58 | } | 59 | } |
@@ -167,7 +168,7 @@ do { \ | |||
167 | 168 | ||
168 | static void hpet_reserve_msi_timers(struct hpet_data *hd); | 169 | static void hpet_reserve_msi_timers(struct hpet_data *hd); |
169 | 170 | ||
170 | static void hpet_reserve_platform_timers(unsigned long id) | 171 | static void hpet_reserve_platform_timers(unsigned int id) |
171 | { | 172 | { |
172 | struct hpet __iomem *hpet = hpet_virt_address; | 173 | struct hpet __iomem *hpet = hpet_virt_address; |
173 | struct hpet_timer __iomem *timer = &hpet->hpet_timers[2]; | 174 | struct hpet_timer __iomem *timer = &hpet->hpet_timers[2]; |
@@ -205,7 +206,7 @@ static void hpet_reserve_platform_timers(unsigned long id) | |||
205 | 206 | ||
206 | } | 207 | } |
207 | #else | 208 | #else |
208 | static void hpet_reserve_platform_timers(unsigned long id) { } | 209 | static void hpet_reserve_platform_timers(unsigned int id) { } |
209 | #endif | 210 | #endif |
210 | 211 | ||
211 | /* | 212 | /* |
@@ -246,7 +247,7 @@ static void hpet_reset_counter(void) | |||
246 | 247 | ||
247 | static void hpet_start_counter(void) | 248 | static void hpet_start_counter(void) |
248 | { | 249 | { |
249 | unsigned long cfg = hpet_readl(HPET_CFG); | 250 | unsigned int cfg = hpet_readl(HPET_CFG); |
250 | cfg |= HPET_CFG_ENABLE; | 251 | cfg |= HPET_CFG_ENABLE; |
251 | hpet_writel(cfg, HPET_CFG); | 252 | hpet_writel(cfg, HPET_CFG); |
252 | } | 253 | } |
@@ -271,7 +272,7 @@ static void hpet_resume_counter(void) | |||
271 | 272 | ||
272 | static void hpet_enable_legacy_int(void) | 273 | static void hpet_enable_legacy_int(void) |
273 | { | 274 | { |
274 | unsigned long cfg = hpet_readl(HPET_CFG); | 275 | unsigned int cfg = hpet_readl(HPET_CFG); |
275 | 276 | ||
276 | cfg |= HPET_CFG_LEGACY; | 277 | cfg |= HPET_CFG_LEGACY; |
277 | hpet_writel(cfg, HPET_CFG); | 278 | hpet_writel(cfg, HPET_CFG); |
@@ -314,7 +315,7 @@ static int hpet_setup_msi_irq(unsigned int irq); | |||
314 | static void hpet_set_mode(enum clock_event_mode mode, | 315 | static void hpet_set_mode(enum clock_event_mode mode, |
315 | struct clock_event_device *evt, int timer) | 316 | struct clock_event_device *evt, int timer) |
316 | { | 317 | { |
317 | unsigned long cfg, cmp, now; | 318 | unsigned int cfg, cmp, now; |
318 | uint64_t delta; | 319 | uint64_t delta; |
319 | 320 | ||
320 | switch (mode) { | 321 | switch (mode) { |
@@ -323,7 +324,7 @@ static void hpet_set_mode(enum clock_event_mode mode, | |||
323 | delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult; | 324 | delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult; |
324 | delta >>= evt->shift; | 325 | delta >>= evt->shift; |
325 | now = hpet_readl(HPET_COUNTER); | 326 | now = hpet_readl(HPET_COUNTER); |
326 | cmp = now + (unsigned long) delta; | 327 | cmp = now + (unsigned int) delta; |
327 | cfg = hpet_readl(HPET_Tn_CFG(timer)); | 328 | cfg = hpet_readl(HPET_Tn_CFG(timer)); |
328 | /* Make sure we use edge triggered interrupts */ | 329 | /* Make sure we use edge triggered interrupts */ |
329 | cfg &= ~HPET_TN_LEVEL; | 330 | cfg &= ~HPET_TN_LEVEL; |
@@ -339,7 +340,7 @@ static void hpet_set_mode(enum clock_event_mode mode, | |||
339 | * (See AMD-8111 HyperTransport I/O Hub Data Sheet, | 340 | * (See AMD-8111 HyperTransport I/O Hub Data Sheet, |
340 | * Publication # 24674) | 341 | * Publication # 24674) |
341 | */ | 342 | */ |
342 | hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer)); | 343 | hpet_writel((unsigned int) delta, HPET_Tn_CMP(timer)); |
343 | hpet_start_counter(); | 344 | hpet_start_counter(); |
344 | hpet_print_config(); | 345 | hpet_print_config(); |
345 | break; | 346 | break; |
@@ -383,13 +384,24 @@ static int hpet_next_event(unsigned long delta, | |||
383 | hpet_writel(cnt, HPET_Tn_CMP(timer)); | 384 | hpet_writel(cnt, HPET_Tn_CMP(timer)); |
384 | 385 | ||
385 | /* | 386 | /* |
386 | * We need to read back the CMP register to make sure that | 387 | * We need to read back the CMP register on certain HPET |
387 | * what we wrote hit the chip before we compare it to the | 388 | * implementations (ATI chipsets) which seem to delay the |
388 | * counter. | 389 | * transfer of the compare register into the internal compare |
390 | * logic. With small deltas this might actually be too late as | ||
391 | * the counter could already be higher than the compare value | ||
392 | * at that point and we would wait for the next hpet interrupt | ||
393 | * forever. We found out that reading the CMP register back | ||
394 | * forces the transfer so we can rely on the comparison with | ||
395 | * the counter register below. If the read back from the | ||
396 | * compare register does not match the value we programmed | ||
397 | * then we might have a real hardware problem. We can not do | ||
398 | * much about it here, but at least alert the user/admin with | ||
399 | * a prominent warning. | ||
389 | */ | 400 | */ |
390 | WARN_ON_ONCE((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt); | 401 | WARN_ONCE(hpet_readl(HPET_Tn_CMP(timer)) != cnt, |
402 | KERN_WARNING "hpet: compare register read back failed.\n"); | ||
391 | 403 | ||
392 | return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; | 404 | return (s32)(hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; |
393 | } | 405 | } |
394 | 406 | ||
395 | static void hpet_legacy_set_mode(enum clock_event_mode mode, | 407 | static void hpet_legacy_set_mode(enum clock_event_mode mode, |
@@ -415,7 +427,7 @@ static struct hpet_dev *hpet_devs; | |||
415 | void hpet_msi_unmask(unsigned int irq) | 427 | void hpet_msi_unmask(unsigned int irq) |
416 | { | 428 | { |
417 | struct hpet_dev *hdev = get_irq_data(irq); | 429 | struct hpet_dev *hdev = get_irq_data(irq); |
418 | unsigned long cfg; | 430 | unsigned int cfg; |
419 | 431 | ||
420 | /* unmask it */ | 432 | /* unmask it */ |
421 | cfg = hpet_readl(HPET_Tn_CFG(hdev->num)); | 433 | cfg = hpet_readl(HPET_Tn_CFG(hdev->num)); |
@@ -425,7 +437,7 @@ void hpet_msi_unmask(unsigned int irq) | |||
425 | 437 | ||
426 | void hpet_msi_mask(unsigned int irq) | 438 | void hpet_msi_mask(unsigned int irq) |
427 | { | 439 | { |
428 | unsigned long cfg; | 440 | unsigned int cfg; |
429 | struct hpet_dev *hdev = get_irq_data(irq); | 441 | struct hpet_dev *hdev = get_irq_data(irq); |
430 | 442 | ||
431 | /* mask it */ | 443 | /* mask it */ |
@@ -467,7 +479,7 @@ static int hpet_msi_next_event(unsigned long delta, | |||
467 | 479 | ||
468 | static int hpet_setup_msi_irq(unsigned int irq) | 480 | static int hpet_setup_msi_irq(unsigned int irq) |
469 | { | 481 | { |
470 | if (arch_setup_hpet_msi(irq)) { | 482 | if (arch_setup_hpet_msi(irq, hpet_blockid)) { |
471 | destroy_irq(irq); | 483 | destroy_irq(irq); |
472 | return -EINVAL; | 484 | return -EINVAL; |
473 | } | 485 | } |
@@ -584,6 +596,8 @@ static void hpet_msi_capability_lookup(unsigned int start_timer) | |||
584 | unsigned int num_timers_used = 0; | 596 | unsigned int num_timers_used = 0; |
585 | int i; | 597 | int i; |
586 | 598 | ||
599 | if (boot_cpu_has(X86_FEATURE_ARAT)) | ||
600 | return; | ||
587 | id = hpet_readl(HPET_ID); | 601 | id = hpet_readl(HPET_ID); |
588 | 602 | ||
589 | num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT); | 603 | num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT); |
@@ -598,7 +612,7 @@ static void hpet_msi_capability_lookup(unsigned int start_timer) | |||
598 | 612 | ||
599 | for (i = start_timer; i < num_timers - RESERVE_TIMERS; i++) { | 613 | for (i = start_timer; i < num_timers - RESERVE_TIMERS; i++) { |
600 | struct hpet_dev *hdev = &hpet_devs[num_timers_used]; | 614 | struct hpet_dev *hdev = &hpet_devs[num_timers_used]; |
601 | unsigned long cfg = hpet_readl(HPET_Tn_CFG(i)); | 615 | unsigned int cfg = hpet_readl(HPET_Tn_CFG(i)); |
602 | 616 | ||
603 | /* Only consider HPET timer with MSI support */ | 617 | /* Only consider HPET timer with MSI support */ |
604 | if (!(cfg & HPET_TN_FSB_CAP)) | 618 | if (!(cfg & HPET_TN_FSB_CAP)) |
@@ -813,7 +827,7 @@ static int hpet_clocksource_register(void) | |||
813 | */ | 827 | */ |
814 | int __init hpet_enable(void) | 828 | int __init hpet_enable(void) |
815 | { | 829 | { |
816 | unsigned long id; | 830 | unsigned int id; |
817 | int i; | 831 | int i; |
818 | 832 | ||
819 | if (!is_hpet_capable()) | 833 | if (!is_hpet_capable()) |
@@ -872,10 +886,8 @@ int __init hpet_enable(void) | |||
872 | 886 | ||
873 | if (id & HPET_ID_LEGSUP) { | 887 | if (id & HPET_ID_LEGSUP) { |
874 | hpet_legacy_clockevent_register(); | 888 | hpet_legacy_clockevent_register(); |
875 | hpet_msi_capability_lookup(2); | ||
876 | return 1; | 889 | return 1; |
877 | } | 890 | } |
878 | hpet_msi_capability_lookup(0); | ||
879 | return 0; | 891 | return 0; |
880 | 892 | ||
881 | out_nohpet: | 893 | out_nohpet: |
@@ -908,9 +920,17 @@ static __init int hpet_late_init(void) | |||
908 | if (!hpet_virt_address) | 920 | if (!hpet_virt_address) |
909 | return -ENODEV; | 921 | return -ENODEV; |
910 | 922 | ||
923 | if (hpet_readl(HPET_ID) & HPET_ID_LEGSUP) | ||
924 | hpet_msi_capability_lookup(2); | ||
925 | else | ||
926 | hpet_msi_capability_lookup(0); | ||
927 | |||
911 | hpet_reserve_platform_timers(hpet_readl(HPET_ID)); | 928 | hpet_reserve_platform_timers(hpet_readl(HPET_ID)); |
912 | hpet_print_config(); | 929 | hpet_print_config(); |
913 | 930 | ||
931 | if (boot_cpu_has(X86_FEATURE_ARAT)) | ||
932 | return 0; | ||
933 | |||
914 | for_each_online_cpu(cpu) { | 934 | for_each_online_cpu(cpu) { |
915 | hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu); | 935 | hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu); |
916 | } | 936 | } |
@@ -925,7 +945,7 @@ fs_initcall(hpet_late_init); | |||
925 | void hpet_disable(void) | 945 | void hpet_disable(void) |
926 | { | 946 | { |
927 | if (is_hpet_capable()) { | 947 | if (is_hpet_capable()) { |
928 | unsigned long cfg = hpet_readl(HPET_CFG); | 948 | unsigned int cfg = hpet_readl(HPET_CFG); |
929 | 949 | ||
930 | if (hpet_legacy_int_enabled) { | 950 | if (hpet_legacy_int_enabled) { |
931 | cfg &= ~HPET_CFG_LEGACY; | 951 | cfg &= ~HPET_CFG_LEGACY; |
@@ -965,8 +985,8 @@ static int hpet_prev_update_sec; | |||
965 | static struct rtc_time hpet_alarm_time; | 985 | static struct rtc_time hpet_alarm_time; |
966 | static unsigned long hpet_pie_count; | 986 | static unsigned long hpet_pie_count; |
967 | static u32 hpet_t1_cmp; | 987 | static u32 hpet_t1_cmp; |
968 | static unsigned long hpet_default_delta; | 988 | static u32 hpet_default_delta; |
969 | static unsigned long hpet_pie_delta; | 989 | static u32 hpet_pie_delta; |
970 | static unsigned long hpet_pie_limit; | 990 | static unsigned long hpet_pie_limit; |
971 | 991 | ||
972 | static rtc_irq_handler irq_handler; | 992 | static rtc_irq_handler irq_handler; |
@@ -1017,7 +1037,8 @@ EXPORT_SYMBOL_GPL(hpet_unregister_irq_handler); | |||
1017 | */ | 1037 | */ |
1018 | int hpet_rtc_timer_init(void) | 1038 | int hpet_rtc_timer_init(void) |
1019 | { | 1039 | { |
1020 | unsigned long cfg, cnt, delta, flags; | 1040 | unsigned int cfg, cnt, delta; |
1041 | unsigned long flags; | ||
1021 | 1042 | ||
1022 | if (!is_hpet_enabled()) | 1043 | if (!is_hpet_enabled()) |
1023 | return 0; | 1044 | return 0; |
@@ -1027,7 +1048,7 @@ int hpet_rtc_timer_init(void) | |||
1027 | 1048 | ||
1028 | clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC; | 1049 | clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC; |
1029 | clc >>= hpet_clockevent.shift + DEFAULT_RTC_SHIFT; | 1050 | clc >>= hpet_clockevent.shift + DEFAULT_RTC_SHIFT; |
1030 | hpet_default_delta = (unsigned long) clc; | 1051 | hpet_default_delta = clc; |
1031 | } | 1052 | } |
1032 | 1053 | ||
1033 | if (!(hpet_rtc_flags & RTC_PIE) || hpet_pie_limit) | 1054 | if (!(hpet_rtc_flags & RTC_PIE) || hpet_pie_limit) |
@@ -1113,7 +1134,7 @@ int hpet_set_periodic_freq(unsigned long freq) | |||
1113 | clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC; | 1134 | clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC; |
1114 | do_div(clc, freq); | 1135 | do_div(clc, freq); |
1115 | clc >>= hpet_clockevent.shift; | 1136 | clc >>= hpet_clockevent.shift; |
1116 | hpet_pie_delta = (unsigned long) clc; | 1137 | hpet_pie_delta = clc; |
1117 | } | 1138 | } |
1118 | return 1; | 1139 | return 1; |
1119 | } | 1140 | } |
@@ -1127,7 +1148,7 @@ EXPORT_SYMBOL_GPL(hpet_rtc_dropped_irq); | |||
1127 | 1148 | ||
1128 | static void hpet_rtc_timer_reinit(void) | 1149 | static void hpet_rtc_timer_reinit(void) |
1129 | { | 1150 | { |
1130 | unsigned long cfg, delta; | 1151 | unsigned int cfg, delta; |
1131 | int lost_ints = -1; | 1152 | int lost_ints = -1; |
1132 | 1153 | ||
1133 | if (unlikely(!hpet_rtc_flags)) { | 1154 | if (unlikely(!hpet_rtc_flags)) { |
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index fee6cc2b2079..664bcb7384ac 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -18,7 +18,7 @@ | |||
18 | atomic_t irq_err_count; | 18 | atomic_t irq_err_count; |
19 | 19 | ||
20 | /* Function pointer for generic interrupt vector handling */ | 20 | /* Function pointer for generic interrupt vector handling */ |
21 | void (*generic_interrupt_extension)(void) = NULL; | 21 | void (*x86_platform_ipi_callback)(void) = NULL; |
22 | 22 | ||
23 | /* | 23 | /* |
24 | * 'what should we do if we get a hw irq event on an illegal vector'. | 24 | * 'what should we do if we get a hw irq event on an illegal vector'. |
@@ -72,10 +72,10 @@ static int show_other_interrupts(struct seq_file *p, int prec) | |||
72 | seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs); | 72 | seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs); |
73 | seq_printf(p, " Performance pending work\n"); | 73 | seq_printf(p, " Performance pending work\n"); |
74 | #endif | 74 | #endif |
75 | if (generic_interrupt_extension) { | 75 | if (x86_platform_ipi_callback) { |
76 | seq_printf(p, "%*s: ", prec, "PLT"); | 76 | seq_printf(p, "%*s: ", prec, "PLT"); |
77 | for_each_online_cpu(j) | 77 | for_each_online_cpu(j) |
78 | seq_printf(p, "%10u ", irq_stats(j)->generic_irqs); | 78 | seq_printf(p, "%10u ", irq_stats(j)->x86_platform_ipis); |
79 | seq_printf(p, " Platform interrupts\n"); | 79 | seq_printf(p, " Platform interrupts\n"); |
80 | } | 80 | } |
81 | #ifdef CONFIG_SMP | 81 | #ifdef CONFIG_SMP |
@@ -187,8 +187,8 @@ u64 arch_irq_stat_cpu(unsigned int cpu) | |||
187 | sum += irq_stats(cpu)->apic_perf_irqs; | 187 | sum += irq_stats(cpu)->apic_perf_irqs; |
188 | sum += irq_stats(cpu)->apic_pending_irqs; | 188 | sum += irq_stats(cpu)->apic_pending_irqs; |
189 | #endif | 189 | #endif |
190 | if (generic_interrupt_extension) | 190 | if (x86_platform_ipi_callback) |
191 | sum += irq_stats(cpu)->generic_irqs; | 191 | sum += irq_stats(cpu)->x86_platform_ipis; |
192 | #ifdef CONFIG_SMP | 192 | #ifdef CONFIG_SMP |
193 | sum += irq_stats(cpu)->irq_resched_count; | 193 | sum += irq_stats(cpu)->irq_resched_count; |
194 | sum += irq_stats(cpu)->irq_call_count; | 194 | sum += irq_stats(cpu)->irq_call_count; |
@@ -251,9 +251,9 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs) | |||
251 | } | 251 | } |
252 | 252 | ||
253 | /* | 253 | /* |
254 | * Handler for GENERIC_INTERRUPT_VECTOR. | 254 | * Handler for X86_PLATFORM_IPI_VECTOR. |
255 | */ | 255 | */ |
256 | void smp_generic_interrupt(struct pt_regs *regs) | 256 | void smp_x86_platform_ipi(struct pt_regs *regs) |
257 | { | 257 | { |
258 | struct pt_regs *old_regs = set_irq_regs(regs); | 258 | struct pt_regs *old_regs = set_irq_regs(regs); |
259 | 259 | ||
@@ -263,10 +263,10 @@ void smp_generic_interrupt(struct pt_regs *regs) | |||
263 | 263 | ||
264 | irq_enter(); | 264 | irq_enter(); |
265 | 265 | ||
266 | inc_irq_stat(generic_irqs); | 266 | inc_irq_stat(x86_platform_ipis); |
267 | 267 | ||
268 | if (generic_interrupt_extension) | 268 | if (x86_platform_ipi_callback) |
269 | generic_interrupt_extension(); | 269 | x86_platform_ipi_callback(); |
270 | 270 | ||
271 | irq_exit(); | 271 | irq_exit(); |
272 | 272 | ||
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 40f30773fb29..d5932226614f 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
@@ -200,8 +200,8 @@ static void __init apic_intr_init(void) | |||
200 | /* self generated IPI for local APIC timer */ | 200 | /* self generated IPI for local APIC timer */ |
201 | alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); | 201 | alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); |
202 | 202 | ||
203 | /* generic IPI for platform specific use */ | 203 | /* IPI for X86 platform specific use */ |
204 | alloc_intr_gate(GENERIC_INTERRUPT_VECTOR, generic_interrupt); | 204 | alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi); |
205 | 205 | ||
206 | /* IPI vectors for APIC spurious and error interrupts */ | 206 | /* IPI vectors for APIC spurious and error interrupts */ |
207 | alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); | 207 | alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); |
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index c843f8406da2..a3fa43ba5d3b 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c | |||
@@ -158,8 +158,7 @@ int machine_kexec_prepare(struct kimage *image) | |||
158 | { | 158 | { |
159 | int error; | 159 | int error; |
160 | 160 | ||
161 | if (nx_enabled) | 161 | set_pages_x(image->control_code_page, 1); |
162 | set_pages_x(image->control_code_page, 1); | ||
163 | error = machine_kexec_alloc_page_tables(image); | 162 | error = machine_kexec_alloc_page_tables(image); |
164 | if (error) | 163 | if (error) |
165 | return error; | 164 | return error; |
@@ -173,8 +172,7 @@ int machine_kexec_prepare(struct kimage *image) | |||
173 | */ | 172 | */ |
174 | void machine_kexec_cleanup(struct kimage *image) | 173 | void machine_kexec_cleanup(struct kimage *image) |
175 | { | 174 | { |
176 | if (nx_enabled) | 175 | set_pages_nx(image->control_code_page, 1); |
177 | set_pages_nx(image->control_code_page, 1); | ||
178 | machine_kexec_free_page_tables(image); | 176 | machine_kexec_free_page_tables(image); |
179 | } | 177 | } |
180 | 178 | ||
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index f4c538b681ca..63123d902103 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c | |||
@@ -33,6 +33,9 @@ MODULE_LICENSE("GPL v2"); | |||
33 | #define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000 | 33 | #define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000 |
34 | #define UCODE_UCODE_TYPE 0x00000001 | 34 | #define UCODE_UCODE_TYPE 0x00000001 |
35 | 35 | ||
36 | const struct firmware *firmware; | ||
37 | static int supported_cpu; | ||
38 | |||
36 | struct equiv_cpu_entry { | 39 | struct equiv_cpu_entry { |
37 | u32 installed_cpu; | 40 | u32 installed_cpu; |
38 | u32 fixed_errata_mask; | 41 | u32 fixed_errata_mask; |
@@ -71,17 +74,14 @@ static struct equiv_cpu_entry *equiv_cpu_table; | |||
71 | 74 | ||
72 | static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) | 75 | static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) |
73 | { | 76 | { |
74 | struct cpuinfo_x86 *c = &cpu_data(cpu); | ||
75 | u32 dummy; | 77 | u32 dummy; |
76 | 78 | ||
77 | memset(csig, 0, sizeof(*csig)); | 79 | if (!supported_cpu) |
78 | if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { | ||
79 | printk(KERN_WARNING "microcode: CPU%d: AMD CPU family 0x%x not " | ||
80 | "supported\n", cpu, c->x86); | ||
81 | return -1; | 80 | return -1; |
82 | } | 81 | |
82 | memset(csig, 0, sizeof(*csig)); | ||
83 | rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy); | 83 | rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy); |
84 | printk(KERN_INFO "microcode: CPU%d: patch_level=0x%x\n", cpu, csig->rev); | 84 | pr_info("microcode: CPU%d: patch_level=0x%x\n", cpu, csig->rev); |
85 | return 0; | 85 | return 0; |
86 | } | 86 | } |
87 | 87 | ||
@@ -103,22 +103,15 @@ static int get_matching_microcode(int cpu, void *mc, int rev) | |||
103 | i++; | 103 | i++; |
104 | } | 104 | } |
105 | 105 | ||
106 | if (!equiv_cpu_id) { | 106 | if (!equiv_cpu_id) |
107 | printk(KERN_WARNING "microcode: CPU%d: cpu revision " | ||
108 | "not listed in equivalent cpu table\n", cpu); | ||
109 | return 0; | 107 | return 0; |
110 | } | ||
111 | 108 | ||
112 | if (mc_header->processor_rev_id != equiv_cpu_id) { | 109 | if (mc_header->processor_rev_id != equiv_cpu_id) |
113 | printk(KERN_ERR "microcode: CPU%d: patch mismatch " | ||
114 | "(processor_rev_id: %x, equiv_cpu_id: %x)\n", | ||
115 | cpu, mc_header->processor_rev_id, equiv_cpu_id); | ||
116 | return 0; | 110 | return 0; |
117 | } | ||
118 | 111 | ||
119 | /* ucode might be chipset specific -- currently we don't support this */ | 112 | /* ucode might be chipset specific -- currently we don't support this */ |
120 | if (mc_header->nb_dev_id || mc_header->sb_dev_id) { | 113 | if (mc_header->nb_dev_id || mc_header->sb_dev_id) { |
121 | printk(KERN_ERR "microcode: CPU%d: loading of chipset " | 114 | pr_err(KERN_ERR "microcode: CPU%d: loading of chipset " |
122 | "specific code not yet supported\n", cpu); | 115 | "specific code not yet supported\n", cpu); |
123 | return 0; | 116 | return 0; |
124 | } | 117 | } |
@@ -148,14 +141,12 @@ static int apply_microcode_amd(int cpu) | |||
148 | 141 | ||
149 | /* check current patch id and patch's id for match */ | 142 | /* check current patch id and patch's id for match */ |
150 | if (rev != mc_amd->hdr.patch_id) { | 143 | if (rev != mc_amd->hdr.patch_id) { |
151 | printk(KERN_ERR "microcode: CPU%d: update failed " | 144 | pr_err("microcode: CPU%d: update failed " |
152 | "(for patch_level=0x%x)\n", cpu, mc_amd->hdr.patch_id); | 145 | "(for patch_level=0x%x)\n", cpu, mc_amd->hdr.patch_id); |
153 | return -1; | 146 | return -1; |
154 | } | 147 | } |
155 | 148 | ||
156 | printk(KERN_INFO "microcode: CPU%d: updated (new patch_level=0x%x)\n", | 149 | pr_info("microcode: CPU%d: updated (new patch_level=0x%x)\n", cpu, rev); |
157 | cpu, rev); | ||
158 | |||
159 | uci->cpu_sig.rev = rev; | 150 | uci->cpu_sig.rev = rev; |
160 | 151 | ||
161 | return 0; | 152 | return 0; |
@@ -178,18 +169,15 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) | |||
178 | return NULL; | 169 | return NULL; |
179 | 170 | ||
180 | if (section_hdr[0] != UCODE_UCODE_TYPE) { | 171 | if (section_hdr[0] != UCODE_UCODE_TYPE) { |
181 | printk(KERN_ERR "microcode: error: invalid type field in " | 172 | pr_err("microcode: error: invalid type field in " |
182 | "container file section header\n"); | 173 | "container file section header\n"); |
183 | return NULL; | 174 | return NULL; |
184 | } | 175 | } |
185 | 176 | ||
186 | total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8)); | 177 | total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8)); |
187 | 178 | ||
188 | printk(KERN_DEBUG "microcode: size %u, total_size %u\n", | ||
189 | size, total_size); | ||
190 | |||
191 | if (total_size > size || total_size > UCODE_MAX_SIZE) { | 179 | if (total_size > size || total_size > UCODE_MAX_SIZE) { |
192 | printk(KERN_ERR "microcode: error: size mismatch\n"); | 180 | pr_err("microcode: error: size mismatch\n"); |
193 | return NULL; | 181 | return NULL; |
194 | } | 182 | } |
195 | 183 | ||
@@ -218,15 +206,14 @@ static int install_equiv_cpu_table(const u8 *buf) | |||
218 | size = buf_pos[2]; | 206 | size = buf_pos[2]; |
219 | 207 | ||
220 | if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) { | 208 | if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) { |
221 | printk(KERN_ERR "microcode: error: invalid type field in " | 209 | pr_err("microcode: error: invalid type field in " |
222 | "container file section header\n"); | 210 | "container file section header\n"); |
223 | return 0; | 211 | return 0; |
224 | } | 212 | } |
225 | 213 | ||
226 | equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size); | 214 | equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size); |
227 | if (!equiv_cpu_table) { | 215 | if (!equiv_cpu_table) { |
228 | printk(KERN_ERR "microcode: failed to allocate " | 216 | pr_err("microcode: failed to allocate equivalent CPU table\n"); |
229 | "equivalent CPU table\n"); | ||
230 | return 0; | 217 | return 0; |
231 | } | 218 | } |
232 | 219 | ||
@@ -259,8 +246,7 @@ generic_load_microcode(int cpu, const u8 *data, size_t size) | |||
259 | 246 | ||
260 | offset = install_equiv_cpu_table(ucode_ptr); | 247 | offset = install_equiv_cpu_table(ucode_ptr); |
261 | if (!offset) { | 248 | if (!offset) { |
262 | printk(KERN_ERR "microcode: failed to create " | 249 | pr_err("microcode: failed to create equivalent cpu table\n"); |
263 | "equivalent cpu table\n"); | ||
264 | return UCODE_ERROR; | 250 | return UCODE_ERROR; |
265 | } | 251 | } |
266 | 252 | ||
@@ -308,33 +294,27 @@ generic_load_microcode(int cpu, const u8 *data, size_t size) | |||
308 | 294 | ||
309 | static enum ucode_state request_microcode_fw(int cpu, struct device *device) | 295 | static enum ucode_state request_microcode_fw(int cpu, struct device *device) |
310 | { | 296 | { |
311 | const char *fw_name = "amd-ucode/microcode_amd.bin"; | ||
312 | const struct firmware *firmware; | ||
313 | enum ucode_state ret; | 297 | enum ucode_state ret; |
314 | 298 | ||
315 | if (request_firmware(&firmware, fw_name, device)) { | 299 | if (firmware == NULL) |
316 | printk(KERN_ERR "microcode: failed to load file %s\n", fw_name); | ||
317 | return UCODE_NFOUND; | 300 | return UCODE_NFOUND; |
318 | } | ||
319 | 301 | ||
320 | if (*(u32 *)firmware->data != UCODE_MAGIC) { | 302 | if (*(u32 *)firmware->data != UCODE_MAGIC) { |
321 | printk(KERN_ERR "microcode: invalid UCODE_MAGIC (0x%08x)\n", | 303 | pr_err("microcode: invalid UCODE_MAGIC (0x%08x)\n", |
322 | *(u32 *)firmware->data); | 304 | *(u32 *)firmware->data); |
323 | return UCODE_ERROR; | 305 | return UCODE_ERROR; |
324 | } | 306 | } |
325 | 307 | ||
326 | ret = generic_load_microcode(cpu, firmware->data, firmware->size); | 308 | ret = generic_load_microcode(cpu, firmware->data, firmware->size); |
327 | 309 | ||
328 | release_firmware(firmware); | ||
329 | |||
330 | return ret; | 310 | return ret; |
331 | } | 311 | } |
332 | 312 | ||
333 | static enum ucode_state | 313 | static enum ucode_state |
334 | request_microcode_user(int cpu, const void __user *buf, size_t size) | 314 | request_microcode_user(int cpu, const void __user *buf, size_t size) |
335 | { | 315 | { |
336 | printk(KERN_INFO "microcode: AMD microcode update via " | 316 | pr_info("microcode: AMD microcode update via " |
337 | "/dev/cpu/microcode not supported\n"); | 317 | "/dev/cpu/microcode not supported\n"); |
338 | return UCODE_ERROR; | 318 | return UCODE_ERROR; |
339 | } | 319 | } |
340 | 320 | ||
@@ -346,7 +326,32 @@ static void microcode_fini_cpu_amd(int cpu) | |||
346 | uci->mc = NULL; | 326 | uci->mc = NULL; |
347 | } | 327 | } |
348 | 328 | ||
329 | void init_microcode_amd(struct device *device) | ||
330 | { | ||
331 | const char *fw_name = "amd-ucode/microcode_amd.bin"; | ||
332 | struct cpuinfo_x86 *c = &boot_cpu_data; | ||
333 | |||
334 | WARN_ON(c->x86_vendor != X86_VENDOR_AMD); | ||
335 | |||
336 | if (c->x86 < 0x10) { | ||
337 | pr_warning("microcode: AMD CPU family 0x%x not supported\n", | ||
338 | c->x86); | ||
339 | return; | ||
340 | } | ||
341 | supported_cpu = 1; | ||
342 | |||
343 | if (request_firmware(&firmware, fw_name, device)) | ||
344 | pr_err("microcode: failed to load file %s\n", fw_name); | ||
345 | } | ||
346 | |||
347 | void fini_microcode_amd(void) | ||
348 | { | ||
349 | release_firmware(firmware); | ||
350 | } | ||
351 | |||
349 | static struct microcode_ops microcode_amd_ops = { | 352 | static struct microcode_ops microcode_amd_ops = { |
353 | .init = init_microcode_amd, | ||
354 | .fini = fini_microcode_amd, | ||
350 | .request_microcode_user = request_microcode_user, | 355 | .request_microcode_user = request_microcode_user, |
351 | .request_microcode_fw = request_microcode_fw, | 356 | .request_microcode_fw = request_microcode_fw, |
352 | .collect_cpu_info = collect_cpu_info_amd, | 357 | .collect_cpu_info = collect_cpu_info_amd, |
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 2bcad3926edb..e68aae397869 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c | |||
@@ -391,7 +391,7 @@ static enum ucode_state microcode_update_cpu(int cpu) | |||
391 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 391 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
392 | enum ucode_state ustate; | 392 | enum ucode_state ustate; |
393 | 393 | ||
394 | if (uci->valid) | 394 | if (uci->valid && uci->mc) |
395 | ustate = microcode_resume_cpu(cpu); | 395 | ustate = microcode_resume_cpu(cpu); |
396 | else | 396 | else |
397 | ustate = microcode_init_cpu(cpu); | 397 | ustate = microcode_init_cpu(cpu); |
@@ -518,6 +518,9 @@ static int __init microcode_init(void) | |||
518 | return PTR_ERR(microcode_pdev); | 518 | return PTR_ERR(microcode_pdev); |
519 | } | 519 | } |
520 | 520 | ||
521 | if (microcode_ops->init) | ||
522 | microcode_ops->init(&microcode_pdev->dev); | ||
523 | |||
521 | get_online_cpus(); | 524 | get_online_cpus(); |
522 | mutex_lock(&microcode_mutex); | 525 | mutex_lock(&microcode_mutex); |
523 | 526 | ||
@@ -561,6 +564,9 @@ static void __exit microcode_exit(void) | |||
561 | 564 | ||
562 | platform_device_unregister(microcode_pdev); | 565 | platform_device_unregister(microcode_pdev); |
563 | 566 | ||
567 | if (microcode_ops->fini) | ||
568 | microcode_ops->fini(); | ||
569 | |||
564 | microcode_ops = NULL; | 570 | microcode_ops = NULL; |
565 | 571 | ||
566 | pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n"); | 572 | pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n"); |
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 5be95ef4ffec..35a57c963df9 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
@@ -667,36 +667,18 @@ void __init default_get_smp_config(unsigned int early) | |||
667 | */ | 667 | */ |
668 | } | 668 | } |
669 | 669 | ||
670 | static void __init smp_reserve_bootmem(struct mpf_intel *mpf) | 670 | static void __init smp_reserve_memory(struct mpf_intel *mpf) |
671 | { | 671 | { |
672 | unsigned long size = get_mpc_size(mpf->physptr); | 672 | unsigned long size = get_mpc_size(mpf->physptr); |
673 | #ifdef CONFIG_X86_32 | ||
674 | /* | ||
675 | * We cannot access to MPC table to compute table size yet, | ||
676 | * as only few megabytes from the bottom is mapped now. | ||
677 | * PC-9800's MPC table places on the very last of physical | ||
678 | * memory; so that simply reserving PAGE_SIZE from mpf->physptr | ||
679 | * yields BUG() in reserve_bootmem. | ||
680 | * also need to make sure physptr is below than max_low_pfn | ||
681 | * we don't need reserve the area above max_low_pfn | ||
682 | */ | ||
683 | unsigned long end = max_low_pfn * PAGE_SIZE; | ||
684 | 673 | ||
685 | if (mpf->physptr < end) { | 674 | reserve_early(mpf->physptr, mpf->physptr+size, "MP-table mpc"); |
686 | if (mpf->physptr + size > end) | ||
687 | size = end - mpf->physptr; | ||
688 | reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT); | ||
689 | } | ||
690 | #else | ||
691 | reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT); | ||
692 | #endif | ||
693 | } | 675 | } |
694 | 676 | ||
695 | static int __init smp_scan_config(unsigned long base, unsigned long length, | 677 | static int __init smp_scan_config(unsigned long base, unsigned long length) |
696 | unsigned reserve) | ||
697 | { | 678 | { |
698 | unsigned int *bp = phys_to_virt(base); | 679 | unsigned int *bp = phys_to_virt(base); |
699 | struct mpf_intel *mpf; | 680 | struct mpf_intel *mpf; |
681 | unsigned long mem; | ||
700 | 682 | ||
701 | apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", | 683 | apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", |
702 | bp, length); | 684 | bp, length); |
@@ -717,12 +699,10 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, | |||
717 | printk(KERN_INFO "found SMP MP-table at [%p] %llx\n", | 699 | printk(KERN_INFO "found SMP MP-table at [%p] %llx\n", |
718 | mpf, (u64)virt_to_phys(mpf)); | 700 | mpf, (u64)virt_to_phys(mpf)); |
719 | 701 | ||
720 | if (!reserve) | 702 | mem = virt_to_phys(mpf); |
721 | return 1; | 703 | reserve_early(mem, mem + sizeof(*mpf), "MP-table mpf"); |
722 | reserve_bootmem_generic(virt_to_phys(mpf), sizeof(*mpf), | ||
723 | BOOTMEM_DEFAULT); | ||
724 | if (mpf->physptr) | 704 | if (mpf->physptr) |
725 | smp_reserve_bootmem(mpf); | 705 | smp_reserve_memory(mpf); |
726 | 706 | ||
727 | return 1; | 707 | return 1; |
728 | } | 708 | } |
@@ -732,7 +712,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, | |||
732 | return 0; | 712 | return 0; |
733 | } | 713 | } |
734 | 714 | ||
735 | void __init default_find_smp_config(unsigned int reserve) | 715 | void __init default_find_smp_config(void) |
736 | { | 716 | { |
737 | unsigned int address; | 717 | unsigned int address; |
738 | 718 | ||
@@ -744,9 +724,9 @@ void __init default_find_smp_config(unsigned int reserve) | |||
744 | * 2) Scan the top 1K of base RAM | 724 | * 2) Scan the top 1K of base RAM |
745 | * 3) Scan the 64K of bios | 725 | * 3) Scan the 64K of bios |
746 | */ | 726 | */ |
747 | if (smp_scan_config(0x0, 0x400, reserve) || | 727 | if (smp_scan_config(0x0, 0x400) || |
748 | smp_scan_config(639 * 0x400, 0x400, reserve) || | 728 | smp_scan_config(639 * 0x400, 0x400) || |
749 | smp_scan_config(0xF0000, 0x10000, reserve)) | 729 | smp_scan_config(0xF0000, 0x10000)) |
750 | return; | 730 | return; |
751 | /* | 731 | /* |
752 | * If it is an SMP machine we should know now, unless the | 732 | * If it is an SMP machine we should know now, unless the |
@@ -767,7 +747,7 @@ void __init default_find_smp_config(unsigned int reserve) | |||
767 | 747 | ||
768 | address = get_bios_ebda(); | 748 | address = get_bios_ebda(); |
769 | if (address) | 749 | if (address) |
770 | smp_scan_config(address, 0x400, reserve); | 750 | smp_scan_config(address, 0x400); |
771 | } | 751 | } |
772 | 752 | ||
773 | #ifdef CONFIG_X86_IO_APIC | 753 | #ifdef CONFIG_X86_IO_APIC |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 744508e7cfdd..5e2ba634ea15 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/pm.h> | 9 | #include <linux/pm.h> |
10 | #include <linux/clockchips.h> | 10 | #include <linux/clockchips.h> |
11 | #include <linux/random.h> | 11 | #include <linux/random.h> |
12 | #include <linux/user-return-notifier.h> | ||
12 | #include <trace/events/power.h> | 13 | #include <trace/events/power.h> |
13 | #include <linux/hw_breakpoint.h> | 14 | #include <linux/hw_breakpoint.h> |
14 | #include <asm/system.h> | 15 | #include <asm/system.h> |
@@ -209,6 +210,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
209 | */ | 210 | */ |
210 | memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); | 211 | memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); |
211 | } | 212 | } |
213 | propagate_user_return_notify(prev_p, next_p); | ||
212 | } | 214 | } |
213 | 215 | ||
214 | int sys_fork(struct pt_regs *regs) | 216 | int sys_fork(struct pt_regs *regs) |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index a98fe88fab64..c95c8f4e790a 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -349,26 +349,42 @@ out: | |||
349 | return err; | 349 | return err; |
350 | } | 350 | } |
351 | 351 | ||
352 | void | 352 | static void |
353 | start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) | 353 | start_thread_common(struct pt_regs *regs, unsigned long new_ip, |
354 | unsigned long new_sp, | ||
355 | unsigned int _cs, unsigned int _ss, unsigned int _ds) | ||
354 | { | 356 | { |
355 | loadsegment(fs, 0); | 357 | loadsegment(fs, 0); |
356 | loadsegment(es, 0); | 358 | loadsegment(es, _ds); |
357 | loadsegment(ds, 0); | 359 | loadsegment(ds, _ds); |
358 | load_gs_index(0); | 360 | load_gs_index(0); |
359 | regs->ip = new_ip; | 361 | regs->ip = new_ip; |
360 | regs->sp = new_sp; | 362 | regs->sp = new_sp; |
361 | percpu_write(old_rsp, new_sp); | 363 | percpu_write(old_rsp, new_sp); |
362 | regs->cs = __USER_CS; | 364 | regs->cs = _cs; |
363 | regs->ss = __USER_DS; | 365 | regs->ss = _ss; |
364 | regs->flags = 0x200; | 366 | regs->flags = X86_EFLAGS_IF; |
365 | set_fs(USER_DS); | 367 | set_fs(USER_DS); |
366 | /* | 368 | /* |
367 | * Free the old FP and other extended state | 369 | * Free the old FP and other extended state |
368 | */ | 370 | */ |
369 | free_thread_xstate(current); | 371 | free_thread_xstate(current); |
370 | } | 372 | } |
371 | EXPORT_SYMBOL_GPL(start_thread); | 373 | |
374 | void | ||
375 | start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) | ||
376 | { | ||
377 | start_thread_common(regs, new_ip, new_sp, | ||
378 | __USER_CS, __USER_DS, 0); | ||
379 | } | ||
380 | |||
381 | #ifdef CONFIG_IA32_EMULATION | ||
382 | void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp) | ||
383 | { | ||
384 | start_thread_common(regs, new_ip, new_sp, | ||
385 | __USER32_CS, __USER32_DS, __USER32_DS); | ||
386 | } | ||
387 | #endif | ||
372 | 388 | ||
373 | /* | 389 | /* |
374 | * switch_to(x,y) should switch tasks from x to y. | 390 | * switch_to(x,y) should switch tasks from x to y. |
diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c index 61a837743fe5..201eab63b05f 100644 --- a/arch/x86/kernel/reboot_fixups_32.c +++ b/arch/x86/kernel/reboot_fixups_32.c | |||
@@ -80,6 +80,7 @@ void mach_reboot_fixups(void) | |||
80 | continue; | 80 | continue; |
81 | 81 | ||
82 | cur->reboot_fixup(dev); | 82 | cur->reboot_fixup(dev); |
83 | pci_dev_put(dev); | ||
83 | } | 84 | } |
84 | } | 85 | } |
85 | 86 | ||
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 82e88cdda9bc..946a311a25c9 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -106,6 +106,7 @@ | |||
106 | #include <asm/percpu.h> | 106 | #include <asm/percpu.h> |
107 | #include <asm/topology.h> | 107 | #include <asm/topology.h> |
108 | #include <asm/apicdef.h> | 108 | #include <asm/apicdef.h> |
109 | #include <asm/k8.h> | ||
109 | #ifdef CONFIG_X86_64 | 110 | #ifdef CONFIG_X86_64 |
110 | #include <asm/numa_64.h> | 111 | #include <asm/numa_64.h> |
111 | #endif | 112 | #endif |
@@ -487,42 +488,11 @@ static void __init reserve_early_setup_data(void) | |||
487 | 488 | ||
488 | #ifdef CONFIG_KEXEC | 489 | #ifdef CONFIG_KEXEC |
489 | 490 | ||
490 | /** | ||
491 | * Reserve @size bytes of crashkernel memory at any suitable offset. | ||
492 | * | ||
493 | * @size: Size of the crashkernel memory to reserve. | ||
494 | * Returns the base address on success, and -1ULL on failure. | ||
495 | */ | ||
496 | static | ||
497 | unsigned long long __init find_and_reserve_crashkernel(unsigned long long size) | ||
498 | { | ||
499 | const unsigned long long alignment = 16<<20; /* 16M */ | ||
500 | unsigned long long start = 0LL; | ||
501 | |||
502 | while (1) { | ||
503 | int ret; | ||
504 | |||
505 | start = find_e820_area(start, ULONG_MAX, size, alignment); | ||
506 | if (start == -1ULL) | ||
507 | return start; | ||
508 | |||
509 | /* try to reserve it */ | ||
510 | ret = reserve_bootmem_generic(start, size, BOOTMEM_EXCLUSIVE); | ||
511 | if (ret >= 0) | ||
512 | return start; | ||
513 | |||
514 | start += alignment; | ||
515 | } | ||
516 | } | ||
517 | |||
518 | static inline unsigned long long get_total_mem(void) | 491 | static inline unsigned long long get_total_mem(void) |
519 | { | 492 | { |
520 | unsigned long long total; | 493 | unsigned long long total; |
521 | 494 | ||
522 | total = max_low_pfn - min_low_pfn; | 495 | total = max_pfn - min_low_pfn; |
523 | #ifdef CONFIG_HIGHMEM | ||
524 | total += highend_pfn - highstart_pfn; | ||
525 | #endif | ||
526 | 496 | ||
527 | return total << PAGE_SHIFT; | 497 | return total << PAGE_SHIFT; |
528 | } | 498 | } |
@@ -542,21 +512,25 @@ static void __init reserve_crashkernel(void) | |||
542 | 512 | ||
543 | /* 0 means: find the address automatically */ | 513 | /* 0 means: find the address automatically */ |
544 | if (crash_base <= 0) { | 514 | if (crash_base <= 0) { |
545 | crash_base = find_and_reserve_crashkernel(crash_size); | 515 | const unsigned long long alignment = 16<<20; /* 16M */ |
516 | |||
517 | crash_base = find_e820_area(alignment, ULONG_MAX, crash_size, | ||
518 | alignment); | ||
546 | if (crash_base == -1ULL) { | 519 | if (crash_base == -1ULL) { |
547 | pr_info("crashkernel reservation failed. " | 520 | pr_info("crashkernel reservation failed - No suitable area found.\n"); |
548 | "No suitable area found.\n"); | ||
549 | return; | 521 | return; |
550 | } | 522 | } |
551 | } else { | 523 | } else { |
552 | ret = reserve_bootmem_generic(crash_base, crash_size, | 524 | unsigned long long start; |
553 | BOOTMEM_EXCLUSIVE); | 525 | |
554 | if (ret < 0) { | 526 | start = find_e820_area(crash_base, ULONG_MAX, crash_size, |
555 | pr_info("crashkernel reservation failed - " | 527 | 1<<20); |
556 | "memory is in use\n"); | 528 | if (start != crash_base) { |
529 | pr_info("crashkernel reservation failed - memory is in use.\n"); | ||
557 | return; | 530 | return; |
558 | } | 531 | } |
559 | } | 532 | } |
533 | reserve_early(crash_base, crash_base + crash_size, "CRASH KERNEL"); | ||
560 | 534 | ||
561 | printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " | 535 | printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " |
562 | "for crashkernel (System RAM: %ldMB)\n", | 536 | "for crashkernel (System RAM: %ldMB)\n", |
@@ -699,6 +673,9 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = { | |||
699 | 673 | ||
700 | void __init setup_arch(char **cmdline_p) | 674 | void __init setup_arch(char **cmdline_p) |
701 | { | 675 | { |
676 | int acpi = 0; | ||
677 | int k8 = 0; | ||
678 | |||
702 | #ifdef CONFIG_X86_32 | 679 | #ifdef CONFIG_X86_32 |
703 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); | 680 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); |
704 | visws_early_detect(); | 681 | visws_early_detect(); |
@@ -791,21 +768,18 @@ void __init setup_arch(char **cmdline_p) | |||
791 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); | 768 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); |
792 | *cmdline_p = command_line; | 769 | *cmdline_p = command_line; |
793 | 770 | ||
794 | #ifdef CONFIG_X86_64 | ||
795 | /* | 771 | /* |
796 | * Must call this twice: Once just to detect whether hardware doesn't | 772 | * x86_configure_nx() is called before parse_early_param() to detect |
797 | * support NX (so that the early EHCI debug console setup can safely | 773 | * whether hardware doesn't support NX (so that the early EHCI debug |
798 | * call set_fixmap(), and then again after parsing early parameters to | 774 | * console setup can safely call set_fixmap()). It may then be called |
799 | * honor the respective command line option. | 775 | * again from within noexec_setup() during parsing early parameters |
776 | * to honor the respective command line option. | ||
800 | */ | 777 | */ |
801 | check_efer(); | 778 | x86_configure_nx(); |
802 | #endif | ||
803 | 779 | ||
804 | parse_early_param(); | 780 | parse_early_param(); |
805 | 781 | ||
806 | #ifdef CONFIG_X86_64 | 782 | x86_report_nx(); |
807 | check_efer(); | ||
808 | #endif | ||
809 | 783 | ||
810 | /* Must be before kernel pagetables are setup */ | 784 | /* Must be before kernel pagetables are setup */ |
811 | vmi_activate(); | 785 | vmi_activate(); |
@@ -901,6 +875,13 @@ void __init setup_arch(char **cmdline_p) | |||
901 | 875 | ||
902 | reserve_brk(); | 876 | reserve_brk(); |
903 | 877 | ||
878 | #ifdef CONFIG_ACPI_SLEEP | ||
879 | /* | ||
880 | * Reserve low memory region for sleep support. | ||
881 | * even before init_memory_mapping | ||
882 | */ | ||
883 | acpi_reserve_wakeup_memory(); | ||
884 | #endif | ||
904 | init_gbpages(); | 885 | init_gbpages(); |
905 | 886 | ||
906 | /* max_pfn_mapped is updated here */ | 887 | /* max_pfn_mapped is updated here */ |
@@ -927,6 +908,8 @@ void __init setup_arch(char **cmdline_p) | |||
927 | 908 | ||
928 | reserve_initrd(); | 909 | reserve_initrd(); |
929 | 910 | ||
911 | reserve_crashkernel(); | ||
912 | |||
930 | vsmp_init(); | 913 | vsmp_init(); |
931 | 914 | ||
932 | io_delay_init(); | 915 | io_delay_init(); |
@@ -938,27 +921,24 @@ void __init setup_arch(char **cmdline_p) | |||
938 | 921 | ||
939 | early_acpi_boot_init(); | 922 | early_acpi_boot_init(); |
940 | 923 | ||
924 | /* | ||
925 | * Find and reserve possible boot-time SMP configuration: | ||
926 | */ | ||
927 | find_smp_config(); | ||
928 | |||
941 | #ifdef CONFIG_ACPI_NUMA | 929 | #ifdef CONFIG_ACPI_NUMA |
942 | /* | 930 | /* |
943 | * Parse SRAT to discover nodes. | 931 | * Parse SRAT to discover nodes. |
944 | */ | 932 | */ |
945 | acpi_numa_init(); | 933 | acpi = acpi_numa_init(); |
946 | #endif | 934 | #endif |
947 | 935 | ||
948 | initmem_init(0, max_pfn); | 936 | #ifdef CONFIG_K8_NUMA |
949 | 937 | if (!acpi) | |
950 | #ifdef CONFIG_ACPI_SLEEP | 938 | k8 = !k8_numa_init(0, max_pfn); |
951 | /* | ||
952 | * Reserve low memory region for sleep support. | ||
953 | */ | ||
954 | acpi_reserve_bootmem(); | ||
955 | #endif | 939 | #endif |
956 | /* | ||
957 | * Find and reserve possible boot-time SMP configuration: | ||
958 | */ | ||
959 | find_smp_config(); | ||
960 | 940 | ||
961 | reserve_crashkernel(); | 941 | initmem_init(0, max_pfn, acpi, k8); |
962 | 942 | ||
963 | #ifdef CONFIG_X86_64 | 943 | #ifdef CONFIG_X86_64 |
964 | /* | 944 | /* |
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index fbf3b07c8567..74fe6d86dc5d 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/stddef.h> | 19 | #include <linux/stddef.h> |
20 | #include <linux/personality.h> | 20 | #include <linux/personality.h> |
21 | #include <linux/uaccess.h> | 21 | #include <linux/uaccess.h> |
22 | #include <linux/user-return-notifier.h> | ||
22 | 23 | ||
23 | #include <asm/processor.h> | 24 | #include <asm/processor.h> |
24 | #include <asm/ucontext.h> | 25 | #include <asm/ucontext.h> |
@@ -863,6 +864,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | |||
863 | if (current->replacement_session_keyring) | 864 | if (current->replacement_session_keyring) |
864 | key_replace_session_keyring(); | 865 | key_replace_session_keyring(); |
865 | } | 866 | } |
867 | if (thread_info_flags & _TIF_USER_RETURN_NOTIFY) | ||
868 | fire_user_return_notifiers(); | ||
866 | 869 | ||
867 | #ifdef CONFIG_X86_32 | 870 | #ifdef CONFIG_X86_32 |
868 | clear_thread_flag(TIF_IRET); | 871 | clear_thread_flag(TIF_IRET); |
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index 0157cd26d7cc..70c2125d55b9 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S | |||
@@ -336,3 +336,4 @@ ENTRY(sys_call_table) | |||
336 | .long sys_pwritev | 336 | .long sys_pwritev |
337 | .long sys_rt_tgsigqueueinfo /* 335 */ | 337 | .long sys_rt_tgsigqueueinfo /* 335 */ |
338 | .long sys_perf_event_open | 338 | .long sys_perf_event_open |
339 | .long sys_recvmmsg | ||
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 1740c85e24bb..364d015efebc 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c | |||
@@ -817,10 +817,8 @@ static int __init uv_init_blade(int blade) | |||
817 | */ | 817 | */ |
818 | apicid = blade_to_first_apicid(blade); | 818 | apicid = blade_to_first_apicid(blade); |
819 | pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG); | 819 | pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG); |
820 | if ((pa & 0xff) != UV_BAU_MESSAGE) { | 820 | uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, |
821 | uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, | ||
822 | ((apicid << 32) | UV_BAU_MESSAGE)); | 821 | ((apicid << 32) | UV_BAU_MESSAGE)); |
823 | } | ||
824 | return 0; | 822 | return 0; |
825 | } | 823 | } |
826 | 824 | ||
diff --git a/arch/x86/kernel/uv_time.c b/arch/x86/kernel/uv_time.c index 583f11d5c480..3c84aa001c11 100644 --- a/arch/x86/kernel/uv_time.c +++ b/arch/x86/kernel/uv_time.c | |||
@@ -74,7 +74,7 @@ struct uv_rtc_timer_head { | |||
74 | */ | 74 | */ |
75 | static struct uv_rtc_timer_head **blade_info __read_mostly; | 75 | static struct uv_rtc_timer_head **blade_info __read_mostly; |
76 | 76 | ||
77 | static int uv_rtc_enable; | 77 | static int uv_rtc_evt_enable; |
78 | 78 | ||
79 | /* | 79 | /* |
80 | * Hardware interface routines | 80 | * Hardware interface routines |
@@ -90,7 +90,7 @@ static void uv_rtc_send_IPI(int cpu) | |||
90 | pnode = uv_apicid_to_pnode(apicid); | 90 | pnode = uv_apicid_to_pnode(apicid); |
91 | val = (1UL << UVH_IPI_INT_SEND_SHFT) | | 91 | val = (1UL << UVH_IPI_INT_SEND_SHFT) | |
92 | (apicid << UVH_IPI_INT_APIC_ID_SHFT) | | 92 | (apicid << UVH_IPI_INT_APIC_ID_SHFT) | |
93 | (GENERIC_INTERRUPT_VECTOR << UVH_IPI_INT_VECTOR_SHFT); | 93 | (X86_PLATFORM_IPI_VECTOR << UVH_IPI_INT_VECTOR_SHFT); |
94 | 94 | ||
95 | uv_write_global_mmr64(pnode, UVH_IPI_INT, val); | 95 | uv_write_global_mmr64(pnode, UVH_IPI_INT, val); |
96 | } | 96 | } |
@@ -115,7 +115,7 @@ static int uv_setup_intr(int cpu, u64 expires) | |||
115 | uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS, | 115 | uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS, |
116 | UVH_EVENT_OCCURRED0_RTC1_MASK); | 116 | UVH_EVENT_OCCURRED0_RTC1_MASK); |
117 | 117 | ||
118 | val = (GENERIC_INTERRUPT_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) | | 118 | val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) | |
119 | ((u64)cpu_physical_id(cpu) << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT); | 119 | ((u64)cpu_physical_id(cpu) << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT); |
120 | 120 | ||
121 | /* Set configuration */ | 121 | /* Set configuration */ |
@@ -123,7 +123,10 @@ static int uv_setup_intr(int cpu, u64 expires) | |||
123 | /* Initialize comparator value */ | 123 | /* Initialize comparator value */ |
124 | uv_write_global_mmr64(pnode, UVH_INT_CMPB, expires); | 124 | uv_write_global_mmr64(pnode, UVH_INT_CMPB, expires); |
125 | 125 | ||
126 | return (expires < uv_read_rtc(NULL) && !uv_intr_pending(pnode)); | 126 | if (uv_read_rtc(NULL) <= expires) |
127 | return 0; | ||
128 | |||
129 | return !uv_intr_pending(pnode); | ||
127 | } | 130 | } |
128 | 131 | ||
129 | /* | 132 | /* |
@@ -223,6 +226,7 @@ static int uv_rtc_set_timer(int cpu, u64 expires) | |||
223 | 226 | ||
224 | next_cpu = head->next_cpu; | 227 | next_cpu = head->next_cpu; |
225 | *t = expires; | 228 | *t = expires; |
229 | |||
226 | /* Will this one be next to go off? */ | 230 | /* Will this one be next to go off? */ |
227 | if (next_cpu < 0 || bcpu == next_cpu || | 231 | if (next_cpu < 0 || bcpu == next_cpu || |
228 | expires < head->cpu[next_cpu].expires) { | 232 | expires < head->cpu[next_cpu].expires) { |
@@ -231,7 +235,7 @@ static int uv_rtc_set_timer(int cpu, u64 expires) | |||
231 | *t = ULLONG_MAX; | 235 | *t = ULLONG_MAX; |
232 | uv_rtc_find_next_timer(head, pnode); | 236 | uv_rtc_find_next_timer(head, pnode); |
233 | spin_unlock_irqrestore(&head->lock, flags); | 237 | spin_unlock_irqrestore(&head->lock, flags); |
234 | return 1; | 238 | return -ETIME; |
235 | } | 239 | } |
236 | } | 240 | } |
237 | 241 | ||
@@ -244,7 +248,7 @@ static int uv_rtc_set_timer(int cpu, u64 expires) | |||
244 | * | 248 | * |
245 | * Returns 1 if this timer was pending. | 249 | * Returns 1 if this timer was pending. |
246 | */ | 250 | */ |
247 | static int uv_rtc_unset_timer(int cpu) | 251 | static int uv_rtc_unset_timer(int cpu, int force) |
248 | { | 252 | { |
249 | int pnode = uv_cpu_to_pnode(cpu); | 253 | int pnode = uv_cpu_to_pnode(cpu); |
250 | int bid = uv_cpu_to_blade_id(cpu); | 254 | int bid = uv_cpu_to_blade_id(cpu); |
@@ -256,14 +260,15 @@ static int uv_rtc_unset_timer(int cpu) | |||
256 | 260 | ||
257 | spin_lock_irqsave(&head->lock, flags); | 261 | spin_lock_irqsave(&head->lock, flags); |
258 | 262 | ||
259 | if (head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) | 263 | if ((head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) || force) |
260 | rc = 1; | 264 | rc = 1; |
261 | 265 | ||
262 | *t = ULLONG_MAX; | 266 | if (rc) { |
263 | 267 | *t = ULLONG_MAX; | |
264 | /* Was the hardware setup for this timer? */ | 268 | /* Was the hardware setup for this timer? */ |
265 | if (head->next_cpu == bcpu) | 269 | if (head->next_cpu == bcpu) |
266 | uv_rtc_find_next_timer(head, pnode); | 270 | uv_rtc_find_next_timer(head, pnode); |
271 | } | ||
267 | 272 | ||
268 | spin_unlock_irqrestore(&head->lock, flags); | 273 | spin_unlock_irqrestore(&head->lock, flags); |
269 | 274 | ||
@@ -310,32 +315,32 @@ static void uv_rtc_timer_setup(enum clock_event_mode mode, | |||
310 | break; | 315 | break; |
311 | case CLOCK_EVT_MODE_UNUSED: | 316 | case CLOCK_EVT_MODE_UNUSED: |
312 | case CLOCK_EVT_MODE_SHUTDOWN: | 317 | case CLOCK_EVT_MODE_SHUTDOWN: |
313 | uv_rtc_unset_timer(ced_cpu); | 318 | uv_rtc_unset_timer(ced_cpu, 1); |
314 | break; | 319 | break; |
315 | } | 320 | } |
316 | } | 321 | } |
317 | 322 | ||
318 | static void uv_rtc_interrupt(void) | 323 | static void uv_rtc_interrupt(void) |
319 | { | 324 | { |
320 | struct clock_event_device *ced = &__get_cpu_var(cpu_ced); | ||
321 | int cpu = smp_processor_id(); | 325 | int cpu = smp_processor_id(); |
326 | struct clock_event_device *ced = &per_cpu(cpu_ced, cpu); | ||
322 | 327 | ||
323 | if (!ced || !ced->event_handler) | 328 | if (!ced || !ced->event_handler) |
324 | return; | 329 | return; |
325 | 330 | ||
326 | if (uv_rtc_unset_timer(cpu) != 1) | 331 | if (uv_rtc_unset_timer(cpu, 0) != 1) |
327 | return; | 332 | return; |
328 | 333 | ||
329 | ced->event_handler(ced); | 334 | ced->event_handler(ced); |
330 | } | 335 | } |
331 | 336 | ||
332 | static int __init uv_enable_rtc(char *str) | 337 | static int __init uv_enable_evt_rtc(char *str) |
333 | { | 338 | { |
334 | uv_rtc_enable = 1; | 339 | uv_rtc_evt_enable = 1; |
335 | 340 | ||
336 | return 1; | 341 | return 1; |
337 | } | 342 | } |
338 | __setup("uvrtc", uv_enable_rtc); | 343 | __setup("uvrtcevt", uv_enable_evt_rtc); |
339 | 344 | ||
340 | static __init void uv_rtc_register_clockevents(struct work_struct *dummy) | 345 | static __init void uv_rtc_register_clockevents(struct work_struct *dummy) |
341 | { | 346 | { |
@@ -350,27 +355,32 @@ static __init int uv_rtc_setup_clock(void) | |||
350 | { | 355 | { |
351 | int rc; | 356 | int rc; |
352 | 357 | ||
353 | if (!uv_rtc_enable || !is_uv_system() || generic_interrupt_extension) | 358 | if (!is_uv_system()) |
354 | return -ENODEV; | 359 | return -ENODEV; |
355 | 360 | ||
356 | generic_interrupt_extension = uv_rtc_interrupt; | ||
357 | |||
358 | clocksource_uv.mult = clocksource_hz2mult(sn_rtc_cycles_per_second, | 361 | clocksource_uv.mult = clocksource_hz2mult(sn_rtc_cycles_per_second, |
359 | clocksource_uv.shift); | 362 | clocksource_uv.shift); |
360 | 363 | ||
364 | /* If single blade, prefer tsc */ | ||
365 | if (uv_num_possible_blades() == 1) | ||
366 | clocksource_uv.rating = 250; | ||
367 | |||
361 | rc = clocksource_register(&clocksource_uv); | 368 | rc = clocksource_register(&clocksource_uv); |
362 | if (rc) { | 369 | if (rc) |
363 | generic_interrupt_extension = NULL; | 370 | printk(KERN_INFO "UV RTC clocksource failed rc %d\n", rc); |
371 | else | ||
372 | printk(KERN_INFO "UV RTC clocksource registered freq %lu MHz\n", | ||
373 | sn_rtc_cycles_per_second/(unsigned long)1E6); | ||
374 | |||
375 | if (rc || !uv_rtc_evt_enable || x86_platform_ipi_callback) | ||
364 | return rc; | 376 | return rc; |
365 | } | ||
366 | 377 | ||
367 | /* Setup and register clockevents */ | 378 | /* Setup and register clockevents */ |
368 | rc = uv_rtc_allocate_timers(); | 379 | rc = uv_rtc_allocate_timers(); |
369 | if (rc) { | 380 | if (rc) |
370 | clocksource_unregister(&clocksource_uv); | 381 | goto error; |
371 | generic_interrupt_extension = NULL; | 382 | |
372 | return rc; | 383 | x86_platform_ipi_callback = uv_rtc_interrupt; |
373 | } | ||
374 | 384 | ||
375 | clock_event_device_uv.mult = div_sc(sn_rtc_cycles_per_second, | 385 | clock_event_device_uv.mult = div_sc(sn_rtc_cycles_per_second, |
376 | NSEC_PER_SEC, clock_event_device_uv.shift); | 386 | NSEC_PER_SEC, clock_event_device_uv.shift); |
@@ -383,11 +393,19 @@ static __init int uv_rtc_setup_clock(void) | |||
383 | 393 | ||
384 | rc = schedule_on_each_cpu(uv_rtc_register_clockevents); | 394 | rc = schedule_on_each_cpu(uv_rtc_register_clockevents); |
385 | if (rc) { | 395 | if (rc) { |
386 | clocksource_unregister(&clocksource_uv); | 396 | x86_platform_ipi_callback = NULL; |
387 | generic_interrupt_extension = NULL; | ||
388 | uv_rtc_deallocate_timers(); | 397 | uv_rtc_deallocate_timers(); |
398 | goto error; | ||
389 | } | 399 | } |
390 | 400 | ||
401 | printk(KERN_INFO "UV RTC clockevents registered\n"); | ||
402 | |||
403 | return 0; | ||
404 | |||
405 | error: | ||
406 | clocksource_unregister(&clocksource_uv); | ||
407 | printk(KERN_INFO "UV RTC clockevents failed rc %d\n", rc); | ||
408 | |||
391 | return rc; | 409 | return rc; |
392 | } | 410 | } |
393 | arch_initcall(uv_rtc_setup_clock); | 411 | arch_initcall(uv_rtc_setup_clock); |
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index abda6f53e71e..34a279a7471d 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c | |||
@@ -197,7 +197,7 @@ static void __init MP_processor_info(struct mpc_cpu *m) | |||
197 | apic_version[m->apicid] = ver; | 197 | apic_version[m->apicid] = ver; |
198 | } | 198 | } |
199 | 199 | ||
200 | static void __init visws_find_smp_config(unsigned int reserve) | 200 | static void __init visws_find_smp_config(void) |
201 | { | 201 | { |
202 | struct mpc_cpu *mp = phys_to_virt(CO_CPU_TAB_PHYS); | 202 | struct mpc_cpu *mp = phys_to_virt(CO_CPU_TAB_PHYS); |
203 | unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS)); | 203 | unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS)); |
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index 611b9e2360d3..74c92bb194df 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c | |||
@@ -226,7 +226,7 @@ static void __devinit vmi_time_init_clockevent(void) | |||
226 | evt->min_delta_ns = clockevent_delta2ns(1, evt); | 226 | evt->min_delta_ns = clockevent_delta2ns(1, evt); |
227 | evt->cpumask = cpumask_of(cpu); | 227 | evt->cpumask = cpumask_of(cpu); |
228 | 228 | ||
229 | printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n", | 229 | printk(KERN_WARNING "vmi: registering clock event %s. mult=%u shift=%u\n", |
230 | evt->name, evt->mult, evt->shift); | 230 | evt->name, evt->mult, evt->shift); |
231 | clockevents_register_device(evt); | 231 | clockevents_register_device(evt); |
232 | } | 232 | } |
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 3c68fe2d46cf..f3f2104408d9 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
@@ -41,6 +41,32 @@ ENTRY(phys_startup_64) | |||
41 | jiffies_64 = jiffies; | 41 | jiffies_64 = jiffies; |
42 | #endif | 42 | #endif |
43 | 43 | ||
44 | #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) | ||
45 | /* | ||
46 | * On 64-bit, align RODATA to 2MB so that even with CONFIG_DEBUG_RODATA | ||
47 | * we retain large page mappings for boundaries spanning kernel text, rodata | ||
48 | * and data sections. | ||
49 | * | ||
50 | * However, kernel identity mappings will have different RWX permissions | ||
51 | * to the pages mapping to text and to the pages padding (which are freed) the | ||
52 | * text section. Hence kernel identity mappings will be broken to smaller | ||
53 | * pages. For 64-bit, kernel text and kernel identity mappings are different, | ||
54 | * so we can enable protection checks that come with CONFIG_DEBUG_RODATA, | ||
55 | * as well as retain 2MB large page mappings for kernel text. | ||
56 | */ | ||
57 | #define X64_ALIGN_DEBUG_RODATA_BEGIN . = ALIGN(HPAGE_SIZE); | ||
58 | |||
59 | #define X64_ALIGN_DEBUG_RODATA_END \ | ||
60 | . = ALIGN(HPAGE_SIZE); \ | ||
61 | __end_rodata_hpage_align = .; | ||
62 | |||
63 | #else | ||
64 | |||
65 | #define X64_ALIGN_DEBUG_RODATA_BEGIN | ||
66 | #define X64_ALIGN_DEBUG_RODATA_END | ||
67 | |||
68 | #endif | ||
69 | |||
44 | PHDRS { | 70 | PHDRS { |
45 | text PT_LOAD FLAGS(5); /* R_E */ | 71 | text PT_LOAD FLAGS(5); /* R_E */ |
46 | data PT_LOAD FLAGS(7); /* RWE */ | 72 | data PT_LOAD FLAGS(7); /* RWE */ |
@@ -90,7 +116,9 @@ SECTIONS | |||
90 | 116 | ||
91 | EXCEPTION_TABLE(16) :text = 0x9090 | 117 | EXCEPTION_TABLE(16) :text = 0x9090 |
92 | 118 | ||
119 | X64_ALIGN_DEBUG_RODATA_BEGIN | ||
93 | RO_DATA(PAGE_SIZE) | 120 | RO_DATA(PAGE_SIZE) |
121 | X64_ALIGN_DEBUG_RODATA_END | ||
94 | 122 | ||
95 | /* Data */ | 123 | /* Data */ |
96 | .data : AT(ADDR(.data) - LOAD_OFFSET) { | 124 | .data : AT(ADDR(.data) - LOAD_OFFSET) { |
@@ -107,13 +135,13 @@ SECTIONS | |||
107 | 135 | ||
108 | PAGE_ALIGNED_DATA(PAGE_SIZE) | 136 | PAGE_ALIGNED_DATA(PAGE_SIZE) |
109 | 137 | ||
110 | CACHELINE_ALIGNED_DATA(CONFIG_X86_L1_CACHE_BYTES) | 138 | CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES) |
111 | 139 | ||
112 | DATA_DATA | 140 | DATA_DATA |
113 | CONSTRUCTORS | 141 | CONSTRUCTORS |
114 | 142 | ||
115 | /* rarely changed data like cpu maps */ | 143 | /* rarely changed data like cpu maps */ |
116 | READ_MOSTLY_DATA(CONFIG_X86_INTERNODE_CACHE_BYTES) | 144 | READ_MOSTLY_DATA(INTERNODE_CACHE_BYTES) |
117 | 145 | ||
118 | /* End of data section */ | 146 | /* End of data section */ |
119 | _edata = .; | 147 | _edata = .; |
@@ -137,12 +165,12 @@ SECTIONS | |||
137 | *(.vsyscall_0) | 165 | *(.vsyscall_0) |
138 | } :user | 166 | } :user |
139 | 167 | ||
140 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); | 168 | . = ALIGN(L1_CACHE_BYTES); |
141 | .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { | 169 | .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { |
142 | *(.vsyscall_fn) | 170 | *(.vsyscall_fn) |
143 | } | 171 | } |
144 | 172 | ||
145 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); | 173 | . = ALIGN(L1_CACHE_BYTES); |
146 | .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) { | 174 | .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) { |
147 | *(.vsyscall_gtod_data) | 175 | *(.vsyscall_gtod_data) |
148 | } | 176 | } |
@@ -166,7 +194,7 @@ SECTIONS | |||
166 | } | 194 | } |
167 | vgetcpu_mode = VVIRT(.vgetcpu_mode); | 195 | vgetcpu_mode = VVIRT(.vgetcpu_mode); |
168 | 196 | ||
169 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); | 197 | . = ALIGN(L1_CACHE_BYTES); |
170 | .jiffies : AT(VLOAD(.jiffies)) { | 198 | .jiffies : AT(VLOAD(.jiffies)) { |
171 | *(.jiffies) | 199 | *(.jiffies) |
172 | } | 200 | } |
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 8cb4974ff599..9055e5872ff0 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -73,7 +73,8 @@ void update_vsyscall_tz(void) | |||
73 | write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); | 73 | write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); |
74 | } | 74 | } |
75 | 75 | ||
76 | void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) | 76 | void update_vsyscall(struct timespec *wall_time, struct clocksource *clock, |
77 | u32 mult) | ||
77 | { | 78 | { |
78 | unsigned long flags; | 79 | unsigned long flags; |
79 | 80 | ||
@@ -82,7 +83,7 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) | |||
82 | vsyscall_gtod_data.clock.vread = clock->vread; | 83 | vsyscall_gtod_data.clock.vread = clock->vread; |
83 | vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; | 84 | vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; |
84 | vsyscall_gtod_data.clock.mask = clock->mask; | 85 | vsyscall_gtod_data.clock.mask = clock->mask; |
85 | vsyscall_gtod_data.clock.mult = clock->mult; | 86 | vsyscall_gtod_data.clock.mult = mult; |
86 | vsyscall_gtod_data.clock.shift = clock->shift; | 87 | vsyscall_gtod_data.clock.shift = clock->shift; |
87 | vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; | 88 | vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; |
88 | vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; | 89 | vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; |
@@ -237,7 +238,7 @@ static ctl_table kernel_table2[] = { | |||
237 | }; | 238 | }; |
238 | 239 | ||
239 | static ctl_table kernel_root_table2[] = { | 240 | static ctl_table kernel_root_table2[] = { |
240 | { .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555, | 241 | { .procname = "kernel", .mode = 0555, |
241 | .child = kernel_table2 }, | 242 | .child = kernel_table2 }, |
242 | {} | 243 | {} |
243 | }; | 244 | }; |
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index d11c5ff7c65e..ccd179dec36e 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <asm/e820.h> | 13 | #include <asm/e820.h> |
14 | #include <asm/time.h> | 14 | #include <asm/time.h> |
15 | #include <asm/irq.h> | 15 | #include <asm/irq.h> |
16 | #include <asm/pat.h> | ||
16 | #include <asm/tsc.h> | 17 | #include <asm/tsc.h> |
17 | #include <asm/iommu.h> | 18 | #include <asm/iommu.h> |
18 | 19 | ||
@@ -80,4 +81,5 @@ struct x86_platform_ops x86_platform = { | |||
80 | .get_wallclock = mach_get_cmos_time, | 81 | .get_wallclock = mach_get_cmos_time, |
81 | .set_wallclock = mach_set_rtc_mmss, | 82 | .set_wallclock = mach_set_rtc_mmss, |
82 | .iommu_shutdown = iommu_shutdown_noop, | 83 | .iommu_shutdown = iommu_shutdown_noop, |
84 | .is_untracked_pat_range = is_ISA_range, | ||
83 | }; | 85 | }; |
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index b84e571f4175..4cd498332466 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig | |||
@@ -28,6 +28,7 @@ config KVM | |||
28 | select HAVE_KVM_IRQCHIP | 28 | select HAVE_KVM_IRQCHIP |
29 | select HAVE_KVM_EVENTFD | 29 | select HAVE_KVM_EVENTFD |
30 | select KVM_APIC_ARCHITECTURE | 30 | select KVM_APIC_ARCHITECTURE |
31 | select USER_RETURN_NOTIFIER | ||
31 | ---help--- | 32 | ---help--- |
32 | Support hosting fully virtualized guest machines using hardware | 33 | Support hosting fully virtualized guest machines using hardware |
33 | virtualization extensions. You will need a fairly recent | 34 | virtualization extensions. You will need a fairly recent |
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 0e7fe78d0f74..31a7035c4bd9 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile | |||
@@ -6,7 +6,8 @@ CFLAGS_svm.o := -I. | |||
6 | CFLAGS_vmx.o := -I. | 6 | CFLAGS_vmx.o := -I. |
7 | 7 | ||
8 | kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ | 8 | kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ |
9 | coalesced_mmio.o irq_comm.o eventfd.o) | 9 | coalesced_mmio.o irq_comm.o eventfd.o \ |
10 | assigned-dev.o) | ||
10 | kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o) | 11 | kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o) |
11 | 12 | ||
12 | kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ | 13 | kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ |
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 1be5cd640e93..7e8faea4651e 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -75,6 +75,8 @@ | |||
75 | #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ | 75 | #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ |
76 | #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ | 76 | #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ |
77 | #define GroupMask 0xff /* Group number stored in bits 0:7 */ | 77 | #define GroupMask 0xff /* Group number stored in bits 0:7 */ |
78 | /* Misc flags */ | ||
79 | #define No64 (1<<28) | ||
78 | /* Source 2 operand type */ | 80 | /* Source 2 operand type */ |
79 | #define Src2None (0<<29) | 81 | #define Src2None (0<<29) |
80 | #define Src2CL (1<<29) | 82 | #define Src2CL (1<<29) |
@@ -92,19 +94,23 @@ static u32 opcode_table[256] = { | |||
92 | /* 0x00 - 0x07 */ | 94 | /* 0x00 - 0x07 */ |
93 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 95 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, |
94 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 96 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
95 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0, | 97 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, |
98 | ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, | ||
96 | /* 0x08 - 0x0F */ | 99 | /* 0x08 - 0x0F */ |
97 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 100 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, |
98 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 101 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
99 | 0, 0, 0, 0, | 102 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, |
103 | ImplicitOps | Stack | No64, 0, | ||
100 | /* 0x10 - 0x17 */ | 104 | /* 0x10 - 0x17 */ |
101 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 105 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, |
102 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 106 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
103 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0, | 107 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, |
108 | ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, | ||
104 | /* 0x18 - 0x1F */ | 109 | /* 0x18 - 0x1F */ |
105 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 110 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, |
106 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 111 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
107 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0, | 112 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, |
113 | ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, | ||
108 | /* 0x20 - 0x27 */ | 114 | /* 0x20 - 0x27 */ |
109 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 115 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, |
110 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 116 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
@@ -133,7 +139,8 @@ static u32 opcode_table[256] = { | |||
133 | DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack, | 139 | DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack, |
134 | DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack, | 140 | DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack, |
135 | /* 0x60 - 0x67 */ | 141 | /* 0x60 - 0x67 */ |
136 | 0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ , | 142 | ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, |
143 | 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ , | ||
137 | 0, 0, 0, 0, | 144 | 0, 0, 0, 0, |
138 | /* 0x68 - 0x6F */ | 145 | /* 0x68 - 0x6F */ |
139 | SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0, | 146 | SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0, |
@@ -158,7 +165,7 @@ static u32 opcode_table[256] = { | |||
158 | /* 0x90 - 0x97 */ | 165 | /* 0x90 - 0x97 */ |
159 | DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, | 166 | DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, |
160 | /* 0x98 - 0x9F */ | 167 | /* 0x98 - 0x9F */ |
161 | 0, 0, SrcImm | Src2Imm16, 0, | 168 | 0, 0, SrcImm | Src2Imm16 | No64, 0, |
162 | ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, | 169 | ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, |
163 | /* 0xA0 - 0xA7 */ | 170 | /* 0xA0 - 0xA7 */ |
164 | ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, | 171 | ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, |
@@ -185,7 +192,7 @@ static u32 opcode_table[256] = { | |||
185 | ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov, | 192 | ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov, |
186 | /* 0xC8 - 0xCF */ | 193 | /* 0xC8 - 0xCF */ |
187 | 0, 0, 0, ImplicitOps | Stack, | 194 | 0, 0, 0, ImplicitOps | Stack, |
188 | ImplicitOps, SrcImmByte, ImplicitOps, ImplicitOps, | 195 | ImplicitOps, SrcImmByte, ImplicitOps | No64, ImplicitOps, |
189 | /* 0xD0 - 0xD7 */ | 196 | /* 0xD0 - 0xD7 */ |
190 | ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, | 197 | ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, |
191 | ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, | 198 | ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, |
@@ -198,7 +205,7 @@ static u32 opcode_table[256] = { | |||
198 | ByteOp | SrcImmUByte, SrcImmUByte, | 205 | ByteOp | SrcImmUByte, SrcImmUByte, |
199 | /* 0xE8 - 0xEF */ | 206 | /* 0xE8 - 0xEF */ |
200 | SrcImm | Stack, SrcImm | ImplicitOps, | 207 | SrcImm | Stack, SrcImm | ImplicitOps, |
201 | SrcImmU | Src2Imm16, SrcImmByte | ImplicitOps, | 208 | SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps, |
202 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, | 209 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, |
203 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, | 210 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, |
204 | /* 0xF0 - 0xF7 */ | 211 | /* 0xF0 - 0xF7 */ |
@@ -244,11 +251,13 @@ static u32 twobyte_table[256] = { | |||
244 | /* 0x90 - 0x9F */ | 251 | /* 0x90 - 0x9F */ |
245 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 252 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
246 | /* 0xA0 - 0xA7 */ | 253 | /* 0xA0 - 0xA7 */ |
247 | 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, | 254 | ImplicitOps | Stack, ImplicitOps | Stack, |
255 | 0, DstMem | SrcReg | ModRM | BitOp, | ||
248 | DstMem | SrcReg | Src2ImmByte | ModRM, | 256 | DstMem | SrcReg | Src2ImmByte | ModRM, |
249 | DstMem | SrcReg | Src2CL | ModRM, 0, 0, | 257 | DstMem | SrcReg | Src2CL | ModRM, 0, 0, |
250 | /* 0xA8 - 0xAF */ | 258 | /* 0xA8 - 0xAF */ |
251 | 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, | 259 | ImplicitOps | Stack, ImplicitOps | Stack, |
260 | 0, DstMem | SrcReg | ModRM | BitOp, | ||
252 | DstMem | SrcReg | Src2ImmByte | ModRM, | 261 | DstMem | SrcReg | Src2ImmByte | ModRM, |
253 | DstMem | SrcReg | Src2CL | ModRM, | 262 | DstMem | SrcReg | Src2CL | ModRM, |
254 | ModRM, 0, | 263 | ModRM, 0, |
@@ -613,6 +622,9 @@ static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, | |||
613 | { | 622 | { |
614 | int rc = 0; | 623 | int rc = 0; |
615 | 624 | ||
625 | /* x86 instructions are limited to 15 bytes. */ | ||
626 | if (eip + size - ctxt->decode.eip_orig > 15) | ||
627 | return X86EMUL_UNHANDLEABLE; | ||
616 | eip += ctxt->cs_base; | 628 | eip += ctxt->cs_base; |
617 | while (size--) { | 629 | while (size--) { |
618 | rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++); | 630 | rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++); |
@@ -871,7 +883,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
871 | /* Shadow copy of register state. Committed on successful emulation. */ | 883 | /* Shadow copy of register state. Committed on successful emulation. */ |
872 | 884 | ||
873 | memset(c, 0, sizeof(struct decode_cache)); | 885 | memset(c, 0, sizeof(struct decode_cache)); |
874 | c->eip = kvm_rip_read(ctxt->vcpu); | 886 | c->eip = c->eip_orig = kvm_rip_read(ctxt->vcpu); |
875 | ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS); | 887 | ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS); |
876 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); | 888 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); |
877 | 889 | ||
@@ -962,6 +974,11 @@ done_prefixes: | |||
962 | } | 974 | } |
963 | } | 975 | } |
964 | 976 | ||
977 | if (mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { | ||
978 | kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction");; | ||
979 | return -1; | ||
980 | } | ||
981 | |||
965 | if (c->d & Group) { | 982 | if (c->d & Group) { |
966 | group = c->d & GroupMask; | 983 | group = c->d & GroupMask; |
967 | c->modrm = insn_fetch(u8, 1, c->eip); | 984 | c->modrm = insn_fetch(u8, 1, c->eip); |
@@ -1186,6 +1203,69 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt, | |||
1186 | return rc; | 1203 | return rc; |
1187 | } | 1204 | } |
1188 | 1205 | ||
1206 | static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg) | ||
1207 | { | ||
1208 | struct decode_cache *c = &ctxt->decode; | ||
1209 | struct kvm_segment segment; | ||
1210 | |||
1211 | kvm_x86_ops->get_segment(ctxt->vcpu, &segment, seg); | ||
1212 | |||
1213 | c->src.val = segment.selector; | ||
1214 | emulate_push(ctxt); | ||
1215 | } | ||
1216 | |||
1217 | static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, | ||
1218 | struct x86_emulate_ops *ops, int seg) | ||
1219 | { | ||
1220 | struct decode_cache *c = &ctxt->decode; | ||
1221 | unsigned long selector; | ||
1222 | int rc; | ||
1223 | |||
1224 | rc = emulate_pop(ctxt, ops, &selector, c->op_bytes); | ||
1225 | if (rc != 0) | ||
1226 | return rc; | ||
1227 | |||
1228 | rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, 1, seg); | ||
1229 | return rc; | ||
1230 | } | ||
1231 | |||
1232 | static void emulate_pusha(struct x86_emulate_ctxt *ctxt) | ||
1233 | { | ||
1234 | struct decode_cache *c = &ctxt->decode; | ||
1235 | unsigned long old_esp = c->regs[VCPU_REGS_RSP]; | ||
1236 | int reg = VCPU_REGS_RAX; | ||
1237 | |||
1238 | while (reg <= VCPU_REGS_RDI) { | ||
1239 | (reg == VCPU_REGS_RSP) ? | ||
1240 | (c->src.val = old_esp) : (c->src.val = c->regs[reg]); | ||
1241 | |||
1242 | emulate_push(ctxt); | ||
1243 | ++reg; | ||
1244 | } | ||
1245 | } | ||
1246 | |||
1247 | static int emulate_popa(struct x86_emulate_ctxt *ctxt, | ||
1248 | struct x86_emulate_ops *ops) | ||
1249 | { | ||
1250 | struct decode_cache *c = &ctxt->decode; | ||
1251 | int rc = 0; | ||
1252 | int reg = VCPU_REGS_RDI; | ||
1253 | |||
1254 | while (reg >= VCPU_REGS_RAX) { | ||
1255 | if (reg == VCPU_REGS_RSP) { | ||
1256 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], | ||
1257 | c->op_bytes); | ||
1258 | --reg; | ||
1259 | } | ||
1260 | |||
1261 | rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes); | ||
1262 | if (rc != 0) | ||
1263 | break; | ||
1264 | --reg; | ||
1265 | } | ||
1266 | return rc; | ||
1267 | } | ||
1268 | |||
1189 | static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, | 1269 | static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, |
1190 | struct x86_emulate_ops *ops) | 1270 | struct x86_emulate_ops *ops) |
1191 | { | 1271 | { |
@@ -1707,18 +1787,45 @@ special_insn: | |||
1707 | add: /* add */ | 1787 | add: /* add */ |
1708 | emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); | 1788 | emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); |
1709 | break; | 1789 | break; |
1790 | case 0x06: /* push es */ | ||
1791 | emulate_push_sreg(ctxt, VCPU_SREG_ES); | ||
1792 | break; | ||
1793 | case 0x07: /* pop es */ | ||
1794 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); | ||
1795 | if (rc != 0) | ||
1796 | goto done; | ||
1797 | break; | ||
1710 | case 0x08 ... 0x0d: | 1798 | case 0x08 ... 0x0d: |
1711 | or: /* or */ | 1799 | or: /* or */ |
1712 | emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); | 1800 | emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); |
1713 | break; | 1801 | break; |
1802 | case 0x0e: /* push cs */ | ||
1803 | emulate_push_sreg(ctxt, VCPU_SREG_CS); | ||
1804 | break; | ||
1714 | case 0x10 ... 0x15: | 1805 | case 0x10 ... 0x15: |
1715 | adc: /* adc */ | 1806 | adc: /* adc */ |
1716 | emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); | 1807 | emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); |
1717 | break; | 1808 | break; |
1809 | case 0x16: /* push ss */ | ||
1810 | emulate_push_sreg(ctxt, VCPU_SREG_SS); | ||
1811 | break; | ||
1812 | case 0x17: /* pop ss */ | ||
1813 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); | ||
1814 | if (rc != 0) | ||
1815 | goto done; | ||
1816 | break; | ||
1718 | case 0x18 ... 0x1d: | 1817 | case 0x18 ... 0x1d: |
1719 | sbb: /* sbb */ | 1818 | sbb: /* sbb */ |
1720 | emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); | 1819 | emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); |
1721 | break; | 1820 | break; |
1821 | case 0x1e: /* push ds */ | ||
1822 | emulate_push_sreg(ctxt, VCPU_SREG_DS); | ||
1823 | break; | ||
1824 | case 0x1f: /* pop ds */ | ||
1825 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); | ||
1826 | if (rc != 0) | ||
1827 | goto done; | ||
1828 | break; | ||
1722 | case 0x20 ... 0x25: | 1829 | case 0x20 ... 0x25: |
1723 | and: /* and */ | 1830 | and: /* and */ |
1724 | emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags); | 1831 | emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags); |
@@ -1750,6 +1857,14 @@ special_insn: | |||
1750 | if (rc != 0) | 1857 | if (rc != 0) |
1751 | goto done; | 1858 | goto done; |
1752 | break; | 1859 | break; |
1860 | case 0x60: /* pusha */ | ||
1861 | emulate_pusha(ctxt); | ||
1862 | break; | ||
1863 | case 0x61: /* popa */ | ||
1864 | rc = emulate_popa(ctxt, ops); | ||
1865 | if (rc != 0) | ||
1866 | goto done; | ||
1867 | break; | ||
1753 | case 0x63: /* movsxd */ | 1868 | case 0x63: /* movsxd */ |
1754 | if (ctxt->mode != X86EMUL_MODE_PROT64) | 1869 | if (ctxt->mode != X86EMUL_MODE_PROT64) |
1755 | goto cannot_emulate; | 1870 | goto cannot_emulate; |
@@ -1761,7 +1876,7 @@ special_insn: | |||
1761 | break; | 1876 | break; |
1762 | case 0x6c: /* insb */ | 1877 | case 0x6c: /* insb */ |
1763 | case 0x6d: /* insw/insd */ | 1878 | case 0x6d: /* insw/insd */ |
1764 | if (kvm_emulate_pio_string(ctxt->vcpu, NULL, | 1879 | if (kvm_emulate_pio_string(ctxt->vcpu, |
1765 | 1, | 1880 | 1, |
1766 | (c->d & ByteOp) ? 1 : c->op_bytes, | 1881 | (c->d & ByteOp) ? 1 : c->op_bytes, |
1767 | c->rep_prefix ? | 1882 | c->rep_prefix ? |
@@ -1777,7 +1892,7 @@ special_insn: | |||
1777 | return 0; | 1892 | return 0; |
1778 | case 0x6e: /* outsb */ | 1893 | case 0x6e: /* outsb */ |
1779 | case 0x6f: /* outsw/outsd */ | 1894 | case 0x6f: /* outsw/outsd */ |
1780 | if (kvm_emulate_pio_string(ctxt->vcpu, NULL, | 1895 | if (kvm_emulate_pio_string(ctxt->vcpu, |
1781 | 0, | 1896 | 0, |
1782 | (c->d & ByteOp) ? 1 : c->op_bytes, | 1897 | (c->d & ByteOp) ? 1 : c->op_bytes, |
1783 | c->rep_prefix ? | 1898 | c->rep_prefix ? |
@@ -2070,7 +2185,7 @@ special_insn: | |||
2070 | case 0xef: /* out (e/r)ax,dx */ | 2185 | case 0xef: /* out (e/r)ax,dx */ |
2071 | port = c->regs[VCPU_REGS_RDX]; | 2186 | port = c->regs[VCPU_REGS_RDX]; |
2072 | io_dir_in = 0; | 2187 | io_dir_in = 0; |
2073 | do_io: if (kvm_emulate_pio(ctxt->vcpu, NULL, io_dir_in, | 2188 | do_io: if (kvm_emulate_pio(ctxt->vcpu, io_dir_in, |
2074 | (c->d & ByteOp) ? 1 : c->op_bytes, | 2189 | (c->d & ByteOp) ? 1 : c->op_bytes, |
2075 | port) != 0) { | 2190 | port) != 0) { |
2076 | c->eip = saved_eip; | 2191 | c->eip = saved_eip; |
@@ -2297,6 +2412,14 @@ twobyte_insn: | |||
2297 | jmp_rel(c, c->src.val); | 2412 | jmp_rel(c, c->src.val); |
2298 | c->dst.type = OP_NONE; | 2413 | c->dst.type = OP_NONE; |
2299 | break; | 2414 | break; |
2415 | case 0xa0: /* push fs */ | ||
2416 | emulate_push_sreg(ctxt, VCPU_SREG_FS); | ||
2417 | break; | ||
2418 | case 0xa1: /* pop fs */ | ||
2419 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); | ||
2420 | if (rc != 0) | ||
2421 | goto done; | ||
2422 | break; | ||
2300 | case 0xa3: | 2423 | case 0xa3: |
2301 | bt: /* bt */ | 2424 | bt: /* bt */ |
2302 | c->dst.type = OP_NONE; | 2425 | c->dst.type = OP_NONE; |
@@ -2308,6 +2431,14 @@ twobyte_insn: | |||
2308 | case 0xa5: /* shld cl, r, r/m */ | 2431 | case 0xa5: /* shld cl, r, r/m */ |
2309 | emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); | 2432 | emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); |
2310 | break; | 2433 | break; |
2434 | case 0xa8: /* push gs */ | ||
2435 | emulate_push_sreg(ctxt, VCPU_SREG_GS); | ||
2436 | break; | ||
2437 | case 0xa9: /* pop gs */ | ||
2438 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); | ||
2439 | if (rc != 0) | ||
2440 | goto done; | ||
2441 | break; | ||
2311 | case 0xab: | 2442 | case 0xab: |
2312 | bts: /* bts */ | 2443 | bts: /* bts */ |
2313 | /* only subword offset */ | 2444 | /* only subword offset */ |
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 144e7f60b5e2..fab7440c9bb2 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
@@ -688,10 +688,8 @@ static void __inject_pit_timer_intr(struct kvm *kvm) | |||
688 | struct kvm_vcpu *vcpu; | 688 | struct kvm_vcpu *vcpu; |
689 | int i; | 689 | int i; |
690 | 690 | ||
691 | mutex_lock(&kvm->irq_lock); | ||
692 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); | 691 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); |
693 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); | 692 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); |
694 | mutex_unlock(&kvm->irq_lock); | ||
695 | 693 | ||
696 | /* | 694 | /* |
697 | * Provides NMI watchdog support via Virtual Wire mode. | 695 | * Provides NMI watchdog support via Virtual Wire mode. |
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 01f151682802..d057c0cbd245 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
@@ -38,7 +38,15 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq) | |||
38 | s->isr_ack |= (1 << irq); | 38 | s->isr_ack |= (1 << irq); |
39 | if (s != &s->pics_state->pics[0]) | 39 | if (s != &s->pics_state->pics[0]) |
40 | irq += 8; | 40 | irq += 8; |
41 | /* | ||
42 | * We are dropping lock while calling ack notifiers since ack | ||
43 | * notifier callbacks for assigned devices call into PIC recursively. | ||
44 | * Other interrupt may be delivered to PIC while lock is dropped but | ||
45 | * it should be safe since PIC state is already updated at this stage. | ||
46 | */ | ||
47 | spin_unlock(&s->pics_state->lock); | ||
41 | kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq); | 48 | kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq); |
49 | spin_lock(&s->pics_state->lock); | ||
42 | } | 50 | } |
43 | 51 | ||
44 | void kvm_pic_clear_isr_ack(struct kvm *kvm) | 52 | void kvm_pic_clear_isr_ack(struct kvm *kvm) |
@@ -176,16 +184,18 @@ int kvm_pic_set_irq(void *opaque, int irq, int level) | |||
176 | static inline void pic_intack(struct kvm_kpic_state *s, int irq) | 184 | static inline void pic_intack(struct kvm_kpic_state *s, int irq) |
177 | { | 185 | { |
178 | s->isr |= 1 << irq; | 186 | s->isr |= 1 << irq; |
179 | if (s->auto_eoi) { | ||
180 | if (s->rotate_on_auto_eoi) | ||
181 | s->priority_add = (irq + 1) & 7; | ||
182 | pic_clear_isr(s, irq); | ||
183 | } | ||
184 | /* | 187 | /* |
185 | * We don't clear a level sensitive interrupt here | 188 | * We don't clear a level sensitive interrupt here |
186 | */ | 189 | */ |
187 | if (!(s->elcr & (1 << irq))) | 190 | if (!(s->elcr & (1 << irq))) |
188 | s->irr &= ~(1 << irq); | 191 | s->irr &= ~(1 << irq); |
192 | |||
193 | if (s->auto_eoi) { | ||
194 | if (s->rotate_on_auto_eoi) | ||
195 | s->priority_add = (irq + 1) & 7; | ||
196 | pic_clear_isr(s, irq); | ||
197 | } | ||
198 | |||
189 | } | 199 | } |
190 | 200 | ||
191 | int kvm_pic_read_irq(struct kvm *kvm) | 201 | int kvm_pic_read_irq(struct kvm *kvm) |
@@ -225,22 +235,11 @@ int kvm_pic_read_irq(struct kvm *kvm) | |||
225 | 235 | ||
226 | void kvm_pic_reset(struct kvm_kpic_state *s) | 236 | void kvm_pic_reset(struct kvm_kpic_state *s) |
227 | { | 237 | { |
228 | int irq, irqbase, n; | 238 | int irq; |
229 | struct kvm *kvm = s->pics_state->irq_request_opaque; | 239 | struct kvm *kvm = s->pics_state->irq_request_opaque; |
230 | struct kvm_vcpu *vcpu0 = kvm->bsp_vcpu; | 240 | struct kvm_vcpu *vcpu0 = kvm->bsp_vcpu; |
241 | u8 irr = s->irr, isr = s->imr; | ||
231 | 242 | ||
232 | if (s == &s->pics_state->pics[0]) | ||
233 | irqbase = 0; | ||
234 | else | ||
235 | irqbase = 8; | ||
236 | |||
237 | for (irq = 0; irq < PIC_NUM_PINS/2; irq++) { | ||
238 | if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0)) | ||
239 | if (s->irr & (1 << irq) || s->isr & (1 << irq)) { | ||
240 | n = irq + irqbase; | ||
241 | kvm_notify_acked_irq(kvm, SELECT_PIC(n), n); | ||
242 | } | ||
243 | } | ||
244 | s->last_irr = 0; | 243 | s->last_irr = 0; |
245 | s->irr = 0; | 244 | s->irr = 0; |
246 | s->imr = 0; | 245 | s->imr = 0; |
@@ -256,6 +255,13 @@ void kvm_pic_reset(struct kvm_kpic_state *s) | |||
256 | s->rotate_on_auto_eoi = 0; | 255 | s->rotate_on_auto_eoi = 0; |
257 | s->special_fully_nested_mode = 0; | 256 | s->special_fully_nested_mode = 0; |
258 | s->init4 = 0; | 257 | s->init4 = 0; |
258 | |||
259 | for (irq = 0; irq < PIC_NUM_PINS/2; irq++) { | ||
260 | if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0)) | ||
261 | if (irr & (1 << irq) || isr & (1 << irq)) { | ||
262 | pic_clear_isr(s, irq); | ||
263 | } | ||
264 | } | ||
259 | } | 265 | } |
260 | 266 | ||
261 | static void pic_ioport_write(void *opaque, u32 addr, u32 val) | 267 | static void pic_ioport_write(void *opaque, u32 addr, u32 val) |
@@ -298,9 +304,9 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val) | |||
298 | priority = get_priority(s, s->isr); | 304 | priority = get_priority(s, s->isr); |
299 | if (priority != 8) { | 305 | if (priority != 8) { |
300 | irq = (priority + s->priority_add) & 7; | 306 | irq = (priority + s->priority_add) & 7; |
301 | pic_clear_isr(s, irq); | ||
302 | if (cmd == 5) | 307 | if (cmd == 5) |
303 | s->priority_add = (irq + 1) & 7; | 308 | s->priority_add = (irq + 1) & 7; |
309 | pic_clear_isr(s, irq); | ||
304 | pic_update_irq(s->pics_state); | 310 | pic_update_irq(s->pics_state); |
305 | } | 311 | } |
306 | break; | 312 | break; |
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 7d6058a2fd38..be399e207d57 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h | |||
@@ -71,6 +71,7 @@ struct kvm_pic { | |||
71 | int output; /* intr from master PIC */ | 71 | int output; /* intr from master PIC */ |
72 | struct kvm_io_device dev; | 72 | struct kvm_io_device dev; |
73 | void (*ack_notifier)(void *opaque, int irq); | 73 | void (*ack_notifier)(void *opaque, int irq); |
74 | unsigned long irq_states[16]; | ||
74 | }; | 75 | }; |
75 | 76 | ||
76 | struct kvm_pic *kvm_create_pic(struct kvm *kvm); | 77 | struct kvm_pic *kvm_create_pic(struct kvm *kvm); |
@@ -85,7 +86,11 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) | |||
85 | 86 | ||
86 | static inline int irqchip_in_kernel(struct kvm *kvm) | 87 | static inline int irqchip_in_kernel(struct kvm *kvm) |
87 | { | 88 | { |
88 | return pic_irqchip(kvm) != NULL; | 89 | int ret; |
90 | |||
91 | ret = (pic_irqchip(kvm) != NULL); | ||
92 | smp_rmb(); | ||
93 | return ret; | ||
89 | } | 94 | } |
90 | 95 | ||
91 | void kvm_pic_reset(struct kvm_kpic_state *s); | 96 | void kvm_pic_reset(struct kvm_kpic_state *s); |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 23c217692ea9..cd60c0bd1b32 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -32,7 +32,6 @@ | |||
32 | #include <asm/current.h> | 32 | #include <asm/current.h> |
33 | #include <asm/apicdef.h> | 33 | #include <asm/apicdef.h> |
34 | #include <asm/atomic.h> | 34 | #include <asm/atomic.h> |
35 | #include <asm/apicdef.h> | ||
36 | #include "kvm_cache_regs.h" | 35 | #include "kvm_cache_regs.h" |
37 | #include "irq.h" | 36 | #include "irq.h" |
38 | #include "trace.h" | 37 | #include "trace.h" |
@@ -471,11 +470,8 @@ static void apic_set_eoi(struct kvm_lapic *apic) | |||
471 | trigger_mode = IOAPIC_LEVEL_TRIG; | 470 | trigger_mode = IOAPIC_LEVEL_TRIG; |
472 | else | 471 | else |
473 | trigger_mode = IOAPIC_EDGE_TRIG; | 472 | trigger_mode = IOAPIC_EDGE_TRIG; |
474 | if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)) { | 473 | if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)) |
475 | mutex_lock(&apic->vcpu->kvm->irq_lock); | ||
476 | kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); | 474 | kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); |
477 | mutex_unlock(&apic->vcpu->kvm->irq_lock); | ||
478 | } | ||
479 | } | 475 | } |
480 | 476 | ||
481 | static void apic_send_ipi(struct kvm_lapic *apic) | 477 | static void apic_send_ipi(struct kvm_lapic *apic) |
@@ -504,9 +500,7 @@ static void apic_send_ipi(struct kvm_lapic *apic) | |||
504 | irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode, | 500 | irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode, |
505 | irq.vector); | 501 | irq.vector); |
506 | 502 | ||
507 | mutex_lock(&apic->vcpu->kvm->irq_lock); | ||
508 | kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq); | 503 | kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq); |
509 | mutex_unlock(&apic->vcpu->kvm->irq_lock); | ||
510 | } | 504 | } |
511 | 505 | ||
512 | static u32 apic_get_tmcct(struct kvm_lapic *apic) | 506 | static u32 apic_get_tmcct(struct kvm_lapic *apic) |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 818b92ad82cf..4c3e5b2314cb 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -2789,7 +2789,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) | |||
2789 | if (r) | 2789 | if (r) |
2790 | goto out; | 2790 | goto out; |
2791 | 2791 | ||
2792 | er = emulate_instruction(vcpu, vcpu->run, cr2, error_code, 0); | 2792 | er = emulate_instruction(vcpu, cr2, error_code, 0); |
2793 | 2793 | ||
2794 | switch (er) { | 2794 | switch (er) { |
2795 | case EMULATE_DONE: | 2795 | case EMULATE_DONE: |
@@ -2800,6 +2800,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) | |||
2800 | case EMULATE_FAIL: | 2800 | case EMULATE_FAIL: |
2801 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | 2801 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; |
2802 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | 2802 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; |
2803 | vcpu->run->internal.ndata = 0; | ||
2803 | return 0; | 2804 | return 0; |
2804 | default: | 2805 | default: |
2805 | BUG(); | 2806 | BUG(); |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 72558f8ff3f5..a6017132fba8 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -467,7 +467,6 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
467 | level = iterator.level; | 467 | level = iterator.level; |
468 | sptep = iterator.sptep; | 468 | sptep = iterator.sptep; |
469 | 469 | ||
470 | /* FIXME: properly handle invlpg on large guest pages */ | ||
471 | if (level == PT_PAGE_TABLE_LEVEL || | 470 | if (level == PT_PAGE_TABLE_LEVEL || |
472 | ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) || | 471 | ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) || |
473 | ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) { | 472 | ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) { |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index c17404add91f..3de0b37ec038 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -46,6 +46,7 @@ MODULE_LICENSE("GPL"); | |||
46 | #define SVM_FEATURE_NPT (1 << 0) | 46 | #define SVM_FEATURE_NPT (1 << 0) |
47 | #define SVM_FEATURE_LBRV (1 << 1) | 47 | #define SVM_FEATURE_LBRV (1 << 1) |
48 | #define SVM_FEATURE_SVML (1 << 2) | 48 | #define SVM_FEATURE_SVML (1 << 2) |
49 | #define SVM_FEATURE_PAUSE_FILTER (1 << 10) | ||
49 | 50 | ||
50 | #define NESTED_EXIT_HOST 0 /* Exit handled on host level */ | 51 | #define NESTED_EXIT_HOST 0 /* Exit handled on host level */ |
51 | #define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */ | 52 | #define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */ |
@@ -53,15 +54,6 @@ MODULE_LICENSE("GPL"); | |||
53 | 54 | ||
54 | #define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) | 55 | #define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) |
55 | 56 | ||
56 | /* Turn on to get debugging output*/ | ||
57 | /* #define NESTED_DEBUG */ | ||
58 | |||
59 | #ifdef NESTED_DEBUG | ||
60 | #define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args) | ||
61 | #else | ||
62 | #define nsvm_printk(fmt, args...) do {} while(0) | ||
63 | #endif | ||
64 | |||
65 | static const u32 host_save_user_msrs[] = { | 57 | static const u32 host_save_user_msrs[] = { |
66 | #ifdef CONFIG_X86_64 | 58 | #ifdef CONFIG_X86_64 |
67 | MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE, | 59 | MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE, |
@@ -85,6 +77,9 @@ struct nested_state { | |||
85 | /* gpa pointers to the real vectors */ | 77 | /* gpa pointers to the real vectors */ |
86 | u64 vmcb_msrpm; | 78 | u64 vmcb_msrpm; |
87 | 79 | ||
80 | /* A VMEXIT is required but not yet emulated */ | ||
81 | bool exit_required; | ||
82 | |||
88 | /* cache for intercepts of the guest */ | 83 | /* cache for intercepts of the guest */ |
89 | u16 intercept_cr_read; | 84 | u16 intercept_cr_read; |
90 | u16 intercept_cr_write; | 85 | u16 intercept_cr_write; |
@@ -112,6 +107,8 @@ struct vcpu_svm { | |||
112 | u32 *msrpm; | 107 | u32 *msrpm; |
113 | 108 | ||
114 | struct nested_state nested; | 109 | struct nested_state nested; |
110 | |||
111 | bool nmi_singlestep; | ||
115 | }; | 112 | }; |
116 | 113 | ||
117 | /* enable NPT for AMD64 and X86 with PAE */ | 114 | /* enable NPT for AMD64 and X86 with PAE */ |
@@ -286,7 +283,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
286 | struct vcpu_svm *svm = to_svm(vcpu); | 283 | struct vcpu_svm *svm = to_svm(vcpu); |
287 | 284 | ||
288 | if (!svm->next_rip) { | 285 | if (!svm->next_rip) { |
289 | if (emulate_instruction(vcpu, vcpu->run, 0, 0, EMULTYPE_SKIP) != | 286 | if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) != |
290 | EMULATE_DONE) | 287 | EMULATE_DONE) |
291 | printk(KERN_DEBUG "%s: NOP\n", __func__); | 288 | printk(KERN_DEBUG "%s: NOP\n", __func__); |
292 | return; | 289 | return; |
@@ -316,7 +313,7 @@ static void svm_hardware_disable(void *garbage) | |||
316 | cpu_svm_disable(); | 313 | cpu_svm_disable(); |
317 | } | 314 | } |
318 | 315 | ||
319 | static void svm_hardware_enable(void *garbage) | 316 | static int svm_hardware_enable(void *garbage) |
320 | { | 317 | { |
321 | 318 | ||
322 | struct svm_cpu_data *svm_data; | 319 | struct svm_cpu_data *svm_data; |
@@ -325,16 +322,21 @@ static void svm_hardware_enable(void *garbage) | |||
325 | struct desc_struct *gdt; | 322 | struct desc_struct *gdt; |
326 | int me = raw_smp_processor_id(); | 323 | int me = raw_smp_processor_id(); |
327 | 324 | ||
325 | rdmsrl(MSR_EFER, efer); | ||
326 | if (efer & EFER_SVME) | ||
327 | return -EBUSY; | ||
328 | |||
328 | if (!has_svm()) { | 329 | if (!has_svm()) { |
329 | printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me); | 330 | printk(KERN_ERR "svm_hardware_enable: err EOPNOTSUPP on %d\n", |
330 | return; | 331 | me); |
332 | return -EINVAL; | ||
331 | } | 333 | } |
332 | svm_data = per_cpu(svm_data, me); | 334 | svm_data = per_cpu(svm_data, me); |
333 | 335 | ||
334 | if (!svm_data) { | 336 | if (!svm_data) { |
335 | printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n", | 337 | printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n", |
336 | me); | 338 | me); |
337 | return; | 339 | return -EINVAL; |
338 | } | 340 | } |
339 | 341 | ||
340 | svm_data->asid_generation = 1; | 342 | svm_data->asid_generation = 1; |
@@ -345,11 +347,12 @@ static void svm_hardware_enable(void *garbage) | |||
345 | gdt = (struct desc_struct *)gdt_descr.base; | 347 | gdt = (struct desc_struct *)gdt_descr.base; |
346 | svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); | 348 | svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); |
347 | 349 | ||
348 | rdmsrl(MSR_EFER, efer); | ||
349 | wrmsrl(MSR_EFER, efer | EFER_SVME); | 350 | wrmsrl(MSR_EFER, efer | EFER_SVME); |
350 | 351 | ||
351 | wrmsrl(MSR_VM_HSAVE_PA, | 352 | wrmsrl(MSR_VM_HSAVE_PA, |
352 | page_to_pfn(svm_data->save_area) << PAGE_SHIFT); | 353 | page_to_pfn(svm_data->save_area) << PAGE_SHIFT); |
354 | |||
355 | return 0; | ||
353 | } | 356 | } |
354 | 357 | ||
355 | static void svm_cpu_uninit(int cpu) | 358 | static void svm_cpu_uninit(int cpu) |
@@ -476,7 +479,7 @@ static __init int svm_hardware_setup(void) | |||
476 | kvm_enable_efer_bits(EFER_SVME); | 479 | kvm_enable_efer_bits(EFER_SVME); |
477 | } | 480 | } |
478 | 481 | ||
479 | for_each_online_cpu(cpu) { | 482 | for_each_possible_cpu(cpu) { |
480 | r = svm_cpu_init(cpu); | 483 | r = svm_cpu_init(cpu); |
481 | if (r) | 484 | if (r) |
482 | goto err; | 485 | goto err; |
@@ -510,7 +513,7 @@ static __exit void svm_hardware_unsetup(void) | |||
510 | { | 513 | { |
511 | int cpu; | 514 | int cpu; |
512 | 515 | ||
513 | for_each_online_cpu(cpu) | 516 | for_each_possible_cpu(cpu) |
514 | svm_cpu_uninit(cpu); | 517 | svm_cpu_uninit(cpu); |
515 | 518 | ||
516 | __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER); | 519 | __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER); |
@@ -625,11 +628,12 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
625 | save->rip = 0x0000fff0; | 628 | save->rip = 0x0000fff0; |
626 | svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; | 629 | svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; |
627 | 630 | ||
628 | /* | 631 | /* This is the guest-visible cr0 value. |
629 | * cr0 val on cpu init should be 0x60000010, we enable cpu | 632 | * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. |
630 | * cache by default. the orderly way is to enable cache in bios. | ||
631 | */ | 633 | */ |
632 | save->cr0 = 0x00000010 | X86_CR0_PG | X86_CR0_WP; | 634 | svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; |
635 | kvm_set_cr0(&svm->vcpu, svm->vcpu.arch.cr0); | ||
636 | |||
633 | save->cr4 = X86_CR4_PAE; | 637 | save->cr4 = X86_CR4_PAE; |
634 | /* rdx = ?? */ | 638 | /* rdx = ?? */ |
635 | 639 | ||
@@ -644,8 +648,6 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
644 | control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK| | 648 | control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK| |
645 | INTERCEPT_CR3_MASK); | 649 | INTERCEPT_CR3_MASK); |
646 | save->g_pat = 0x0007040600070406ULL; | 650 | save->g_pat = 0x0007040600070406ULL; |
647 | /* enable caching because the QEMU Bios doesn't enable it */ | ||
648 | save->cr0 = X86_CR0_ET; | ||
649 | save->cr3 = 0; | 651 | save->cr3 = 0; |
650 | save->cr4 = 0; | 652 | save->cr4 = 0; |
651 | } | 653 | } |
@@ -654,6 +656,11 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
654 | svm->nested.vmcb = 0; | 656 | svm->nested.vmcb = 0; |
655 | svm->vcpu.arch.hflags = 0; | 657 | svm->vcpu.arch.hflags = 0; |
656 | 658 | ||
659 | if (svm_has(SVM_FEATURE_PAUSE_FILTER)) { | ||
660 | control->pause_filter_count = 3000; | ||
661 | control->intercept |= (1ULL << INTERCEPT_PAUSE); | ||
662 | } | ||
663 | |||
657 | enable_gif(svm); | 664 | enable_gif(svm); |
658 | } | 665 | } |
659 | 666 | ||
@@ -758,14 +765,13 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
758 | int i; | 765 | int i; |
759 | 766 | ||
760 | if (unlikely(cpu != vcpu->cpu)) { | 767 | if (unlikely(cpu != vcpu->cpu)) { |
761 | u64 tsc_this, delta; | 768 | u64 delta; |
762 | 769 | ||
763 | /* | 770 | /* |
764 | * Make sure that the guest sees a monotonically | 771 | * Make sure that the guest sees a monotonically |
765 | * increasing TSC. | 772 | * increasing TSC. |
766 | */ | 773 | */ |
767 | rdtscll(tsc_this); | 774 | delta = vcpu->arch.host_tsc - native_read_tsc(); |
768 | delta = vcpu->arch.host_tsc - tsc_this; | ||
769 | svm->vmcb->control.tsc_offset += delta; | 775 | svm->vmcb->control.tsc_offset += delta; |
770 | if (is_nested(svm)) | 776 | if (is_nested(svm)) |
771 | svm->nested.hsave->control.tsc_offset += delta; | 777 | svm->nested.hsave->control.tsc_offset += delta; |
@@ -787,7 +793,7 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) | |||
787 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) | 793 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) |
788 | wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); | 794 | wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); |
789 | 795 | ||
790 | rdtscll(vcpu->arch.host_tsc); | 796 | vcpu->arch.host_tsc = native_read_tsc(); |
791 | } | 797 | } |
792 | 798 | ||
793 | static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) | 799 | static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) |
@@ -1045,7 +1051,7 @@ static void update_db_intercept(struct kvm_vcpu *vcpu) | |||
1045 | svm->vmcb->control.intercept_exceptions &= | 1051 | svm->vmcb->control.intercept_exceptions &= |
1046 | ~((1 << DB_VECTOR) | (1 << BP_VECTOR)); | 1052 | ~((1 << DB_VECTOR) | (1 << BP_VECTOR)); |
1047 | 1053 | ||
1048 | if (vcpu->arch.singlestep) | 1054 | if (svm->nmi_singlestep) |
1049 | svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR); | 1055 | svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR); |
1050 | 1056 | ||
1051 | if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { | 1057 | if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { |
@@ -1060,26 +1066,16 @@ static void update_db_intercept(struct kvm_vcpu *vcpu) | |||
1060 | vcpu->guest_debug = 0; | 1066 | vcpu->guest_debug = 0; |
1061 | } | 1067 | } |
1062 | 1068 | ||
1063 | static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) | 1069 | static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) |
1064 | { | 1070 | { |
1065 | int old_debug = vcpu->guest_debug; | ||
1066 | struct vcpu_svm *svm = to_svm(vcpu); | 1071 | struct vcpu_svm *svm = to_svm(vcpu); |
1067 | 1072 | ||
1068 | vcpu->guest_debug = dbg->control; | ||
1069 | |||
1070 | update_db_intercept(vcpu); | ||
1071 | |||
1072 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) | 1073 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) |
1073 | svm->vmcb->save.dr7 = dbg->arch.debugreg[7]; | 1074 | svm->vmcb->save.dr7 = dbg->arch.debugreg[7]; |
1074 | else | 1075 | else |
1075 | svm->vmcb->save.dr7 = vcpu->arch.dr7; | 1076 | svm->vmcb->save.dr7 = vcpu->arch.dr7; |
1076 | 1077 | ||
1077 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | 1078 | update_db_intercept(vcpu); |
1078 | svm->vmcb->save.rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF; | ||
1079 | else if (old_debug & KVM_GUESTDBG_SINGLESTEP) | ||
1080 | svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); | ||
1081 | |||
1082 | return 0; | ||
1083 | } | 1079 | } |
1084 | 1080 | ||
1085 | static void load_host_msrs(struct kvm_vcpu *vcpu) | 1081 | static void load_host_msrs(struct kvm_vcpu *vcpu) |
@@ -1180,7 +1176,7 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, | |||
1180 | } | 1176 | } |
1181 | } | 1177 | } |
1182 | 1178 | ||
1183 | static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1179 | static int pf_interception(struct vcpu_svm *svm) |
1184 | { | 1180 | { |
1185 | u64 fault_address; | 1181 | u64 fault_address; |
1186 | u32 error_code; | 1182 | u32 error_code; |
@@ -1194,17 +1190,19 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1194 | return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); | 1190 | return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); |
1195 | } | 1191 | } |
1196 | 1192 | ||
1197 | static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1193 | static int db_interception(struct vcpu_svm *svm) |
1198 | { | 1194 | { |
1195 | struct kvm_run *kvm_run = svm->vcpu.run; | ||
1196 | |||
1199 | if (!(svm->vcpu.guest_debug & | 1197 | if (!(svm->vcpu.guest_debug & |
1200 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) && | 1198 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) && |
1201 | !svm->vcpu.arch.singlestep) { | 1199 | !svm->nmi_singlestep) { |
1202 | kvm_queue_exception(&svm->vcpu, DB_VECTOR); | 1200 | kvm_queue_exception(&svm->vcpu, DB_VECTOR); |
1203 | return 1; | 1201 | return 1; |
1204 | } | 1202 | } |
1205 | 1203 | ||
1206 | if (svm->vcpu.arch.singlestep) { | 1204 | if (svm->nmi_singlestep) { |
1207 | svm->vcpu.arch.singlestep = false; | 1205 | svm->nmi_singlestep = false; |
1208 | if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) | 1206 | if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) |
1209 | svm->vmcb->save.rflags &= | 1207 | svm->vmcb->save.rflags &= |
1210 | ~(X86_EFLAGS_TF | X86_EFLAGS_RF); | 1208 | ~(X86_EFLAGS_TF | X86_EFLAGS_RF); |
@@ -1223,25 +1221,27 @@ static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1223 | return 1; | 1221 | return 1; |
1224 | } | 1222 | } |
1225 | 1223 | ||
1226 | static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1224 | static int bp_interception(struct vcpu_svm *svm) |
1227 | { | 1225 | { |
1226 | struct kvm_run *kvm_run = svm->vcpu.run; | ||
1227 | |||
1228 | kvm_run->exit_reason = KVM_EXIT_DEBUG; | 1228 | kvm_run->exit_reason = KVM_EXIT_DEBUG; |
1229 | kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip; | 1229 | kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip; |
1230 | kvm_run->debug.arch.exception = BP_VECTOR; | 1230 | kvm_run->debug.arch.exception = BP_VECTOR; |
1231 | return 0; | 1231 | return 0; |
1232 | } | 1232 | } |
1233 | 1233 | ||
1234 | static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1234 | static int ud_interception(struct vcpu_svm *svm) |
1235 | { | 1235 | { |
1236 | int er; | 1236 | int er; |
1237 | 1237 | ||
1238 | er = emulate_instruction(&svm->vcpu, kvm_run, 0, 0, EMULTYPE_TRAP_UD); | 1238 | er = emulate_instruction(&svm->vcpu, 0, 0, EMULTYPE_TRAP_UD); |
1239 | if (er != EMULATE_DONE) | 1239 | if (er != EMULATE_DONE) |
1240 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); | 1240 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); |
1241 | return 1; | 1241 | return 1; |
1242 | } | 1242 | } |
1243 | 1243 | ||
1244 | static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1244 | static int nm_interception(struct vcpu_svm *svm) |
1245 | { | 1245 | { |
1246 | svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); | 1246 | svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); |
1247 | if (!(svm->vcpu.arch.cr0 & X86_CR0_TS)) | 1247 | if (!(svm->vcpu.arch.cr0 & X86_CR0_TS)) |
@@ -1251,7 +1251,7 @@ static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1251 | return 1; | 1251 | return 1; |
1252 | } | 1252 | } |
1253 | 1253 | ||
1254 | static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1254 | static int mc_interception(struct vcpu_svm *svm) |
1255 | { | 1255 | { |
1256 | /* | 1256 | /* |
1257 | * On an #MC intercept the MCE handler is not called automatically in | 1257 | * On an #MC intercept the MCE handler is not called automatically in |
@@ -1264,8 +1264,10 @@ static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1264 | return 1; | 1264 | return 1; |
1265 | } | 1265 | } |
1266 | 1266 | ||
1267 | static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1267 | static int shutdown_interception(struct vcpu_svm *svm) |
1268 | { | 1268 | { |
1269 | struct kvm_run *kvm_run = svm->vcpu.run; | ||
1270 | |||
1269 | /* | 1271 | /* |
1270 | * VMCB is undefined after a SHUTDOWN intercept | 1272 | * VMCB is undefined after a SHUTDOWN intercept |
1271 | * so reinitialize it. | 1273 | * so reinitialize it. |
@@ -1277,7 +1279,7 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1277 | return 0; | 1279 | return 0; |
1278 | } | 1280 | } |
1279 | 1281 | ||
1280 | static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1282 | static int io_interception(struct vcpu_svm *svm) |
1281 | { | 1283 | { |
1282 | u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ | 1284 | u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ |
1283 | int size, in, string; | 1285 | int size, in, string; |
@@ -1291,7 +1293,7 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1291 | 1293 | ||
1292 | if (string) { | 1294 | if (string) { |
1293 | if (emulate_instruction(&svm->vcpu, | 1295 | if (emulate_instruction(&svm->vcpu, |
1294 | kvm_run, 0, 0, 0) == EMULATE_DO_MMIO) | 1296 | 0, 0, 0) == EMULATE_DO_MMIO) |
1295 | return 0; | 1297 | return 0; |
1296 | return 1; | 1298 | return 1; |
1297 | } | 1299 | } |
@@ -1301,33 +1303,33 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1301 | size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; | 1303 | size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; |
1302 | 1304 | ||
1303 | skip_emulated_instruction(&svm->vcpu); | 1305 | skip_emulated_instruction(&svm->vcpu); |
1304 | return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port); | 1306 | return kvm_emulate_pio(&svm->vcpu, in, size, port); |
1305 | } | 1307 | } |
1306 | 1308 | ||
1307 | static int nmi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1309 | static int nmi_interception(struct vcpu_svm *svm) |
1308 | { | 1310 | { |
1309 | return 1; | 1311 | return 1; |
1310 | } | 1312 | } |
1311 | 1313 | ||
1312 | static int intr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1314 | static int intr_interception(struct vcpu_svm *svm) |
1313 | { | 1315 | { |
1314 | ++svm->vcpu.stat.irq_exits; | 1316 | ++svm->vcpu.stat.irq_exits; |
1315 | return 1; | 1317 | return 1; |
1316 | } | 1318 | } |
1317 | 1319 | ||
1318 | static int nop_on_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1320 | static int nop_on_interception(struct vcpu_svm *svm) |
1319 | { | 1321 | { |
1320 | return 1; | 1322 | return 1; |
1321 | } | 1323 | } |
1322 | 1324 | ||
1323 | static int halt_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1325 | static int halt_interception(struct vcpu_svm *svm) |
1324 | { | 1326 | { |
1325 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 1; | 1327 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 1; |
1326 | skip_emulated_instruction(&svm->vcpu); | 1328 | skip_emulated_instruction(&svm->vcpu); |
1327 | return kvm_emulate_halt(&svm->vcpu); | 1329 | return kvm_emulate_halt(&svm->vcpu); |
1328 | } | 1330 | } |
1329 | 1331 | ||
1330 | static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1332 | static int vmmcall_interception(struct vcpu_svm *svm) |
1331 | { | 1333 | { |
1332 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; | 1334 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; |
1333 | skip_emulated_instruction(&svm->vcpu); | 1335 | skip_emulated_instruction(&svm->vcpu); |
@@ -1378,8 +1380,15 @@ static inline int nested_svm_intr(struct vcpu_svm *svm) | |||
1378 | 1380 | ||
1379 | svm->vmcb->control.exit_code = SVM_EXIT_INTR; | 1381 | svm->vmcb->control.exit_code = SVM_EXIT_INTR; |
1380 | 1382 | ||
1381 | if (nested_svm_exit_handled(svm)) { | 1383 | if (svm->nested.intercept & 1ULL) { |
1382 | nsvm_printk("VMexit -> INTR\n"); | 1384 | /* |
1385 | * The #vmexit can't be emulated here directly because this | ||
1386 | * code path runs with irqs and preemption disabled. A | ||
1387 | * #vmexit emulation might sleep. Only signal request for | ||
1388 | * the #vmexit here. | ||
1389 | */ | ||
1390 | svm->nested.exit_required = true; | ||
1391 | trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip); | ||
1383 | return 1; | 1392 | return 1; |
1384 | } | 1393 | } |
1385 | 1394 | ||
@@ -1390,10 +1399,7 @@ static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, enum km_type idx) | |||
1390 | { | 1399 | { |
1391 | struct page *page; | 1400 | struct page *page; |
1392 | 1401 | ||
1393 | down_read(&current->mm->mmap_sem); | ||
1394 | page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT); | 1402 | page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT); |
1395 | up_read(&current->mm->mmap_sem); | ||
1396 | |||
1397 | if (is_error_page(page)) | 1403 | if (is_error_page(page)) |
1398 | goto error; | 1404 | goto error; |
1399 | 1405 | ||
@@ -1532,14 +1538,12 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm) | |||
1532 | } | 1538 | } |
1533 | default: { | 1539 | default: { |
1534 | u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR); | 1540 | u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR); |
1535 | nsvm_printk("exit code: 0x%x\n", exit_code); | ||
1536 | if (svm->nested.intercept & exit_bits) | 1541 | if (svm->nested.intercept & exit_bits) |
1537 | vmexit = NESTED_EXIT_DONE; | 1542 | vmexit = NESTED_EXIT_DONE; |
1538 | } | 1543 | } |
1539 | } | 1544 | } |
1540 | 1545 | ||
1541 | if (vmexit == NESTED_EXIT_DONE) { | 1546 | if (vmexit == NESTED_EXIT_DONE) { |
1542 | nsvm_printk("#VMEXIT reason=%04x\n", exit_code); | ||
1543 | nested_svm_vmexit(svm); | 1547 | nested_svm_vmexit(svm); |
1544 | } | 1548 | } |
1545 | 1549 | ||
@@ -1584,6 +1588,12 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
1584 | struct vmcb *hsave = svm->nested.hsave; | 1588 | struct vmcb *hsave = svm->nested.hsave; |
1585 | struct vmcb *vmcb = svm->vmcb; | 1589 | struct vmcb *vmcb = svm->vmcb; |
1586 | 1590 | ||
1591 | trace_kvm_nested_vmexit_inject(vmcb->control.exit_code, | ||
1592 | vmcb->control.exit_info_1, | ||
1593 | vmcb->control.exit_info_2, | ||
1594 | vmcb->control.exit_int_info, | ||
1595 | vmcb->control.exit_int_info_err); | ||
1596 | |||
1587 | nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0); | 1597 | nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0); |
1588 | if (!nested_vmcb) | 1598 | if (!nested_vmcb) |
1589 | return 1; | 1599 | return 1; |
@@ -1617,6 +1627,22 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
1617 | nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2; | 1627 | nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2; |
1618 | nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info; | 1628 | nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info; |
1619 | nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err; | 1629 | nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err; |
1630 | |||
1631 | /* | ||
1632 | * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have | ||
1633 | * to make sure that we do not lose injected events. So check event_inj | ||
1634 | * here and copy it to exit_int_info if it is valid. | ||
1635 | * Exit_int_info and event_inj can't be both valid because the case | ||
1636 | * below only happens on a VMRUN instruction intercept which has | ||
1637 | * no valid exit_int_info set. | ||
1638 | */ | ||
1639 | if (vmcb->control.event_inj & SVM_EVTINJ_VALID) { | ||
1640 | struct vmcb_control_area *nc = &nested_vmcb->control; | ||
1641 | |||
1642 | nc->exit_int_info = vmcb->control.event_inj; | ||
1643 | nc->exit_int_info_err = vmcb->control.event_inj_err; | ||
1644 | } | ||
1645 | |||
1620 | nested_vmcb->control.tlb_ctl = 0; | 1646 | nested_vmcb->control.tlb_ctl = 0; |
1621 | nested_vmcb->control.event_inj = 0; | 1647 | nested_vmcb->control.event_inj = 0; |
1622 | nested_vmcb->control.event_inj_err = 0; | 1648 | nested_vmcb->control.event_inj_err = 0; |
@@ -1628,10 +1654,6 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
1628 | /* Restore the original control entries */ | 1654 | /* Restore the original control entries */ |
1629 | copy_vmcb_control_area(vmcb, hsave); | 1655 | copy_vmcb_control_area(vmcb, hsave); |
1630 | 1656 | ||
1631 | /* Kill any pending exceptions */ | ||
1632 | if (svm->vcpu.arch.exception.pending == true) | ||
1633 | nsvm_printk("WARNING: Pending Exception\n"); | ||
1634 | |||
1635 | kvm_clear_exception_queue(&svm->vcpu); | 1657 | kvm_clear_exception_queue(&svm->vcpu); |
1636 | kvm_clear_interrupt_queue(&svm->vcpu); | 1658 | kvm_clear_interrupt_queue(&svm->vcpu); |
1637 | 1659 | ||
@@ -1702,6 +1724,12 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
1702 | /* nested_vmcb is our indicator if nested SVM is activated */ | 1724 | /* nested_vmcb is our indicator if nested SVM is activated */ |
1703 | svm->nested.vmcb = svm->vmcb->save.rax; | 1725 | svm->nested.vmcb = svm->vmcb->save.rax; |
1704 | 1726 | ||
1727 | trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, svm->nested.vmcb, | ||
1728 | nested_vmcb->save.rip, | ||
1729 | nested_vmcb->control.int_ctl, | ||
1730 | nested_vmcb->control.event_inj, | ||
1731 | nested_vmcb->control.nested_ctl); | ||
1732 | |||
1705 | /* Clear internal status */ | 1733 | /* Clear internal status */ |
1706 | kvm_clear_exception_queue(&svm->vcpu); | 1734 | kvm_clear_exception_queue(&svm->vcpu); |
1707 | kvm_clear_interrupt_queue(&svm->vcpu); | 1735 | kvm_clear_interrupt_queue(&svm->vcpu); |
@@ -1789,28 +1817,15 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
1789 | svm->nested.intercept = nested_vmcb->control.intercept; | 1817 | svm->nested.intercept = nested_vmcb->control.intercept; |
1790 | 1818 | ||
1791 | force_new_asid(&svm->vcpu); | 1819 | force_new_asid(&svm->vcpu); |
1792 | svm->vmcb->control.exit_int_info = nested_vmcb->control.exit_int_info; | ||
1793 | svm->vmcb->control.exit_int_info_err = nested_vmcb->control.exit_int_info_err; | ||
1794 | svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK; | 1820 | svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK; |
1795 | if (nested_vmcb->control.int_ctl & V_IRQ_MASK) { | ||
1796 | nsvm_printk("nSVM Injecting Interrupt: 0x%x\n", | ||
1797 | nested_vmcb->control.int_ctl); | ||
1798 | } | ||
1799 | if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK) | 1821 | if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK) |
1800 | svm->vcpu.arch.hflags |= HF_VINTR_MASK; | 1822 | svm->vcpu.arch.hflags |= HF_VINTR_MASK; |
1801 | else | 1823 | else |
1802 | svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; | 1824 | svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; |
1803 | 1825 | ||
1804 | nsvm_printk("nSVM exit_int_info: 0x%x | int_state: 0x%x\n", | ||
1805 | nested_vmcb->control.exit_int_info, | ||
1806 | nested_vmcb->control.int_state); | ||
1807 | |||
1808 | svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; | 1826 | svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; |
1809 | svm->vmcb->control.int_state = nested_vmcb->control.int_state; | 1827 | svm->vmcb->control.int_state = nested_vmcb->control.int_state; |
1810 | svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset; | 1828 | svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset; |
1811 | if (nested_vmcb->control.event_inj & SVM_EVTINJ_VALID) | ||
1812 | nsvm_printk("Injecting Event: 0x%x\n", | ||
1813 | nested_vmcb->control.event_inj); | ||
1814 | svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; | 1829 | svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; |
1815 | svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; | 1830 | svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; |
1816 | 1831 | ||
@@ -1837,7 +1852,7 @@ static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb) | |||
1837 | to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip; | 1852 | to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip; |
1838 | } | 1853 | } |
1839 | 1854 | ||
1840 | static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1855 | static int vmload_interception(struct vcpu_svm *svm) |
1841 | { | 1856 | { |
1842 | struct vmcb *nested_vmcb; | 1857 | struct vmcb *nested_vmcb; |
1843 | 1858 | ||
@@ -1857,7 +1872,7 @@ static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1857 | return 1; | 1872 | return 1; |
1858 | } | 1873 | } |
1859 | 1874 | ||
1860 | static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1875 | static int vmsave_interception(struct vcpu_svm *svm) |
1861 | { | 1876 | { |
1862 | struct vmcb *nested_vmcb; | 1877 | struct vmcb *nested_vmcb; |
1863 | 1878 | ||
@@ -1877,10 +1892,8 @@ static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1877 | return 1; | 1892 | return 1; |
1878 | } | 1893 | } |
1879 | 1894 | ||
1880 | static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1895 | static int vmrun_interception(struct vcpu_svm *svm) |
1881 | { | 1896 | { |
1882 | nsvm_printk("VMrun\n"); | ||
1883 | |||
1884 | if (nested_svm_check_permissions(svm)) | 1897 | if (nested_svm_check_permissions(svm)) |
1885 | return 1; | 1898 | return 1; |
1886 | 1899 | ||
@@ -1907,7 +1920,7 @@ failed: | |||
1907 | return 1; | 1920 | return 1; |
1908 | } | 1921 | } |
1909 | 1922 | ||
1910 | static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1923 | static int stgi_interception(struct vcpu_svm *svm) |
1911 | { | 1924 | { |
1912 | if (nested_svm_check_permissions(svm)) | 1925 | if (nested_svm_check_permissions(svm)) |
1913 | return 1; | 1926 | return 1; |
@@ -1920,7 +1933,7 @@ static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1920 | return 1; | 1933 | return 1; |
1921 | } | 1934 | } |
1922 | 1935 | ||
1923 | static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1936 | static int clgi_interception(struct vcpu_svm *svm) |
1924 | { | 1937 | { |
1925 | if (nested_svm_check_permissions(svm)) | 1938 | if (nested_svm_check_permissions(svm)) |
1926 | return 1; | 1939 | return 1; |
@@ -1937,10 +1950,12 @@ static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1937 | return 1; | 1950 | return 1; |
1938 | } | 1951 | } |
1939 | 1952 | ||
1940 | static int invlpga_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1953 | static int invlpga_interception(struct vcpu_svm *svm) |
1941 | { | 1954 | { |
1942 | struct kvm_vcpu *vcpu = &svm->vcpu; | 1955 | struct kvm_vcpu *vcpu = &svm->vcpu; |
1943 | nsvm_printk("INVLPGA\n"); | 1956 | |
1957 | trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX], | ||
1958 | vcpu->arch.regs[VCPU_REGS_RAX]); | ||
1944 | 1959 | ||
1945 | /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */ | 1960 | /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */ |
1946 | kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]); | 1961 | kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]); |
@@ -1950,15 +1965,21 @@ static int invlpga_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1950 | return 1; | 1965 | return 1; |
1951 | } | 1966 | } |
1952 | 1967 | ||
1953 | static int invalid_op_interception(struct vcpu_svm *svm, | 1968 | static int skinit_interception(struct vcpu_svm *svm) |
1954 | struct kvm_run *kvm_run) | ||
1955 | { | 1969 | { |
1970 | trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]); | ||
1971 | |||
1956 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); | 1972 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); |
1957 | return 1; | 1973 | return 1; |
1958 | } | 1974 | } |
1959 | 1975 | ||
1960 | static int task_switch_interception(struct vcpu_svm *svm, | 1976 | static int invalid_op_interception(struct vcpu_svm *svm) |
1961 | struct kvm_run *kvm_run) | 1977 | { |
1978 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); | ||
1979 | return 1; | ||
1980 | } | ||
1981 | |||
1982 | static int task_switch_interception(struct vcpu_svm *svm) | ||
1962 | { | 1983 | { |
1963 | u16 tss_selector; | 1984 | u16 tss_selector; |
1964 | int reason; | 1985 | int reason; |
@@ -2008,14 +2029,14 @@ static int task_switch_interception(struct vcpu_svm *svm, | |||
2008 | return kvm_task_switch(&svm->vcpu, tss_selector, reason); | 2029 | return kvm_task_switch(&svm->vcpu, tss_selector, reason); |
2009 | } | 2030 | } |
2010 | 2031 | ||
2011 | static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 2032 | static int cpuid_interception(struct vcpu_svm *svm) |
2012 | { | 2033 | { |
2013 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; | 2034 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; |
2014 | kvm_emulate_cpuid(&svm->vcpu); | 2035 | kvm_emulate_cpuid(&svm->vcpu); |
2015 | return 1; | 2036 | return 1; |
2016 | } | 2037 | } |
2017 | 2038 | ||
2018 | static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 2039 | static int iret_interception(struct vcpu_svm *svm) |
2019 | { | 2040 | { |
2020 | ++svm->vcpu.stat.nmi_window_exits; | 2041 | ++svm->vcpu.stat.nmi_window_exits; |
2021 | svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET); | 2042 | svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET); |
@@ -2023,26 +2044,27 @@ static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
2023 | return 1; | 2044 | return 1; |
2024 | } | 2045 | } |
2025 | 2046 | ||
2026 | static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 2047 | static int invlpg_interception(struct vcpu_svm *svm) |
2027 | { | 2048 | { |
2028 | if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) != EMULATE_DONE) | 2049 | if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE) |
2029 | pr_unimpl(&svm->vcpu, "%s: failed\n", __func__); | 2050 | pr_unimpl(&svm->vcpu, "%s: failed\n", __func__); |
2030 | return 1; | 2051 | return 1; |
2031 | } | 2052 | } |
2032 | 2053 | ||
2033 | static int emulate_on_interception(struct vcpu_svm *svm, | 2054 | static int emulate_on_interception(struct vcpu_svm *svm) |
2034 | struct kvm_run *kvm_run) | ||
2035 | { | 2055 | { |
2036 | if (emulate_instruction(&svm->vcpu, NULL, 0, 0, 0) != EMULATE_DONE) | 2056 | if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE) |
2037 | pr_unimpl(&svm->vcpu, "%s: failed\n", __func__); | 2057 | pr_unimpl(&svm->vcpu, "%s: failed\n", __func__); |
2038 | return 1; | 2058 | return 1; |
2039 | } | 2059 | } |
2040 | 2060 | ||
2041 | static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 2061 | static int cr8_write_interception(struct vcpu_svm *svm) |
2042 | { | 2062 | { |
2063 | struct kvm_run *kvm_run = svm->vcpu.run; | ||
2064 | |||
2043 | u8 cr8_prev = kvm_get_cr8(&svm->vcpu); | 2065 | u8 cr8_prev = kvm_get_cr8(&svm->vcpu); |
2044 | /* instruction emulation calls kvm_set_cr8() */ | 2066 | /* instruction emulation calls kvm_set_cr8() */ |
2045 | emulate_instruction(&svm->vcpu, NULL, 0, 0, 0); | 2067 | emulate_instruction(&svm->vcpu, 0, 0, 0); |
2046 | if (irqchip_in_kernel(svm->vcpu.kvm)) { | 2068 | if (irqchip_in_kernel(svm->vcpu.kvm)) { |
2047 | svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK; | 2069 | svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK; |
2048 | return 1; | 2070 | return 1; |
@@ -2128,7 +2150,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) | |||
2128 | return 0; | 2150 | return 0; |
2129 | } | 2151 | } |
2130 | 2152 | ||
2131 | static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 2153 | static int rdmsr_interception(struct vcpu_svm *svm) |
2132 | { | 2154 | { |
2133 | u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; | 2155 | u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; |
2134 | u64 data; | 2156 | u64 data; |
@@ -2221,7 +2243,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) | |||
2221 | return 0; | 2243 | return 0; |
2222 | } | 2244 | } |
2223 | 2245 | ||
2224 | static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 2246 | static int wrmsr_interception(struct vcpu_svm *svm) |
2225 | { | 2247 | { |
2226 | u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; | 2248 | u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; |
2227 | u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u) | 2249 | u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u) |
@@ -2237,17 +2259,18 @@ static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
2237 | return 1; | 2259 | return 1; |
2238 | } | 2260 | } |
2239 | 2261 | ||
2240 | static int msr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 2262 | static int msr_interception(struct vcpu_svm *svm) |
2241 | { | 2263 | { |
2242 | if (svm->vmcb->control.exit_info_1) | 2264 | if (svm->vmcb->control.exit_info_1) |
2243 | return wrmsr_interception(svm, kvm_run); | 2265 | return wrmsr_interception(svm); |
2244 | else | 2266 | else |
2245 | return rdmsr_interception(svm, kvm_run); | 2267 | return rdmsr_interception(svm); |
2246 | } | 2268 | } |
2247 | 2269 | ||
2248 | static int interrupt_window_interception(struct vcpu_svm *svm, | 2270 | static int interrupt_window_interception(struct vcpu_svm *svm) |
2249 | struct kvm_run *kvm_run) | ||
2250 | { | 2271 | { |
2272 | struct kvm_run *kvm_run = svm->vcpu.run; | ||
2273 | |||
2251 | svm_clear_vintr(svm); | 2274 | svm_clear_vintr(svm); |
2252 | svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; | 2275 | svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; |
2253 | /* | 2276 | /* |
@@ -2265,8 +2288,13 @@ static int interrupt_window_interception(struct vcpu_svm *svm, | |||
2265 | return 1; | 2288 | return 1; |
2266 | } | 2289 | } |
2267 | 2290 | ||
2268 | static int (*svm_exit_handlers[])(struct vcpu_svm *svm, | 2291 | static int pause_interception(struct vcpu_svm *svm) |
2269 | struct kvm_run *kvm_run) = { | 2292 | { |
2293 | kvm_vcpu_on_spin(&(svm->vcpu)); | ||
2294 | return 1; | ||
2295 | } | ||
2296 | |||
2297 | static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | ||
2270 | [SVM_EXIT_READ_CR0] = emulate_on_interception, | 2298 | [SVM_EXIT_READ_CR0] = emulate_on_interception, |
2271 | [SVM_EXIT_READ_CR3] = emulate_on_interception, | 2299 | [SVM_EXIT_READ_CR3] = emulate_on_interception, |
2272 | [SVM_EXIT_READ_CR4] = emulate_on_interception, | 2300 | [SVM_EXIT_READ_CR4] = emulate_on_interception, |
@@ -2301,6 +2329,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm, | |||
2301 | [SVM_EXIT_CPUID] = cpuid_interception, | 2329 | [SVM_EXIT_CPUID] = cpuid_interception, |
2302 | [SVM_EXIT_IRET] = iret_interception, | 2330 | [SVM_EXIT_IRET] = iret_interception, |
2303 | [SVM_EXIT_INVD] = emulate_on_interception, | 2331 | [SVM_EXIT_INVD] = emulate_on_interception, |
2332 | [SVM_EXIT_PAUSE] = pause_interception, | ||
2304 | [SVM_EXIT_HLT] = halt_interception, | 2333 | [SVM_EXIT_HLT] = halt_interception, |
2305 | [SVM_EXIT_INVLPG] = invlpg_interception, | 2334 | [SVM_EXIT_INVLPG] = invlpg_interception, |
2306 | [SVM_EXIT_INVLPGA] = invlpga_interception, | 2335 | [SVM_EXIT_INVLPGA] = invlpga_interception, |
@@ -2314,26 +2343,36 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm, | |||
2314 | [SVM_EXIT_VMSAVE] = vmsave_interception, | 2343 | [SVM_EXIT_VMSAVE] = vmsave_interception, |
2315 | [SVM_EXIT_STGI] = stgi_interception, | 2344 | [SVM_EXIT_STGI] = stgi_interception, |
2316 | [SVM_EXIT_CLGI] = clgi_interception, | 2345 | [SVM_EXIT_CLGI] = clgi_interception, |
2317 | [SVM_EXIT_SKINIT] = invalid_op_interception, | 2346 | [SVM_EXIT_SKINIT] = skinit_interception, |
2318 | [SVM_EXIT_WBINVD] = emulate_on_interception, | 2347 | [SVM_EXIT_WBINVD] = emulate_on_interception, |
2319 | [SVM_EXIT_MONITOR] = invalid_op_interception, | 2348 | [SVM_EXIT_MONITOR] = invalid_op_interception, |
2320 | [SVM_EXIT_MWAIT] = invalid_op_interception, | 2349 | [SVM_EXIT_MWAIT] = invalid_op_interception, |
2321 | [SVM_EXIT_NPF] = pf_interception, | 2350 | [SVM_EXIT_NPF] = pf_interception, |
2322 | }; | 2351 | }; |
2323 | 2352 | ||
2324 | static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | 2353 | static int handle_exit(struct kvm_vcpu *vcpu) |
2325 | { | 2354 | { |
2326 | struct vcpu_svm *svm = to_svm(vcpu); | 2355 | struct vcpu_svm *svm = to_svm(vcpu); |
2356 | struct kvm_run *kvm_run = vcpu->run; | ||
2327 | u32 exit_code = svm->vmcb->control.exit_code; | 2357 | u32 exit_code = svm->vmcb->control.exit_code; |
2328 | 2358 | ||
2329 | trace_kvm_exit(exit_code, svm->vmcb->save.rip); | 2359 | trace_kvm_exit(exit_code, svm->vmcb->save.rip); |
2330 | 2360 | ||
2361 | if (unlikely(svm->nested.exit_required)) { | ||
2362 | nested_svm_vmexit(svm); | ||
2363 | svm->nested.exit_required = false; | ||
2364 | |||
2365 | return 1; | ||
2366 | } | ||
2367 | |||
2331 | if (is_nested(svm)) { | 2368 | if (is_nested(svm)) { |
2332 | int vmexit; | 2369 | int vmexit; |
2333 | 2370 | ||
2334 | nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n", | 2371 | trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code, |
2335 | exit_code, svm->vmcb->control.exit_info_1, | 2372 | svm->vmcb->control.exit_info_1, |
2336 | svm->vmcb->control.exit_info_2, svm->vmcb->save.rip); | 2373 | svm->vmcb->control.exit_info_2, |
2374 | svm->vmcb->control.exit_int_info, | ||
2375 | svm->vmcb->control.exit_int_info_err); | ||
2337 | 2376 | ||
2338 | vmexit = nested_svm_exit_special(svm); | 2377 | vmexit = nested_svm_exit_special(svm); |
2339 | 2378 | ||
@@ -2383,7 +2422,7 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
2383 | return 0; | 2422 | return 0; |
2384 | } | 2423 | } |
2385 | 2424 | ||
2386 | return svm_exit_handlers[exit_code](svm, kvm_run); | 2425 | return svm_exit_handlers[exit_code](svm); |
2387 | } | 2426 | } |
2388 | 2427 | ||
2389 | static void reload_tss(struct kvm_vcpu *vcpu) | 2428 | static void reload_tss(struct kvm_vcpu *vcpu) |
@@ -2460,20 +2499,47 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu) | |||
2460 | !(svm->vcpu.arch.hflags & HF_NMI_MASK); | 2499 | !(svm->vcpu.arch.hflags & HF_NMI_MASK); |
2461 | } | 2500 | } |
2462 | 2501 | ||
2502 | static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu) | ||
2503 | { | ||
2504 | struct vcpu_svm *svm = to_svm(vcpu); | ||
2505 | |||
2506 | return !!(svm->vcpu.arch.hflags & HF_NMI_MASK); | ||
2507 | } | ||
2508 | |||
2509 | static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) | ||
2510 | { | ||
2511 | struct vcpu_svm *svm = to_svm(vcpu); | ||
2512 | |||
2513 | if (masked) { | ||
2514 | svm->vcpu.arch.hflags |= HF_NMI_MASK; | ||
2515 | svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET); | ||
2516 | } else { | ||
2517 | svm->vcpu.arch.hflags &= ~HF_NMI_MASK; | ||
2518 | svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET); | ||
2519 | } | ||
2520 | } | ||
2521 | |||
2463 | static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) | 2522 | static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) |
2464 | { | 2523 | { |
2465 | struct vcpu_svm *svm = to_svm(vcpu); | 2524 | struct vcpu_svm *svm = to_svm(vcpu); |
2466 | struct vmcb *vmcb = svm->vmcb; | 2525 | struct vmcb *vmcb = svm->vmcb; |
2467 | return (vmcb->save.rflags & X86_EFLAGS_IF) && | 2526 | int ret; |
2468 | !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) && | 2527 | |
2469 | gif_set(svm) && | 2528 | if (!gif_set(svm) || |
2470 | !(is_nested(svm) && (svm->vcpu.arch.hflags & HF_VINTR_MASK)); | 2529 | (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)) |
2530 | return 0; | ||
2531 | |||
2532 | ret = !!(vmcb->save.rflags & X86_EFLAGS_IF); | ||
2533 | |||
2534 | if (is_nested(svm)) | ||
2535 | return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK); | ||
2536 | |||
2537 | return ret; | ||
2471 | } | 2538 | } |
2472 | 2539 | ||
2473 | static void enable_irq_window(struct kvm_vcpu *vcpu) | 2540 | static void enable_irq_window(struct kvm_vcpu *vcpu) |
2474 | { | 2541 | { |
2475 | struct vcpu_svm *svm = to_svm(vcpu); | 2542 | struct vcpu_svm *svm = to_svm(vcpu); |
2476 | nsvm_printk("Trying to open IRQ window\n"); | ||
2477 | 2543 | ||
2478 | nested_svm_intr(svm); | 2544 | nested_svm_intr(svm); |
2479 | 2545 | ||
@@ -2498,7 +2564,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) | |||
2498 | /* Something prevents NMI from being injected. Single step over | 2564 | /* Something prevents NMI from being injected. Single step over |
2499 | possible problem (IRET or exception injection or interrupt | 2565 | possible problem (IRET or exception injection or interrupt |
2500 | shadow) */ | 2566 | shadow) */ |
2501 | vcpu->arch.singlestep = true; | 2567 | svm->nmi_singlestep = true; |
2502 | svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); | 2568 | svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); |
2503 | update_db_intercept(vcpu); | 2569 | update_db_intercept(vcpu); |
2504 | } | 2570 | } |
@@ -2588,13 +2654,20 @@ static void svm_complete_interrupts(struct vcpu_svm *svm) | |||
2588 | #define R "e" | 2654 | #define R "e" |
2589 | #endif | 2655 | #endif |
2590 | 2656 | ||
2591 | static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2657 | static void svm_vcpu_run(struct kvm_vcpu *vcpu) |
2592 | { | 2658 | { |
2593 | struct vcpu_svm *svm = to_svm(vcpu); | 2659 | struct vcpu_svm *svm = to_svm(vcpu); |
2594 | u16 fs_selector; | 2660 | u16 fs_selector; |
2595 | u16 gs_selector; | 2661 | u16 gs_selector; |
2596 | u16 ldt_selector; | 2662 | u16 ldt_selector; |
2597 | 2663 | ||
2664 | /* | ||
2665 | * A vmexit emulation is required before the vcpu can be executed | ||
2666 | * again. | ||
2667 | */ | ||
2668 | if (unlikely(svm->nested.exit_required)) | ||
2669 | return; | ||
2670 | |||
2598 | svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; | 2671 | svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; |
2599 | svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; | 2672 | svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; |
2600 | svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; | 2673 | svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; |
@@ -2893,6 +2966,8 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
2893 | .queue_exception = svm_queue_exception, | 2966 | .queue_exception = svm_queue_exception, |
2894 | .interrupt_allowed = svm_interrupt_allowed, | 2967 | .interrupt_allowed = svm_interrupt_allowed, |
2895 | .nmi_allowed = svm_nmi_allowed, | 2968 | .nmi_allowed = svm_nmi_allowed, |
2969 | .get_nmi_mask = svm_get_nmi_mask, | ||
2970 | .set_nmi_mask = svm_set_nmi_mask, | ||
2896 | .enable_nmi_window = enable_nmi_window, | 2971 | .enable_nmi_window = enable_nmi_window, |
2897 | .enable_irq_window = enable_irq_window, | 2972 | .enable_irq_window = enable_irq_window, |
2898 | .update_cr8_intercept = update_cr8_intercept, | 2973 | .update_cr8_intercept = update_cr8_intercept, |
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 0d480e77eacf..816e0449db0b 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h | |||
@@ -349,6 +349,171 @@ TRACE_EVENT(kvm_apic_accept_irq, | |||
349 | __entry->coalesced ? " (coalesced)" : "") | 349 | __entry->coalesced ? " (coalesced)" : "") |
350 | ); | 350 | ); |
351 | 351 | ||
352 | /* | ||
353 | * Tracepoint for nested VMRUN | ||
354 | */ | ||
355 | TRACE_EVENT(kvm_nested_vmrun, | ||
356 | TP_PROTO(__u64 rip, __u64 vmcb, __u64 nested_rip, __u32 int_ctl, | ||
357 | __u32 event_inj, bool npt), | ||
358 | TP_ARGS(rip, vmcb, nested_rip, int_ctl, event_inj, npt), | ||
359 | |||
360 | TP_STRUCT__entry( | ||
361 | __field( __u64, rip ) | ||
362 | __field( __u64, vmcb ) | ||
363 | __field( __u64, nested_rip ) | ||
364 | __field( __u32, int_ctl ) | ||
365 | __field( __u32, event_inj ) | ||
366 | __field( bool, npt ) | ||
367 | ), | ||
368 | |||
369 | TP_fast_assign( | ||
370 | __entry->rip = rip; | ||
371 | __entry->vmcb = vmcb; | ||
372 | __entry->nested_rip = nested_rip; | ||
373 | __entry->int_ctl = int_ctl; | ||
374 | __entry->event_inj = event_inj; | ||
375 | __entry->npt = npt; | ||
376 | ), | ||
377 | |||
378 | TP_printk("rip: 0x%016llx vmcb: 0x%016llx nrip: 0x%016llx int_ctl: 0x%08x " | ||
379 | "event_inj: 0x%08x npt: %s\n", | ||
380 | __entry->rip, __entry->vmcb, __entry->nested_rip, | ||
381 | __entry->int_ctl, __entry->event_inj, | ||
382 | __entry->npt ? "on" : "off") | ||
383 | ); | ||
384 | |||
385 | /* | ||
386 | * Tracepoint for #VMEXIT while nested | ||
387 | */ | ||
388 | TRACE_EVENT(kvm_nested_vmexit, | ||
389 | TP_PROTO(__u64 rip, __u32 exit_code, | ||
390 | __u64 exit_info1, __u64 exit_info2, | ||
391 | __u32 exit_int_info, __u32 exit_int_info_err), | ||
392 | TP_ARGS(rip, exit_code, exit_info1, exit_info2, | ||
393 | exit_int_info, exit_int_info_err), | ||
394 | |||
395 | TP_STRUCT__entry( | ||
396 | __field( __u64, rip ) | ||
397 | __field( __u32, exit_code ) | ||
398 | __field( __u64, exit_info1 ) | ||
399 | __field( __u64, exit_info2 ) | ||
400 | __field( __u32, exit_int_info ) | ||
401 | __field( __u32, exit_int_info_err ) | ||
402 | ), | ||
403 | |||
404 | TP_fast_assign( | ||
405 | __entry->rip = rip; | ||
406 | __entry->exit_code = exit_code; | ||
407 | __entry->exit_info1 = exit_info1; | ||
408 | __entry->exit_info2 = exit_info2; | ||
409 | __entry->exit_int_info = exit_int_info; | ||
410 | __entry->exit_int_info_err = exit_int_info_err; | ||
411 | ), | ||
412 | TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx " | ||
413 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n", | ||
414 | __entry->rip, | ||
415 | ftrace_print_symbols_seq(p, __entry->exit_code, | ||
416 | kvm_x86_ops->exit_reasons_str), | ||
417 | __entry->exit_info1, __entry->exit_info2, | ||
418 | __entry->exit_int_info, __entry->exit_int_info_err) | ||
419 | ); | ||
420 | |||
421 | /* | ||
422 | * Tracepoint for #VMEXIT reinjected to the guest | ||
423 | */ | ||
424 | TRACE_EVENT(kvm_nested_vmexit_inject, | ||
425 | TP_PROTO(__u32 exit_code, | ||
426 | __u64 exit_info1, __u64 exit_info2, | ||
427 | __u32 exit_int_info, __u32 exit_int_info_err), | ||
428 | TP_ARGS(exit_code, exit_info1, exit_info2, | ||
429 | exit_int_info, exit_int_info_err), | ||
430 | |||
431 | TP_STRUCT__entry( | ||
432 | __field( __u32, exit_code ) | ||
433 | __field( __u64, exit_info1 ) | ||
434 | __field( __u64, exit_info2 ) | ||
435 | __field( __u32, exit_int_info ) | ||
436 | __field( __u32, exit_int_info_err ) | ||
437 | ), | ||
438 | |||
439 | TP_fast_assign( | ||
440 | __entry->exit_code = exit_code; | ||
441 | __entry->exit_info1 = exit_info1; | ||
442 | __entry->exit_info2 = exit_info2; | ||
443 | __entry->exit_int_info = exit_int_info; | ||
444 | __entry->exit_int_info_err = exit_int_info_err; | ||
445 | ), | ||
446 | |||
447 | TP_printk("reason: %s ext_inf1: 0x%016llx " | ||
448 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n", | ||
449 | ftrace_print_symbols_seq(p, __entry->exit_code, | ||
450 | kvm_x86_ops->exit_reasons_str), | ||
451 | __entry->exit_info1, __entry->exit_info2, | ||
452 | __entry->exit_int_info, __entry->exit_int_info_err) | ||
453 | ); | ||
454 | |||
455 | /* | ||
456 | * Tracepoint for nested #vmexit because of interrupt pending | ||
457 | */ | ||
458 | TRACE_EVENT(kvm_nested_intr_vmexit, | ||
459 | TP_PROTO(__u64 rip), | ||
460 | TP_ARGS(rip), | ||
461 | |||
462 | TP_STRUCT__entry( | ||
463 | __field( __u64, rip ) | ||
464 | ), | ||
465 | |||
466 | TP_fast_assign( | ||
467 | __entry->rip = rip | ||
468 | ), | ||
469 | |||
470 | TP_printk("rip: 0x%016llx\n", __entry->rip) | ||
471 | ); | ||
472 | |||
473 | /* | ||
474 | * Tracepoint for the INVLPGA instruction | ||
475 | */ | ||
476 | TRACE_EVENT(kvm_invlpga, | ||
477 | TP_PROTO(__u64 rip, int asid, u64 address), | ||
478 | TP_ARGS(rip, asid, address), | ||
479 | |||
480 | TP_STRUCT__entry( | ||
481 | __field( __u64, rip ) | ||
482 | __field( int, asid ) | ||
483 | __field( __u64, address ) | ||
484 | ), | ||
485 | |||
486 | TP_fast_assign( | ||
487 | __entry->rip = rip; | ||
488 | __entry->asid = asid; | ||
489 | __entry->address = address; | ||
490 | ), | ||
491 | |||
492 | TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx\n", | ||
493 | __entry->rip, __entry->asid, __entry->address) | ||
494 | ); | ||
495 | |||
496 | /* | ||
497 | * Tracepoint for the SKINIT instruction | ||
498 | */ | ||
499 | TRACE_EVENT(kvm_skinit, | ||
500 | TP_PROTO(__u64 rip, __u32 slb), | ||
501 | TP_ARGS(rip, slb), | ||
502 | |||
503 | TP_STRUCT__entry( | ||
504 | __field( __u64, rip ) | ||
505 | __field( __u32, slb ) | ||
506 | ), | ||
507 | |||
508 | TP_fast_assign( | ||
509 | __entry->rip = rip; | ||
510 | __entry->slb = slb; | ||
511 | ), | ||
512 | |||
513 | TP_printk("rip: 0x%016llx slb: 0x%08x\n", | ||
514 | __entry->rip, __entry->slb) | ||
515 | ); | ||
516 | |||
352 | #endif /* _TRACE_KVM_H */ | 517 | #endif /* _TRACE_KVM_H */ |
353 | 518 | ||
354 | /* This part must be outside protection */ | 519 | /* This part must be outside protection */ |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ed53b42caba1..d4918d6fc924 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -61,12 +61,37 @@ module_param_named(unrestricted_guest, | |||
61 | static int __read_mostly emulate_invalid_guest_state = 0; | 61 | static int __read_mostly emulate_invalid_guest_state = 0; |
62 | module_param(emulate_invalid_guest_state, bool, S_IRUGO); | 62 | module_param(emulate_invalid_guest_state, bool, S_IRUGO); |
63 | 63 | ||
64 | /* | ||
65 | * These 2 parameters are used to config the controls for Pause-Loop Exiting: | ||
66 | * ple_gap: upper bound on the amount of time between two successive | ||
67 | * executions of PAUSE in a loop. Also indicates whether PLE is enabled. | ||
68 | * According to tests, this time is usually smaller than 41 cycles. | ||
69 | * ple_window: upper bound on the amount of time a guest is allowed to execute | ||
70 | * in a PAUSE loop. Tests indicate that most spinlocks are held for | ||
71 | * less than 2^12 cycles | ||
72 | * Time is measured based on a counter that runs at the same rate as the TSC, | ||
73 | * refer SDM volume 3b section 21.6.13 & 22.1.3. | ||
74 | */ | ||
75 | #define KVM_VMX_DEFAULT_PLE_GAP 41 | ||
76 | #define KVM_VMX_DEFAULT_PLE_WINDOW 4096 | ||
77 | static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP; | ||
78 | module_param(ple_gap, int, S_IRUGO); | ||
79 | |||
80 | static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; | ||
81 | module_param(ple_window, int, S_IRUGO); | ||
82 | |||
64 | struct vmcs { | 83 | struct vmcs { |
65 | u32 revision_id; | 84 | u32 revision_id; |
66 | u32 abort; | 85 | u32 abort; |
67 | char data[0]; | 86 | char data[0]; |
68 | }; | 87 | }; |
69 | 88 | ||
89 | struct shared_msr_entry { | ||
90 | unsigned index; | ||
91 | u64 data; | ||
92 | u64 mask; | ||
93 | }; | ||
94 | |||
70 | struct vcpu_vmx { | 95 | struct vcpu_vmx { |
71 | struct kvm_vcpu vcpu; | 96 | struct kvm_vcpu vcpu; |
72 | struct list_head local_vcpus_link; | 97 | struct list_head local_vcpus_link; |
@@ -74,13 +99,12 @@ struct vcpu_vmx { | |||
74 | int launched; | 99 | int launched; |
75 | u8 fail; | 100 | u8 fail; |
76 | u32 idt_vectoring_info; | 101 | u32 idt_vectoring_info; |
77 | struct kvm_msr_entry *guest_msrs; | 102 | struct shared_msr_entry *guest_msrs; |
78 | struct kvm_msr_entry *host_msrs; | ||
79 | int nmsrs; | 103 | int nmsrs; |
80 | int save_nmsrs; | 104 | int save_nmsrs; |
81 | int msr_offset_efer; | ||
82 | #ifdef CONFIG_X86_64 | 105 | #ifdef CONFIG_X86_64 |
83 | int msr_offset_kernel_gs_base; | 106 | u64 msr_host_kernel_gs_base; |
107 | u64 msr_guest_kernel_gs_base; | ||
84 | #endif | 108 | #endif |
85 | struct vmcs *vmcs; | 109 | struct vmcs *vmcs; |
86 | struct { | 110 | struct { |
@@ -88,7 +112,6 @@ struct vcpu_vmx { | |||
88 | u16 fs_sel, gs_sel, ldt_sel; | 112 | u16 fs_sel, gs_sel, ldt_sel; |
89 | int gs_ldt_reload_needed; | 113 | int gs_ldt_reload_needed; |
90 | int fs_reload_needed; | 114 | int fs_reload_needed; |
91 | int guest_efer_loaded; | ||
92 | } host_state; | 115 | } host_state; |
93 | struct { | 116 | struct { |
94 | int vm86_active; | 117 | int vm86_active; |
@@ -107,7 +130,6 @@ struct vcpu_vmx { | |||
107 | } rmode; | 130 | } rmode; |
108 | int vpid; | 131 | int vpid; |
109 | bool emulation_required; | 132 | bool emulation_required; |
110 | enum emulation_result invalid_state_emulation_result; | ||
111 | 133 | ||
112 | /* Support for vnmi-less CPUs */ | 134 | /* Support for vnmi-less CPUs */ |
113 | int soft_vnmi_blocked; | 135 | int soft_vnmi_blocked; |
@@ -176,6 +198,8 @@ static struct kvm_vmx_segment_field { | |||
176 | VMX_SEGMENT_FIELD(LDTR), | 198 | VMX_SEGMENT_FIELD(LDTR), |
177 | }; | 199 | }; |
178 | 200 | ||
201 | static u64 host_efer; | ||
202 | |||
179 | static void ept_save_pdptrs(struct kvm_vcpu *vcpu); | 203 | static void ept_save_pdptrs(struct kvm_vcpu *vcpu); |
180 | 204 | ||
181 | /* | 205 | /* |
@@ -184,28 +208,12 @@ static void ept_save_pdptrs(struct kvm_vcpu *vcpu); | |||
184 | */ | 208 | */ |
185 | static const u32 vmx_msr_index[] = { | 209 | static const u32 vmx_msr_index[] = { |
186 | #ifdef CONFIG_X86_64 | 210 | #ifdef CONFIG_X86_64 |
187 | MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, MSR_KERNEL_GS_BASE, | 211 | MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, |
188 | #endif | 212 | #endif |
189 | MSR_EFER, MSR_K6_STAR, | 213 | MSR_EFER, MSR_K6_STAR, |
190 | }; | 214 | }; |
191 | #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) | 215 | #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) |
192 | 216 | ||
193 | static void load_msrs(struct kvm_msr_entry *e, int n) | ||
194 | { | ||
195 | int i; | ||
196 | |||
197 | for (i = 0; i < n; ++i) | ||
198 | wrmsrl(e[i].index, e[i].data); | ||
199 | } | ||
200 | |||
201 | static void save_msrs(struct kvm_msr_entry *e, int n) | ||
202 | { | ||
203 | int i; | ||
204 | |||
205 | for (i = 0; i < n; ++i) | ||
206 | rdmsrl(e[i].index, e[i].data); | ||
207 | } | ||
208 | |||
209 | static inline int is_page_fault(u32 intr_info) | 217 | static inline int is_page_fault(u32 intr_info) |
210 | { | 218 | { |
211 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | 219 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | |
@@ -320,6 +328,12 @@ static inline int cpu_has_vmx_unrestricted_guest(void) | |||
320 | SECONDARY_EXEC_UNRESTRICTED_GUEST; | 328 | SECONDARY_EXEC_UNRESTRICTED_GUEST; |
321 | } | 329 | } |
322 | 330 | ||
331 | static inline int cpu_has_vmx_ple(void) | ||
332 | { | ||
333 | return vmcs_config.cpu_based_2nd_exec_ctrl & | ||
334 | SECONDARY_EXEC_PAUSE_LOOP_EXITING; | ||
335 | } | ||
336 | |||
323 | static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) | 337 | static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) |
324 | { | 338 | { |
325 | return flexpriority_enabled && | 339 | return flexpriority_enabled && |
@@ -348,7 +362,7 @@ static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) | |||
348 | int i; | 362 | int i; |
349 | 363 | ||
350 | for (i = 0; i < vmx->nmsrs; ++i) | 364 | for (i = 0; i < vmx->nmsrs; ++i) |
351 | if (vmx->guest_msrs[i].index == msr) | 365 | if (vmx_msr_index[vmx->guest_msrs[i].index] == msr) |
352 | return i; | 366 | return i; |
353 | return -1; | 367 | return -1; |
354 | } | 368 | } |
@@ -379,7 +393,7 @@ static inline void __invept(int ext, u64 eptp, gpa_t gpa) | |||
379 | : : "a" (&operand), "c" (ext) : "cc", "memory"); | 393 | : : "a" (&operand), "c" (ext) : "cc", "memory"); |
380 | } | 394 | } |
381 | 395 | ||
382 | static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) | 396 | static struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) |
383 | { | 397 | { |
384 | int i; | 398 | int i; |
385 | 399 | ||
@@ -570,17 +584,12 @@ static void reload_tss(void) | |||
570 | load_TR_desc(); | 584 | load_TR_desc(); |
571 | } | 585 | } |
572 | 586 | ||
573 | static void load_transition_efer(struct vcpu_vmx *vmx) | 587 | static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) |
574 | { | 588 | { |
575 | int efer_offset = vmx->msr_offset_efer; | ||
576 | u64 host_efer; | ||
577 | u64 guest_efer; | 589 | u64 guest_efer; |
578 | u64 ignore_bits; | 590 | u64 ignore_bits; |
579 | 591 | ||
580 | if (efer_offset < 0) | 592 | guest_efer = vmx->vcpu.arch.shadow_efer; |
581 | return; | ||
582 | host_efer = vmx->host_msrs[efer_offset].data; | ||
583 | guest_efer = vmx->guest_msrs[efer_offset].data; | ||
584 | 593 | ||
585 | /* | 594 | /* |
586 | * NX is emulated; LMA and LME handled by hardware; SCE meaningless | 595 | * NX is emulated; LMA and LME handled by hardware; SCE meaningless |
@@ -593,27 +602,17 @@ static void load_transition_efer(struct vcpu_vmx *vmx) | |||
593 | if (guest_efer & EFER_LMA) | 602 | if (guest_efer & EFER_LMA) |
594 | ignore_bits &= ~(u64)EFER_SCE; | 603 | ignore_bits &= ~(u64)EFER_SCE; |
595 | #endif | 604 | #endif |
596 | if ((guest_efer & ~ignore_bits) == (host_efer & ~ignore_bits)) | ||
597 | return; | ||
598 | |||
599 | vmx->host_state.guest_efer_loaded = 1; | ||
600 | guest_efer &= ~ignore_bits; | 605 | guest_efer &= ~ignore_bits; |
601 | guest_efer |= host_efer & ignore_bits; | 606 | guest_efer |= host_efer & ignore_bits; |
602 | wrmsrl(MSR_EFER, guest_efer); | 607 | vmx->guest_msrs[efer_offset].data = guest_efer; |
603 | vmx->vcpu.stat.efer_reload++; | 608 | vmx->guest_msrs[efer_offset].mask = ~ignore_bits; |
604 | } | 609 | return true; |
605 | |||
606 | static void reload_host_efer(struct vcpu_vmx *vmx) | ||
607 | { | ||
608 | if (vmx->host_state.guest_efer_loaded) { | ||
609 | vmx->host_state.guest_efer_loaded = 0; | ||
610 | load_msrs(vmx->host_msrs + vmx->msr_offset_efer, 1); | ||
611 | } | ||
612 | } | 610 | } |
613 | 611 | ||
614 | static void vmx_save_host_state(struct kvm_vcpu *vcpu) | 612 | static void vmx_save_host_state(struct kvm_vcpu *vcpu) |
615 | { | 613 | { |
616 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 614 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
615 | int i; | ||
617 | 616 | ||
618 | if (vmx->host_state.loaded) | 617 | if (vmx->host_state.loaded) |
619 | return; | 618 | return; |
@@ -650,13 +649,15 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) | |||
650 | #endif | 649 | #endif |
651 | 650 | ||
652 | #ifdef CONFIG_X86_64 | 651 | #ifdef CONFIG_X86_64 |
653 | if (is_long_mode(&vmx->vcpu)) | 652 | if (is_long_mode(&vmx->vcpu)) { |
654 | save_msrs(vmx->host_msrs + | 653 | rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); |
655 | vmx->msr_offset_kernel_gs_base, 1); | 654 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); |
656 | 655 | } | |
657 | #endif | 656 | #endif |
658 | load_msrs(vmx->guest_msrs, vmx->save_nmsrs); | 657 | for (i = 0; i < vmx->save_nmsrs; ++i) |
659 | load_transition_efer(vmx); | 658 | kvm_set_shared_msr(vmx->guest_msrs[i].index, |
659 | vmx->guest_msrs[i].data, | ||
660 | vmx->guest_msrs[i].mask); | ||
660 | } | 661 | } |
661 | 662 | ||
662 | static void __vmx_load_host_state(struct vcpu_vmx *vmx) | 663 | static void __vmx_load_host_state(struct vcpu_vmx *vmx) |
@@ -684,9 +685,12 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) | |||
684 | local_irq_restore(flags); | 685 | local_irq_restore(flags); |
685 | } | 686 | } |
686 | reload_tss(); | 687 | reload_tss(); |
687 | save_msrs(vmx->guest_msrs, vmx->save_nmsrs); | 688 | #ifdef CONFIG_X86_64 |
688 | load_msrs(vmx->host_msrs, vmx->save_nmsrs); | 689 | if (is_long_mode(&vmx->vcpu)) { |
689 | reload_host_efer(vmx); | 690 | rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); |
691 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); | ||
692 | } | ||
693 | #endif | ||
690 | } | 694 | } |
691 | 695 | ||
692 | static void vmx_load_host_state(struct vcpu_vmx *vmx) | 696 | static void vmx_load_host_state(struct vcpu_vmx *vmx) |
@@ -877,19 +881,14 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | |||
877 | /* | 881 | /* |
878 | * Swap MSR entry in host/guest MSR entry array. | 882 | * Swap MSR entry in host/guest MSR entry array. |
879 | */ | 883 | */ |
880 | #ifdef CONFIG_X86_64 | ||
881 | static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) | 884 | static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) |
882 | { | 885 | { |
883 | struct kvm_msr_entry tmp; | 886 | struct shared_msr_entry tmp; |
884 | 887 | ||
885 | tmp = vmx->guest_msrs[to]; | 888 | tmp = vmx->guest_msrs[to]; |
886 | vmx->guest_msrs[to] = vmx->guest_msrs[from]; | 889 | vmx->guest_msrs[to] = vmx->guest_msrs[from]; |
887 | vmx->guest_msrs[from] = tmp; | 890 | vmx->guest_msrs[from] = tmp; |
888 | tmp = vmx->host_msrs[to]; | ||
889 | vmx->host_msrs[to] = vmx->host_msrs[from]; | ||
890 | vmx->host_msrs[from] = tmp; | ||
891 | } | 891 | } |
892 | #endif | ||
893 | 892 | ||
894 | /* | 893 | /* |
895 | * Set up the vmcs to automatically save and restore system | 894 | * Set up the vmcs to automatically save and restore system |
@@ -898,15 +897,13 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) | |||
898 | */ | 897 | */ |
899 | static void setup_msrs(struct vcpu_vmx *vmx) | 898 | static void setup_msrs(struct vcpu_vmx *vmx) |
900 | { | 899 | { |
901 | int save_nmsrs; | 900 | int save_nmsrs, index; |
902 | unsigned long *msr_bitmap; | 901 | unsigned long *msr_bitmap; |
903 | 902 | ||
904 | vmx_load_host_state(vmx); | 903 | vmx_load_host_state(vmx); |
905 | save_nmsrs = 0; | 904 | save_nmsrs = 0; |
906 | #ifdef CONFIG_X86_64 | 905 | #ifdef CONFIG_X86_64 |
907 | if (is_long_mode(&vmx->vcpu)) { | 906 | if (is_long_mode(&vmx->vcpu)) { |
908 | int index; | ||
909 | |||
910 | index = __find_msr_index(vmx, MSR_SYSCALL_MASK); | 907 | index = __find_msr_index(vmx, MSR_SYSCALL_MASK); |
911 | if (index >= 0) | 908 | if (index >= 0) |
912 | move_msr_up(vmx, index, save_nmsrs++); | 909 | move_msr_up(vmx, index, save_nmsrs++); |
@@ -916,9 +913,6 @@ static void setup_msrs(struct vcpu_vmx *vmx) | |||
916 | index = __find_msr_index(vmx, MSR_CSTAR); | 913 | index = __find_msr_index(vmx, MSR_CSTAR); |
917 | if (index >= 0) | 914 | if (index >= 0) |
918 | move_msr_up(vmx, index, save_nmsrs++); | 915 | move_msr_up(vmx, index, save_nmsrs++); |
919 | index = __find_msr_index(vmx, MSR_KERNEL_GS_BASE); | ||
920 | if (index >= 0) | ||
921 | move_msr_up(vmx, index, save_nmsrs++); | ||
922 | /* | 916 | /* |
923 | * MSR_K6_STAR is only needed on long mode guests, and only | 917 | * MSR_K6_STAR is only needed on long mode guests, and only |
924 | * if efer.sce is enabled. | 918 | * if efer.sce is enabled. |
@@ -928,13 +922,11 @@ static void setup_msrs(struct vcpu_vmx *vmx) | |||
928 | move_msr_up(vmx, index, save_nmsrs++); | 922 | move_msr_up(vmx, index, save_nmsrs++); |
929 | } | 923 | } |
930 | #endif | 924 | #endif |
931 | vmx->save_nmsrs = save_nmsrs; | 925 | index = __find_msr_index(vmx, MSR_EFER); |
926 | if (index >= 0 && update_transition_efer(vmx, index)) | ||
927 | move_msr_up(vmx, index, save_nmsrs++); | ||
932 | 928 | ||
933 | #ifdef CONFIG_X86_64 | 929 | vmx->save_nmsrs = save_nmsrs; |
934 | vmx->msr_offset_kernel_gs_base = | ||
935 | __find_msr_index(vmx, MSR_KERNEL_GS_BASE); | ||
936 | #endif | ||
937 | vmx->msr_offset_efer = __find_msr_index(vmx, MSR_EFER); | ||
938 | 930 | ||
939 | if (cpu_has_vmx_msr_bitmap()) { | 931 | if (cpu_has_vmx_msr_bitmap()) { |
940 | if (is_long_mode(&vmx->vcpu)) | 932 | if (is_long_mode(&vmx->vcpu)) |
@@ -976,7 +968,7 @@ static void guest_write_tsc(u64 guest_tsc, u64 host_tsc) | |||
976 | static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | 968 | static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) |
977 | { | 969 | { |
978 | u64 data; | 970 | u64 data; |
979 | struct kvm_msr_entry *msr; | 971 | struct shared_msr_entry *msr; |
980 | 972 | ||
981 | if (!pdata) { | 973 | if (!pdata) { |
982 | printk(KERN_ERR "BUG: get_msr called with NULL pdata\n"); | 974 | printk(KERN_ERR "BUG: get_msr called with NULL pdata\n"); |
@@ -991,9 +983,13 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
991 | case MSR_GS_BASE: | 983 | case MSR_GS_BASE: |
992 | data = vmcs_readl(GUEST_GS_BASE); | 984 | data = vmcs_readl(GUEST_GS_BASE); |
993 | break; | 985 | break; |
986 | case MSR_KERNEL_GS_BASE: | ||
987 | vmx_load_host_state(to_vmx(vcpu)); | ||
988 | data = to_vmx(vcpu)->msr_guest_kernel_gs_base; | ||
989 | break; | ||
990 | #endif | ||
994 | case MSR_EFER: | 991 | case MSR_EFER: |
995 | return kvm_get_msr_common(vcpu, msr_index, pdata); | 992 | return kvm_get_msr_common(vcpu, msr_index, pdata); |
996 | #endif | ||
997 | case MSR_IA32_TSC: | 993 | case MSR_IA32_TSC: |
998 | data = guest_read_tsc(); | 994 | data = guest_read_tsc(); |
999 | break; | 995 | break; |
@@ -1007,6 +1003,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
1007 | data = vmcs_readl(GUEST_SYSENTER_ESP); | 1003 | data = vmcs_readl(GUEST_SYSENTER_ESP); |
1008 | break; | 1004 | break; |
1009 | default: | 1005 | default: |
1006 | vmx_load_host_state(to_vmx(vcpu)); | ||
1010 | msr = find_msr_entry(to_vmx(vcpu), msr_index); | 1007 | msr = find_msr_entry(to_vmx(vcpu), msr_index); |
1011 | if (msr) { | 1008 | if (msr) { |
1012 | vmx_load_host_state(to_vmx(vcpu)); | 1009 | vmx_load_host_state(to_vmx(vcpu)); |
@@ -1028,7 +1025,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
1028 | static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | 1025 | static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) |
1029 | { | 1026 | { |
1030 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 1027 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
1031 | struct kvm_msr_entry *msr; | 1028 | struct shared_msr_entry *msr; |
1032 | u64 host_tsc; | 1029 | u64 host_tsc; |
1033 | int ret = 0; | 1030 | int ret = 0; |
1034 | 1031 | ||
@@ -1044,6 +1041,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
1044 | case MSR_GS_BASE: | 1041 | case MSR_GS_BASE: |
1045 | vmcs_writel(GUEST_GS_BASE, data); | 1042 | vmcs_writel(GUEST_GS_BASE, data); |
1046 | break; | 1043 | break; |
1044 | case MSR_KERNEL_GS_BASE: | ||
1045 | vmx_load_host_state(vmx); | ||
1046 | vmx->msr_guest_kernel_gs_base = data; | ||
1047 | break; | ||
1047 | #endif | 1048 | #endif |
1048 | case MSR_IA32_SYSENTER_CS: | 1049 | case MSR_IA32_SYSENTER_CS: |
1049 | vmcs_write32(GUEST_SYSENTER_CS, data); | 1050 | vmcs_write32(GUEST_SYSENTER_CS, data); |
@@ -1097,30 +1098,14 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) | |||
1097 | } | 1098 | } |
1098 | } | 1099 | } |
1099 | 1100 | ||
1100 | static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) | 1101 | static void set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) |
1101 | { | 1102 | { |
1102 | int old_debug = vcpu->guest_debug; | ||
1103 | unsigned long flags; | ||
1104 | |||
1105 | vcpu->guest_debug = dbg->control; | ||
1106 | if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE)) | ||
1107 | vcpu->guest_debug = 0; | ||
1108 | |||
1109 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) | 1103 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) |
1110 | vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]); | 1104 | vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]); |
1111 | else | 1105 | else |
1112 | vmcs_writel(GUEST_DR7, vcpu->arch.dr7); | 1106 | vmcs_writel(GUEST_DR7, vcpu->arch.dr7); |
1113 | 1107 | ||
1114 | flags = vmcs_readl(GUEST_RFLAGS); | ||
1115 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | ||
1116 | flags |= X86_EFLAGS_TF | X86_EFLAGS_RF; | ||
1117 | else if (old_debug & KVM_GUESTDBG_SINGLESTEP) | ||
1118 | flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); | ||
1119 | vmcs_writel(GUEST_RFLAGS, flags); | ||
1120 | |||
1121 | update_exception_bitmap(vcpu); | 1108 | update_exception_bitmap(vcpu); |
1122 | |||
1123 | return 0; | ||
1124 | } | 1109 | } |
1125 | 1110 | ||
1126 | static __init int cpu_has_kvm_support(void) | 1111 | static __init int cpu_has_kvm_support(void) |
@@ -1139,12 +1124,15 @@ static __init int vmx_disabled_by_bios(void) | |||
1139 | /* locked but not enabled */ | 1124 | /* locked but not enabled */ |
1140 | } | 1125 | } |
1141 | 1126 | ||
1142 | static void hardware_enable(void *garbage) | 1127 | static int hardware_enable(void *garbage) |
1143 | { | 1128 | { |
1144 | int cpu = raw_smp_processor_id(); | 1129 | int cpu = raw_smp_processor_id(); |
1145 | u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); | 1130 | u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); |
1146 | u64 old; | 1131 | u64 old; |
1147 | 1132 | ||
1133 | if (read_cr4() & X86_CR4_VMXE) | ||
1134 | return -EBUSY; | ||
1135 | |||
1148 | INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); | 1136 | INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); |
1149 | rdmsrl(MSR_IA32_FEATURE_CONTROL, old); | 1137 | rdmsrl(MSR_IA32_FEATURE_CONTROL, old); |
1150 | if ((old & (FEATURE_CONTROL_LOCKED | | 1138 | if ((old & (FEATURE_CONTROL_LOCKED | |
@@ -1159,6 +1147,10 @@ static void hardware_enable(void *garbage) | |||
1159 | asm volatile (ASM_VMX_VMXON_RAX | 1147 | asm volatile (ASM_VMX_VMXON_RAX |
1160 | : : "a"(&phys_addr), "m"(phys_addr) | 1148 | : : "a"(&phys_addr), "m"(phys_addr) |
1161 | : "memory", "cc"); | 1149 | : "memory", "cc"); |
1150 | |||
1151 | ept_sync_global(); | ||
1152 | |||
1153 | return 0; | ||
1162 | } | 1154 | } |
1163 | 1155 | ||
1164 | static void vmclear_local_vcpus(void) | 1156 | static void vmclear_local_vcpus(void) |
@@ -1250,7 +1242,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
1250 | SECONDARY_EXEC_WBINVD_EXITING | | 1242 | SECONDARY_EXEC_WBINVD_EXITING | |
1251 | SECONDARY_EXEC_ENABLE_VPID | | 1243 | SECONDARY_EXEC_ENABLE_VPID | |
1252 | SECONDARY_EXEC_ENABLE_EPT | | 1244 | SECONDARY_EXEC_ENABLE_EPT | |
1253 | SECONDARY_EXEC_UNRESTRICTED_GUEST; | 1245 | SECONDARY_EXEC_UNRESTRICTED_GUEST | |
1246 | SECONDARY_EXEC_PAUSE_LOOP_EXITING; | ||
1254 | if (adjust_vmx_controls(min2, opt2, | 1247 | if (adjust_vmx_controls(min2, opt2, |
1255 | MSR_IA32_VMX_PROCBASED_CTLS2, | 1248 | MSR_IA32_VMX_PROCBASED_CTLS2, |
1256 | &_cpu_based_2nd_exec_control) < 0) | 1249 | &_cpu_based_2nd_exec_control) < 0) |
@@ -1344,15 +1337,17 @@ static void free_kvm_area(void) | |||
1344 | { | 1337 | { |
1345 | int cpu; | 1338 | int cpu; |
1346 | 1339 | ||
1347 | for_each_online_cpu(cpu) | 1340 | for_each_possible_cpu(cpu) { |
1348 | free_vmcs(per_cpu(vmxarea, cpu)); | 1341 | free_vmcs(per_cpu(vmxarea, cpu)); |
1342 | per_cpu(vmxarea, cpu) = NULL; | ||
1343 | } | ||
1349 | } | 1344 | } |
1350 | 1345 | ||
1351 | static __init int alloc_kvm_area(void) | 1346 | static __init int alloc_kvm_area(void) |
1352 | { | 1347 | { |
1353 | int cpu; | 1348 | int cpu; |
1354 | 1349 | ||
1355 | for_each_online_cpu(cpu) { | 1350 | for_each_possible_cpu(cpu) { |
1356 | struct vmcs *vmcs; | 1351 | struct vmcs *vmcs; |
1357 | 1352 | ||
1358 | vmcs = alloc_vmcs_cpu(cpu); | 1353 | vmcs = alloc_vmcs_cpu(cpu); |
@@ -1394,6 +1389,9 @@ static __init int hardware_setup(void) | |||
1394 | if (enable_ept && !cpu_has_vmx_ept_2m_page()) | 1389 | if (enable_ept && !cpu_has_vmx_ept_2m_page()) |
1395 | kvm_disable_largepages(); | 1390 | kvm_disable_largepages(); |
1396 | 1391 | ||
1392 | if (!cpu_has_vmx_ple()) | ||
1393 | ple_gap = 0; | ||
1394 | |||
1397 | return alloc_kvm_area(); | 1395 | return alloc_kvm_area(); |
1398 | } | 1396 | } |
1399 | 1397 | ||
@@ -1536,8 +1534,16 @@ continue_rmode: | |||
1536 | static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) | 1534 | static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) |
1537 | { | 1535 | { |
1538 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 1536 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
1539 | struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER); | 1537 | struct shared_msr_entry *msr = find_msr_entry(vmx, MSR_EFER); |
1538 | |||
1539 | if (!msr) | ||
1540 | return; | ||
1540 | 1541 | ||
1542 | /* | ||
1543 | * Force kernel_gs_base reloading before EFER changes, as control | ||
1544 | * of this msr depends on is_long_mode(). | ||
1545 | */ | ||
1546 | vmx_load_host_state(to_vmx(vcpu)); | ||
1541 | vcpu->arch.shadow_efer = efer; | 1547 | vcpu->arch.shadow_efer = efer; |
1542 | if (!msr) | 1548 | if (!msr) |
1543 | return; | 1549 | return; |
@@ -1727,6 +1733,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
1727 | vmcs_write64(EPT_POINTER, eptp); | 1733 | vmcs_write64(EPT_POINTER, eptp); |
1728 | guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 : | 1734 | guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 : |
1729 | vcpu->kvm->arch.ept_identity_map_addr; | 1735 | vcpu->kvm->arch.ept_identity_map_addr; |
1736 | ept_load_pdptrs(vcpu); | ||
1730 | } | 1737 | } |
1731 | 1738 | ||
1732 | vmx_flush_tlb(vcpu); | 1739 | vmx_flush_tlb(vcpu); |
@@ -2302,13 +2309,22 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2302 | ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | 2309 | ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; |
2303 | if (vmx->vpid == 0) | 2310 | if (vmx->vpid == 0) |
2304 | exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; | 2311 | exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; |
2305 | if (!enable_ept) | 2312 | if (!enable_ept) { |
2306 | exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; | 2313 | exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; |
2314 | enable_unrestricted_guest = 0; | ||
2315 | } | ||
2307 | if (!enable_unrestricted_guest) | 2316 | if (!enable_unrestricted_guest) |
2308 | exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; | 2317 | exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; |
2318 | if (!ple_gap) | ||
2319 | exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; | ||
2309 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | 2320 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); |
2310 | } | 2321 | } |
2311 | 2322 | ||
2323 | if (ple_gap) { | ||
2324 | vmcs_write32(PLE_GAP, ple_gap); | ||
2325 | vmcs_write32(PLE_WINDOW, ple_window); | ||
2326 | } | ||
2327 | |||
2312 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, !!bypass_guest_pf); | 2328 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, !!bypass_guest_pf); |
2313 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf); | 2329 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf); |
2314 | vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ | 2330 | vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ |
@@ -2376,10 +2392,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2376 | if (wrmsr_safe(index, data_low, data_high) < 0) | 2392 | if (wrmsr_safe(index, data_low, data_high) < 0) |
2377 | continue; | 2393 | continue; |
2378 | data = data_low | ((u64)data_high << 32); | 2394 | data = data_low | ((u64)data_high << 32); |
2379 | vmx->host_msrs[j].index = index; | 2395 | vmx->guest_msrs[j].index = i; |
2380 | vmx->host_msrs[j].reserved = 0; | 2396 | vmx->guest_msrs[j].data = 0; |
2381 | vmx->host_msrs[j].data = data; | 2397 | vmx->guest_msrs[j].mask = -1ull; |
2382 | vmx->guest_msrs[j] = vmx->host_msrs[j]; | ||
2383 | ++vmx->nmsrs; | 2398 | ++vmx->nmsrs; |
2384 | } | 2399 | } |
2385 | 2400 | ||
@@ -2510,7 +2525,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
2510 | if (vmx->vpid != 0) | 2525 | if (vmx->vpid != 0) |
2511 | vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); | 2526 | vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); |
2512 | 2527 | ||
2513 | vmx->vcpu.arch.cr0 = 0x60000010; | 2528 | vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; |
2514 | vmx_set_cr0(&vmx->vcpu, vmx->vcpu.arch.cr0); /* enter rmode */ | 2529 | vmx_set_cr0(&vmx->vcpu, vmx->vcpu.arch.cr0); /* enter rmode */ |
2515 | vmx_set_cr4(&vmx->vcpu, 0); | 2530 | vmx_set_cr4(&vmx->vcpu, 0); |
2516 | vmx_set_efer(&vmx->vcpu, 0); | 2531 | vmx_set_efer(&vmx->vcpu, 0); |
@@ -2627,6 +2642,34 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) | |||
2627 | GUEST_INTR_STATE_NMI)); | 2642 | GUEST_INTR_STATE_NMI)); |
2628 | } | 2643 | } |
2629 | 2644 | ||
2645 | static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) | ||
2646 | { | ||
2647 | if (!cpu_has_virtual_nmis()) | ||
2648 | return to_vmx(vcpu)->soft_vnmi_blocked; | ||
2649 | else | ||
2650 | return !!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & | ||
2651 | GUEST_INTR_STATE_NMI); | ||
2652 | } | ||
2653 | |||
2654 | static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) | ||
2655 | { | ||
2656 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
2657 | |||
2658 | if (!cpu_has_virtual_nmis()) { | ||
2659 | if (vmx->soft_vnmi_blocked != masked) { | ||
2660 | vmx->soft_vnmi_blocked = masked; | ||
2661 | vmx->vnmi_blocked_time = 0; | ||
2662 | } | ||
2663 | } else { | ||
2664 | if (masked) | ||
2665 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, | ||
2666 | GUEST_INTR_STATE_NMI); | ||
2667 | else | ||
2668 | vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, | ||
2669 | GUEST_INTR_STATE_NMI); | ||
2670 | } | ||
2671 | } | ||
2672 | |||
2630 | static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) | 2673 | static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) |
2631 | { | 2674 | { |
2632 | return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && | 2675 | return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && |
@@ -2659,7 +2702,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, | |||
2659 | * Cause the #SS fault with 0 error code in VM86 mode. | 2702 | * Cause the #SS fault with 0 error code in VM86 mode. |
2660 | */ | 2703 | */ |
2661 | if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) | 2704 | if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) |
2662 | if (emulate_instruction(vcpu, NULL, 0, 0, 0) == EMULATE_DONE) | 2705 | if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE) |
2663 | return 1; | 2706 | return 1; |
2664 | /* | 2707 | /* |
2665 | * Forward all other exceptions that are valid in real mode. | 2708 | * Forward all other exceptions that are valid in real mode. |
@@ -2710,15 +2753,16 @@ static void kvm_machine_check(void) | |||
2710 | #endif | 2753 | #endif |
2711 | } | 2754 | } |
2712 | 2755 | ||
2713 | static int handle_machine_check(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2756 | static int handle_machine_check(struct kvm_vcpu *vcpu) |
2714 | { | 2757 | { |
2715 | /* already handled by vcpu_run */ | 2758 | /* already handled by vcpu_run */ |
2716 | return 1; | 2759 | return 1; |
2717 | } | 2760 | } |
2718 | 2761 | ||
2719 | static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2762 | static int handle_exception(struct kvm_vcpu *vcpu) |
2720 | { | 2763 | { |
2721 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2764 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
2765 | struct kvm_run *kvm_run = vcpu->run; | ||
2722 | u32 intr_info, ex_no, error_code; | 2766 | u32 intr_info, ex_no, error_code; |
2723 | unsigned long cr2, rip, dr6; | 2767 | unsigned long cr2, rip, dr6; |
2724 | u32 vect_info; | 2768 | u32 vect_info; |
@@ -2728,12 +2772,17 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2728 | intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | 2772 | intr_info = vmcs_read32(VM_EXIT_INTR_INFO); |
2729 | 2773 | ||
2730 | if (is_machine_check(intr_info)) | 2774 | if (is_machine_check(intr_info)) |
2731 | return handle_machine_check(vcpu, kvm_run); | 2775 | return handle_machine_check(vcpu); |
2732 | 2776 | ||
2733 | if ((vect_info & VECTORING_INFO_VALID_MASK) && | 2777 | if ((vect_info & VECTORING_INFO_VALID_MASK) && |
2734 | !is_page_fault(intr_info)) | 2778 | !is_page_fault(intr_info)) { |
2735 | printk(KERN_ERR "%s: unexpected, vectoring info 0x%x " | 2779 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; |
2736 | "intr info 0x%x\n", __func__, vect_info, intr_info); | 2780 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX; |
2781 | vcpu->run->internal.ndata = 2; | ||
2782 | vcpu->run->internal.data[0] = vect_info; | ||
2783 | vcpu->run->internal.data[1] = intr_info; | ||
2784 | return 0; | ||
2785 | } | ||
2737 | 2786 | ||
2738 | if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR) | 2787 | if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR) |
2739 | return 1; /* already handled by vmx_vcpu_run() */ | 2788 | return 1; /* already handled by vmx_vcpu_run() */ |
@@ -2744,7 +2793,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2744 | } | 2793 | } |
2745 | 2794 | ||
2746 | if (is_invalid_opcode(intr_info)) { | 2795 | if (is_invalid_opcode(intr_info)) { |
2747 | er = emulate_instruction(vcpu, kvm_run, 0, 0, EMULTYPE_TRAP_UD); | 2796 | er = emulate_instruction(vcpu, 0, 0, EMULTYPE_TRAP_UD); |
2748 | if (er != EMULATE_DONE) | 2797 | if (er != EMULATE_DONE) |
2749 | kvm_queue_exception(vcpu, UD_VECTOR); | 2798 | kvm_queue_exception(vcpu, UD_VECTOR); |
2750 | return 1; | 2799 | return 1; |
@@ -2803,20 +2852,19 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2803 | return 0; | 2852 | return 0; |
2804 | } | 2853 | } |
2805 | 2854 | ||
2806 | static int handle_external_interrupt(struct kvm_vcpu *vcpu, | 2855 | static int handle_external_interrupt(struct kvm_vcpu *vcpu) |
2807 | struct kvm_run *kvm_run) | ||
2808 | { | 2856 | { |
2809 | ++vcpu->stat.irq_exits; | 2857 | ++vcpu->stat.irq_exits; |
2810 | return 1; | 2858 | return 1; |
2811 | } | 2859 | } |
2812 | 2860 | ||
2813 | static int handle_triple_fault(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2861 | static int handle_triple_fault(struct kvm_vcpu *vcpu) |
2814 | { | 2862 | { |
2815 | kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; | 2863 | vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; |
2816 | return 0; | 2864 | return 0; |
2817 | } | 2865 | } |
2818 | 2866 | ||
2819 | static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2867 | static int handle_io(struct kvm_vcpu *vcpu) |
2820 | { | 2868 | { |
2821 | unsigned long exit_qualification; | 2869 | unsigned long exit_qualification; |
2822 | int size, in, string; | 2870 | int size, in, string; |
@@ -2827,8 +2875,7 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2827 | string = (exit_qualification & 16) != 0; | 2875 | string = (exit_qualification & 16) != 0; |
2828 | 2876 | ||
2829 | if (string) { | 2877 | if (string) { |
2830 | if (emulate_instruction(vcpu, | 2878 | if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO) |
2831 | kvm_run, 0, 0, 0) == EMULATE_DO_MMIO) | ||
2832 | return 0; | 2879 | return 0; |
2833 | return 1; | 2880 | return 1; |
2834 | } | 2881 | } |
@@ -2838,7 +2885,7 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2838 | port = exit_qualification >> 16; | 2885 | port = exit_qualification >> 16; |
2839 | 2886 | ||
2840 | skip_emulated_instruction(vcpu); | 2887 | skip_emulated_instruction(vcpu); |
2841 | return kvm_emulate_pio(vcpu, kvm_run, in, size, port); | 2888 | return kvm_emulate_pio(vcpu, in, size, port); |
2842 | } | 2889 | } |
2843 | 2890 | ||
2844 | static void | 2891 | static void |
@@ -2852,7 +2899,7 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) | |||
2852 | hypercall[2] = 0xc1; | 2899 | hypercall[2] = 0xc1; |
2853 | } | 2900 | } |
2854 | 2901 | ||
2855 | static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2902 | static int handle_cr(struct kvm_vcpu *vcpu) |
2856 | { | 2903 | { |
2857 | unsigned long exit_qualification, val; | 2904 | unsigned long exit_qualification, val; |
2858 | int cr; | 2905 | int cr; |
@@ -2887,7 +2934,7 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2887 | return 1; | 2934 | return 1; |
2888 | if (cr8_prev <= cr8) | 2935 | if (cr8_prev <= cr8) |
2889 | return 1; | 2936 | return 1; |
2890 | kvm_run->exit_reason = KVM_EXIT_SET_TPR; | 2937 | vcpu->run->exit_reason = KVM_EXIT_SET_TPR; |
2891 | return 0; | 2938 | return 0; |
2892 | } | 2939 | } |
2893 | }; | 2940 | }; |
@@ -2922,13 +2969,13 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2922 | default: | 2969 | default: |
2923 | break; | 2970 | break; |
2924 | } | 2971 | } |
2925 | kvm_run->exit_reason = 0; | 2972 | vcpu->run->exit_reason = 0; |
2926 | pr_unimpl(vcpu, "unhandled control register: op %d cr %d\n", | 2973 | pr_unimpl(vcpu, "unhandled control register: op %d cr %d\n", |
2927 | (int)(exit_qualification >> 4) & 3, cr); | 2974 | (int)(exit_qualification >> 4) & 3, cr); |
2928 | return 0; | 2975 | return 0; |
2929 | } | 2976 | } |
2930 | 2977 | ||
2931 | static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2978 | static int handle_dr(struct kvm_vcpu *vcpu) |
2932 | { | 2979 | { |
2933 | unsigned long exit_qualification; | 2980 | unsigned long exit_qualification; |
2934 | unsigned long val; | 2981 | unsigned long val; |
@@ -2944,13 +2991,13 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2944 | * guest debugging itself. | 2991 | * guest debugging itself. |
2945 | */ | 2992 | */ |
2946 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { | 2993 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { |
2947 | kvm_run->debug.arch.dr6 = vcpu->arch.dr6; | 2994 | vcpu->run->debug.arch.dr6 = vcpu->arch.dr6; |
2948 | kvm_run->debug.arch.dr7 = dr; | 2995 | vcpu->run->debug.arch.dr7 = dr; |
2949 | kvm_run->debug.arch.pc = | 2996 | vcpu->run->debug.arch.pc = |
2950 | vmcs_readl(GUEST_CS_BASE) + | 2997 | vmcs_readl(GUEST_CS_BASE) + |
2951 | vmcs_readl(GUEST_RIP); | 2998 | vmcs_readl(GUEST_RIP); |
2952 | kvm_run->debug.arch.exception = DB_VECTOR; | 2999 | vcpu->run->debug.arch.exception = DB_VECTOR; |
2953 | kvm_run->exit_reason = KVM_EXIT_DEBUG; | 3000 | vcpu->run->exit_reason = KVM_EXIT_DEBUG; |
2954 | return 0; | 3001 | return 0; |
2955 | } else { | 3002 | } else { |
2956 | vcpu->arch.dr7 &= ~DR7_GD; | 3003 | vcpu->arch.dr7 &= ~DR7_GD; |
@@ -3016,13 +3063,13 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3016 | return 1; | 3063 | return 1; |
3017 | } | 3064 | } |
3018 | 3065 | ||
3019 | static int handle_cpuid(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3066 | static int handle_cpuid(struct kvm_vcpu *vcpu) |
3020 | { | 3067 | { |
3021 | kvm_emulate_cpuid(vcpu); | 3068 | kvm_emulate_cpuid(vcpu); |
3022 | return 1; | 3069 | return 1; |
3023 | } | 3070 | } |
3024 | 3071 | ||
3025 | static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3072 | static int handle_rdmsr(struct kvm_vcpu *vcpu) |
3026 | { | 3073 | { |
3027 | u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX]; | 3074 | u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX]; |
3028 | u64 data; | 3075 | u64 data; |
@@ -3041,7 +3088,7 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3041 | return 1; | 3088 | return 1; |
3042 | } | 3089 | } |
3043 | 3090 | ||
3044 | static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3091 | static int handle_wrmsr(struct kvm_vcpu *vcpu) |
3045 | { | 3092 | { |
3046 | u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX]; | 3093 | u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX]; |
3047 | u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u) | 3094 | u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u) |
@@ -3058,14 +3105,12 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3058 | return 1; | 3105 | return 1; |
3059 | } | 3106 | } |
3060 | 3107 | ||
3061 | static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu, | 3108 | static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu) |
3062 | struct kvm_run *kvm_run) | ||
3063 | { | 3109 | { |
3064 | return 1; | 3110 | return 1; |
3065 | } | 3111 | } |
3066 | 3112 | ||
3067 | static int handle_interrupt_window(struct kvm_vcpu *vcpu, | 3113 | static int handle_interrupt_window(struct kvm_vcpu *vcpu) |
3068 | struct kvm_run *kvm_run) | ||
3069 | { | 3114 | { |
3070 | u32 cpu_based_vm_exec_control; | 3115 | u32 cpu_based_vm_exec_control; |
3071 | 3116 | ||
@@ -3081,34 +3126,34 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu, | |||
3081 | * possible | 3126 | * possible |
3082 | */ | 3127 | */ |
3083 | if (!irqchip_in_kernel(vcpu->kvm) && | 3128 | if (!irqchip_in_kernel(vcpu->kvm) && |
3084 | kvm_run->request_interrupt_window && | 3129 | vcpu->run->request_interrupt_window && |
3085 | !kvm_cpu_has_interrupt(vcpu)) { | 3130 | !kvm_cpu_has_interrupt(vcpu)) { |
3086 | kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; | 3131 | vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; |
3087 | return 0; | 3132 | return 0; |
3088 | } | 3133 | } |
3089 | return 1; | 3134 | return 1; |
3090 | } | 3135 | } |
3091 | 3136 | ||
3092 | static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3137 | static int handle_halt(struct kvm_vcpu *vcpu) |
3093 | { | 3138 | { |
3094 | skip_emulated_instruction(vcpu); | 3139 | skip_emulated_instruction(vcpu); |
3095 | return kvm_emulate_halt(vcpu); | 3140 | return kvm_emulate_halt(vcpu); |
3096 | } | 3141 | } |
3097 | 3142 | ||
3098 | static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3143 | static int handle_vmcall(struct kvm_vcpu *vcpu) |
3099 | { | 3144 | { |
3100 | skip_emulated_instruction(vcpu); | 3145 | skip_emulated_instruction(vcpu); |
3101 | kvm_emulate_hypercall(vcpu); | 3146 | kvm_emulate_hypercall(vcpu); |
3102 | return 1; | 3147 | return 1; |
3103 | } | 3148 | } |
3104 | 3149 | ||
3105 | static int handle_vmx_insn(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3150 | static int handle_vmx_insn(struct kvm_vcpu *vcpu) |
3106 | { | 3151 | { |
3107 | kvm_queue_exception(vcpu, UD_VECTOR); | 3152 | kvm_queue_exception(vcpu, UD_VECTOR); |
3108 | return 1; | 3153 | return 1; |
3109 | } | 3154 | } |
3110 | 3155 | ||
3111 | static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3156 | static int handle_invlpg(struct kvm_vcpu *vcpu) |
3112 | { | 3157 | { |
3113 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 3158 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
3114 | 3159 | ||
@@ -3117,14 +3162,14 @@ static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3117 | return 1; | 3162 | return 1; |
3118 | } | 3163 | } |
3119 | 3164 | ||
3120 | static int handle_wbinvd(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3165 | static int handle_wbinvd(struct kvm_vcpu *vcpu) |
3121 | { | 3166 | { |
3122 | skip_emulated_instruction(vcpu); | 3167 | skip_emulated_instruction(vcpu); |
3123 | /* TODO: Add support for VT-d/pass-through device */ | 3168 | /* TODO: Add support for VT-d/pass-through device */ |
3124 | return 1; | 3169 | return 1; |
3125 | } | 3170 | } |
3126 | 3171 | ||
3127 | static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3172 | static int handle_apic_access(struct kvm_vcpu *vcpu) |
3128 | { | 3173 | { |
3129 | unsigned long exit_qualification; | 3174 | unsigned long exit_qualification; |
3130 | enum emulation_result er; | 3175 | enum emulation_result er; |
@@ -3133,7 +3178,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3133 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 3178 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
3134 | offset = exit_qualification & 0xffful; | 3179 | offset = exit_qualification & 0xffful; |
3135 | 3180 | ||
3136 | er = emulate_instruction(vcpu, kvm_run, 0, 0, 0); | 3181 | er = emulate_instruction(vcpu, 0, 0, 0); |
3137 | 3182 | ||
3138 | if (er != EMULATE_DONE) { | 3183 | if (er != EMULATE_DONE) { |
3139 | printk(KERN_ERR | 3184 | printk(KERN_ERR |
@@ -3144,7 +3189,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3144 | return 1; | 3189 | return 1; |
3145 | } | 3190 | } |
3146 | 3191 | ||
3147 | static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3192 | static int handle_task_switch(struct kvm_vcpu *vcpu) |
3148 | { | 3193 | { |
3149 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3194 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3150 | unsigned long exit_qualification; | 3195 | unsigned long exit_qualification; |
@@ -3198,7 +3243,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3198 | return 1; | 3243 | return 1; |
3199 | } | 3244 | } |
3200 | 3245 | ||
3201 | static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3246 | static int handle_ept_violation(struct kvm_vcpu *vcpu) |
3202 | { | 3247 | { |
3203 | unsigned long exit_qualification; | 3248 | unsigned long exit_qualification; |
3204 | gpa_t gpa; | 3249 | gpa_t gpa; |
@@ -3219,8 +3264,8 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3219 | vmcs_readl(GUEST_LINEAR_ADDRESS)); | 3264 | vmcs_readl(GUEST_LINEAR_ADDRESS)); |
3220 | printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", | 3265 | printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", |
3221 | (long unsigned int)exit_qualification); | 3266 | (long unsigned int)exit_qualification); |
3222 | kvm_run->exit_reason = KVM_EXIT_UNKNOWN; | 3267 | vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; |
3223 | kvm_run->hw.hardware_exit_reason = EXIT_REASON_EPT_VIOLATION; | 3268 | vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_VIOLATION; |
3224 | return 0; | 3269 | return 0; |
3225 | } | 3270 | } |
3226 | 3271 | ||
@@ -3290,7 +3335,7 @@ static void ept_misconfig_inspect_spte(struct kvm_vcpu *vcpu, u64 spte, | |||
3290 | } | 3335 | } |
3291 | } | 3336 | } |
3292 | 3337 | ||
3293 | static int handle_ept_misconfig(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3338 | static int handle_ept_misconfig(struct kvm_vcpu *vcpu) |
3294 | { | 3339 | { |
3295 | u64 sptes[4]; | 3340 | u64 sptes[4]; |
3296 | int nr_sptes, i; | 3341 | int nr_sptes, i; |
@@ -3306,13 +3351,13 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3306 | for (i = PT64_ROOT_LEVEL; i > PT64_ROOT_LEVEL - nr_sptes; --i) | 3351 | for (i = PT64_ROOT_LEVEL; i > PT64_ROOT_LEVEL - nr_sptes; --i) |
3307 | ept_misconfig_inspect_spte(vcpu, sptes[i-1], i); | 3352 | ept_misconfig_inspect_spte(vcpu, sptes[i-1], i); |
3308 | 3353 | ||
3309 | kvm_run->exit_reason = KVM_EXIT_UNKNOWN; | 3354 | vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; |
3310 | kvm_run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG; | 3355 | vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG; |
3311 | 3356 | ||
3312 | return 0; | 3357 | return 0; |
3313 | } | 3358 | } |
3314 | 3359 | ||
3315 | static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3360 | static int handle_nmi_window(struct kvm_vcpu *vcpu) |
3316 | { | 3361 | { |
3317 | u32 cpu_based_vm_exec_control; | 3362 | u32 cpu_based_vm_exec_control; |
3318 | 3363 | ||
@@ -3325,36 +3370,50 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3325 | return 1; | 3370 | return 1; |
3326 | } | 3371 | } |
3327 | 3372 | ||
3328 | static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, | 3373 | static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) |
3329 | struct kvm_run *kvm_run) | ||
3330 | { | 3374 | { |
3331 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3375 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3332 | enum emulation_result err = EMULATE_DONE; | 3376 | enum emulation_result err = EMULATE_DONE; |
3333 | 3377 | int ret = 1; | |
3334 | local_irq_enable(); | ||
3335 | preempt_enable(); | ||
3336 | 3378 | ||
3337 | while (!guest_state_valid(vcpu)) { | 3379 | while (!guest_state_valid(vcpu)) { |
3338 | err = emulate_instruction(vcpu, kvm_run, 0, 0, 0); | 3380 | err = emulate_instruction(vcpu, 0, 0, 0); |
3339 | 3381 | ||
3340 | if (err == EMULATE_DO_MMIO) | 3382 | if (err == EMULATE_DO_MMIO) { |
3341 | break; | 3383 | ret = 0; |
3384 | goto out; | ||
3385 | } | ||
3342 | 3386 | ||
3343 | if (err != EMULATE_DONE) { | 3387 | if (err != EMULATE_DONE) { |
3344 | kvm_report_emulation_failure(vcpu, "emulation failure"); | 3388 | kvm_report_emulation_failure(vcpu, "emulation failure"); |
3345 | break; | 3389 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; |
3390 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | ||
3391 | vcpu->run->internal.ndata = 0; | ||
3392 | ret = 0; | ||
3393 | goto out; | ||
3346 | } | 3394 | } |
3347 | 3395 | ||
3348 | if (signal_pending(current)) | 3396 | if (signal_pending(current)) |
3349 | break; | 3397 | goto out; |
3350 | if (need_resched()) | 3398 | if (need_resched()) |
3351 | schedule(); | 3399 | schedule(); |
3352 | } | 3400 | } |
3353 | 3401 | ||
3354 | preempt_disable(); | 3402 | vmx->emulation_required = 0; |
3355 | local_irq_disable(); | 3403 | out: |
3404 | return ret; | ||
3405 | } | ||
3356 | 3406 | ||
3357 | vmx->invalid_state_emulation_result = err; | 3407 | /* |
3408 | * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE | ||
3409 | * exiting, so only get here on cpu with PAUSE-Loop-Exiting. | ||
3410 | */ | ||
3411 | static int handle_pause(struct kvm_vcpu *vcpu) | ||
3412 | { | ||
3413 | skip_emulated_instruction(vcpu); | ||
3414 | kvm_vcpu_on_spin(vcpu); | ||
3415 | |||
3416 | return 1; | ||
3358 | } | 3417 | } |
3359 | 3418 | ||
3360 | /* | 3419 | /* |
@@ -3362,8 +3421,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, | |||
3362 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs | 3421 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs |
3363 | * to be done to userspace and return 0. | 3422 | * to be done to userspace and return 0. |
3364 | */ | 3423 | */ |
3365 | static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, | 3424 | static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { |
3366 | struct kvm_run *kvm_run) = { | ||
3367 | [EXIT_REASON_EXCEPTION_NMI] = handle_exception, | 3425 | [EXIT_REASON_EXCEPTION_NMI] = handle_exception, |
3368 | [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, | 3426 | [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, |
3369 | [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault, | 3427 | [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault, |
@@ -3394,6 +3452,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, | |||
3394 | [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, | 3452 | [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, |
3395 | [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, | 3453 | [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, |
3396 | [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig, | 3454 | [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig, |
3455 | [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, | ||
3397 | }; | 3456 | }; |
3398 | 3457 | ||
3399 | static const int kvm_vmx_max_exit_handlers = | 3458 | static const int kvm_vmx_max_exit_handlers = |
@@ -3403,7 +3462,7 @@ static const int kvm_vmx_max_exit_handlers = | |||
3403 | * The guest has exited. See if we can fix it or if we need userspace | 3462 | * The guest has exited. See if we can fix it or if we need userspace |
3404 | * assistance. | 3463 | * assistance. |
3405 | */ | 3464 | */ |
3406 | static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | 3465 | static int vmx_handle_exit(struct kvm_vcpu *vcpu) |
3407 | { | 3466 | { |
3408 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3467 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3409 | u32 exit_reason = vmx->exit_reason; | 3468 | u32 exit_reason = vmx->exit_reason; |
@@ -3411,13 +3470,9 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
3411 | 3470 | ||
3412 | trace_kvm_exit(exit_reason, kvm_rip_read(vcpu)); | 3471 | trace_kvm_exit(exit_reason, kvm_rip_read(vcpu)); |
3413 | 3472 | ||
3414 | /* If we need to emulate an MMIO from handle_invalid_guest_state | 3473 | /* If guest state is invalid, start emulating */ |
3415 | * we just return 0 */ | 3474 | if (vmx->emulation_required && emulate_invalid_guest_state) |
3416 | if (vmx->emulation_required && emulate_invalid_guest_state) { | 3475 | return handle_invalid_guest_state(vcpu); |
3417 | if (guest_state_valid(vcpu)) | ||
3418 | vmx->emulation_required = 0; | ||
3419 | return vmx->invalid_state_emulation_result != EMULATE_DO_MMIO; | ||
3420 | } | ||
3421 | 3476 | ||
3422 | /* Access CR3 don't cause VMExit in paging mode, so we need | 3477 | /* Access CR3 don't cause VMExit in paging mode, so we need |
3423 | * to sync with guest real CR3. */ | 3478 | * to sync with guest real CR3. */ |
@@ -3425,8 +3480,8 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
3425 | vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); | 3480 | vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); |
3426 | 3481 | ||
3427 | if (unlikely(vmx->fail)) { | 3482 | if (unlikely(vmx->fail)) { |
3428 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; | 3483 | vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; |
3429 | kvm_run->fail_entry.hardware_entry_failure_reason | 3484 | vcpu->run->fail_entry.hardware_entry_failure_reason |
3430 | = vmcs_read32(VM_INSTRUCTION_ERROR); | 3485 | = vmcs_read32(VM_INSTRUCTION_ERROR); |
3431 | return 0; | 3486 | return 0; |
3432 | } | 3487 | } |
@@ -3459,10 +3514,10 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
3459 | 3514 | ||
3460 | if (exit_reason < kvm_vmx_max_exit_handlers | 3515 | if (exit_reason < kvm_vmx_max_exit_handlers |
3461 | && kvm_vmx_exit_handlers[exit_reason]) | 3516 | && kvm_vmx_exit_handlers[exit_reason]) |
3462 | return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); | 3517 | return kvm_vmx_exit_handlers[exit_reason](vcpu); |
3463 | else { | 3518 | else { |
3464 | kvm_run->exit_reason = KVM_EXIT_UNKNOWN; | 3519 | vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; |
3465 | kvm_run->hw.hardware_exit_reason = exit_reason; | 3520 | vcpu->run->hw.hardware_exit_reason = exit_reason; |
3466 | } | 3521 | } |
3467 | return 0; | 3522 | return 0; |
3468 | } | 3523 | } |
@@ -3600,23 +3655,18 @@ static void fixup_rmode_irq(struct vcpu_vmx *vmx) | |||
3600 | #define Q "l" | 3655 | #define Q "l" |
3601 | #endif | 3656 | #endif |
3602 | 3657 | ||
3603 | static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3658 | static void vmx_vcpu_run(struct kvm_vcpu *vcpu) |
3604 | { | 3659 | { |
3605 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3660 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3606 | 3661 | ||
3607 | if (enable_ept && is_paging(vcpu)) { | ||
3608 | vmcs_writel(GUEST_CR3, vcpu->arch.cr3); | ||
3609 | ept_load_pdptrs(vcpu); | ||
3610 | } | ||
3611 | /* Record the guest's net vcpu time for enforced NMI injections. */ | 3662 | /* Record the guest's net vcpu time for enforced NMI injections. */ |
3612 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) | 3663 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) |
3613 | vmx->entry_time = ktime_get(); | 3664 | vmx->entry_time = ktime_get(); |
3614 | 3665 | ||
3615 | /* Handle invalid guest state instead of entering VMX */ | 3666 | /* Don't enter VMX if guest state is invalid, let the exit handler |
3616 | if (vmx->emulation_required && emulate_invalid_guest_state) { | 3667 | start emulation until we arrive back to a valid state */ |
3617 | handle_invalid_guest_state(vcpu, kvm_run); | 3668 | if (vmx->emulation_required && emulate_invalid_guest_state) |
3618 | return; | 3669 | return; |
3619 | } | ||
3620 | 3670 | ||
3621 | if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) | 3671 | if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) |
3622 | vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); | 3672 | vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); |
@@ -3775,7 +3825,6 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) | |||
3775 | __clear_bit(vmx->vpid, vmx_vpid_bitmap); | 3825 | __clear_bit(vmx->vpid, vmx_vpid_bitmap); |
3776 | spin_unlock(&vmx_vpid_lock); | 3826 | spin_unlock(&vmx_vpid_lock); |
3777 | vmx_free_vmcs(vcpu); | 3827 | vmx_free_vmcs(vcpu); |
3778 | kfree(vmx->host_msrs); | ||
3779 | kfree(vmx->guest_msrs); | 3828 | kfree(vmx->guest_msrs); |
3780 | kvm_vcpu_uninit(vcpu); | 3829 | kvm_vcpu_uninit(vcpu); |
3781 | kmem_cache_free(kvm_vcpu_cache, vmx); | 3830 | kmem_cache_free(kvm_vcpu_cache, vmx); |
@@ -3802,10 +3851,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
3802 | goto uninit_vcpu; | 3851 | goto uninit_vcpu; |
3803 | } | 3852 | } |
3804 | 3853 | ||
3805 | vmx->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); | ||
3806 | if (!vmx->host_msrs) | ||
3807 | goto free_guest_msrs; | ||
3808 | |||
3809 | vmx->vmcs = alloc_vmcs(); | 3854 | vmx->vmcs = alloc_vmcs(); |
3810 | if (!vmx->vmcs) | 3855 | if (!vmx->vmcs) |
3811 | goto free_msrs; | 3856 | goto free_msrs; |
@@ -3836,8 +3881,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
3836 | free_vmcs: | 3881 | free_vmcs: |
3837 | free_vmcs(vmx->vmcs); | 3882 | free_vmcs(vmx->vmcs); |
3838 | free_msrs: | 3883 | free_msrs: |
3839 | kfree(vmx->host_msrs); | ||
3840 | free_guest_msrs: | ||
3841 | kfree(vmx->guest_msrs); | 3884 | kfree(vmx->guest_msrs); |
3842 | uninit_vcpu: | 3885 | uninit_vcpu: |
3843 | kvm_vcpu_uninit(&vmx->vcpu); | 3886 | kvm_vcpu_uninit(&vmx->vcpu); |
@@ -3973,6 +4016,8 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
3973 | .queue_exception = vmx_queue_exception, | 4016 | .queue_exception = vmx_queue_exception, |
3974 | .interrupt_allowed = vmx_interrupt_allowed, | 4017 | .interrupt_allowed = vmx_interrupt_allowed, |
3975 | .nmi_allowed = vmx_nmi_allowed, | 4018 | .nmi_allowed = vmx_nmi_allowed, |
4019 | .get_nmi_mask = vmx_get_nmi_mask, | ||
4020 | .set_nmi_mask = vmx_set_nmi_mask, | ||
3976 | .enable_nmi_window = enable_nmi_window, | 4021 | .enable_nmi_window = enable_nmi_window, |
3977 | .enable_irq_window = enable_irq_window, | 4022 | .enable_irq_window = enable_irq_window, |
3978 | .update_cr8_intercept = update_cr8_intercept, | 4023 | .update_cr8_intercept = update_cr8_intercept, |
@@ -3987,7 +4032,12 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
3987 | 4032 | ||
3988 | static int __init vmx_init(void) | 4033 | static int __init vmx_init(void) |
3989 | { | 4034 | { |
3990 | int r; | 4035 | int r, i; |
4036 | |||
4037 | rdmsrl_safe(MSR_EFER, &host_efer); | ||
4038 | |||
4039 | for (i = 0; i < NR_VMX_MSR; ++i) | ||
4040 | kvm_define_shared_msr(i, vmx_msr_index[i]); | ||
3991 | 4041 | ||
3992 | vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL); | 4042 | vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL); |
3993 | if (!vmx_io_bitmap_a) | 4043 | if (!vmx_io_bitmap_a) |
@@ -4049,8 +4099,6 @@ static int __init vmx_init(void) | |||
4049 | if (bypass_guest_pf) | 4099 | if (bypass_guest_pf) |
4050 | kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); | 4100 | kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); |
4051 | 4101 | ||
4052 | ept_sync_global(); | ||
4053 | |||
4054 | return 0; | 4102 | return 0; |
4055 | 4103 | ||
4056 | out3: | 4104 | out3: |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4fc80174191c..9d068966fb2a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <linux/iommu.h> | 37 | #include <linux/iommu.h> |
38 | #include <linux/intel-iommu.h> | 38 | #include <linux/intel-iommu.h> |
39 | #include <linux/cpufreq.h> | 39 | #include <linux/cpufreq.h> |
40 | #include <linux/user-return-notifier.h> | ||
40 | #include <trace/events/kvm.h> | 41 | #include <trace/events/kvm.h> |
41 | #undef TRACE_INCLUDE_FILE | 42 | #undef TRACE_INCLUDE_FILE |
42 | #define CREATE_TRACE_POINTS | 43 | #define CREATE_TRACE_POINTS |
@@ -88,6 +89,25 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops); | |||
88 | int ignore_msrs = 0; | 89 | int ignore_msrs = 0; |
89 | module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR); | 90 | module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR); |
90 | 91 | ||
92 | #define KVM_NR_SHARED_MSRS 16 | ||
93 | |||
94 | struct kvm_shared_msrs_global { | ||
95 | int nr; | ||
96 | struct kvm_shared_msr { | ||
97 | u32 msr; | ||
98 | u64 value; | ||
99 | } msrs[KVM_NR_SHARED_MSRS]; | ||
100 | }; | ||
101 | |||
102 | struct kvm_shared_msrs { | ||
103 | struct user_return_notifier urn; | ||
104 | bool registered; | ||
105 | u64 current_value[KVM_NR_SHARED_MSRS]; | ||
106 | }; | ||
107 | |||
108 | static struct kvm_shared_msrs_global __read_mostly shared_msrs_global; | ||
109 | static DEFINE_PER_CPU(struct kvm_shared_msrs, shared_msrs); | ||
110 | |||
91 | struct kvm_stats_debugfs_item debugfs_entries[] = { | 111 | struct kvm_stats_debugfs_item debugfs_entries[] = { |
92 | { "pf_fixed", VCPU_STAT(pf_fixed) }, | 112 | { "pf_fixed", VCPU_STAT(pf_fixed) }, |
93 | { "pf_guest", VCPU_STAT(pf_guest) }, | 113 | { "pf_guest", VCPU_STAT(pf_guest) }, |
@@ -124,6 +144,72 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
124 | { NULL } | 144 | { NULL } |
125 | }; | 145 | }; |
126 | 146 | ||
147 | static void kvm_on_user_return(struct user_return_notifier *urn) | ||
148 | { | ||
149 | unsigned slot; | ||
150 | struct kvm_shared_msr *global; | ||
151 | struct kvm_shared_msrs *locals | ||
152 | = container_of(urn, struct kvm_shared_msrs, urn); | ||
153 | |||
154 | for (slot = 0; slot < shared_msrs_global.nr; ++slot) { | ||
155 | global = &shared_msrs_global.msrs[slot]; | ||
156 | if (global->value != locals->current_value[slot]) { | ||
157 | wrmsrl(global->msr, global->value); | ||
158 | locals->current_value[slot] = global->value; | ||
159 | } | ||
160 | } | ||
161 | locals->registered = false; | ||
162 | user_return_notifier_unregister(urn); | ||
163 | } | ||
164 | |||
165 | void kvm_define_shared_msr(unsigned slot, u32 msr) | ||
166 | { | ||
167 | int cpu; | ||
168 | u64 value; | ||
169 | |||
170 | if (slot >= shared_msrs_global.nr) | ||
171 | shared_msrs_global.nr = slot + 1; | ||
172 | shared_msrs_global.msrs[slot].msr = msr; | ||
173 | rdmsrl_safe(msr, &value); | ||
174 | shared_msrs_global.msrs[slot].value = value; | ||
175 | for_each_online_cpu(cpu) | ||
176 | per_cpu(shared_msrs, cpu).current_value[slot] = value; | ||
177 | } | ||
178 | EXPORT_SYMBOL_GPL(kvm_define_shared_msr); | ||
179 | |||
180 | static void kvm_shared_msr_cpu_online(void) | ||
181 | { | ||
182 | unsigned i; | ||
183 | struct kvm_shared_msrs *locals = &__get_cpu_var(shared_msrs); | ||
184 | |||
185 | for (i = 0; i < shared_msrs_global.nr; ++i) | ||
186 | locals->current_value[i] = shared_msrs_global.msrs[i].value; | ||
187 | } | ||
188 | |||
189 | void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) | ||
190 | { | ||
191 | struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs); | ||
192 | |||
193 | if (((value ^ smsr->current_value[slot]) & mask) == 0) | ||
194 | return; | ||
195 | smsr->current_value[slot] = value; | ||
196 | wrmsrl(shared_msrs_global.msrs[slot].msr, value); | ||
197 | if (!smsr->registered) { | ||
198 | smsr->urn.on_user_return = kvm_on_user_return; | ||
199 | user_return_notifier_register(&smsr->urn); | ||
200 | smsr->registered = true; | ||
201 | } | ||
202 | } | ||
203 | EXPORT_SYMBOL_GPL(kvm_set_shared_msr); | ||
204 | |||
205 | static void drop_user_return_notifiers(void *ignore) | ||
206 | { | ||
207 | struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs); | ||
208 | |||
209 | if (smsr->registered) | ||
210 | kvm_on_user_return(&smsr->urn); | ||
211 | } | ||
212 | |||
127 | unsigned long segment_base(u16 selector) | 213 | unsigned long segment_base(u16 selector) |
128 | { | 214 | { |
129 | struct descriptor_table gdt; | 215 | struct descriptor_table gdt; |
@@ -485,16 +571,19 @@ static inline u32 bit(int bitno) | |||
485 | * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. | 571 | * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. |
486 | * | 572 | * |
487 | * This list is modified at module load time to reflect the | 573 | * This list is modified at module load time to reflect the |
488 | * capabilities of the host cpu. | 574 | * capabilities of the host cpu. This capabilities test skips MSRs that are |
575 | * kvm-specific. Those are put in the beginning of the list. | ||
489 | */ | 576 | */ |
577 | |||
578 | #define KVM_SAVE_MSRS_BEGIN 2 | ||
490 | static u32 msrs_to_save[] = { | 579 | static u32 msrs_to_save[] = { |
580 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, | ||
491 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, | 581 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, |
492 | MSR_K6_STAR, | 582 | MSR_K6_STAR, |
493 | #ifdef CONFIG_X86_64 | 583 | #ifdef CONFIG_X86_64 |
494 | MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, | 584 | MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, |
495 | #endif | 585 | #endif |
496 | MSR_IA32_TSC, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, | 586 | MSR_IA32_TSC, MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA |
497 | MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA | ||
498 | }; | 587 | }; |
499 | 588 | ||
500 | static unsigned num_msrs_to_save; | 589 | static unsigned num_msrs_to_save; |
@@ -678,7 +767,8 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) | |||
678 | /* With all the info we got, fill in the values */ | 767 | /* With all the info we got, fill in the values */ |
679 | 768 | ||
680 | vcpu->hv_clock.system_time = ts.tv_nsec + | 769 | vcpu->hv_clock.system_time = ts.tv_nsec + |
681 | (NSEC_PER_SEC * (u64)ts.tv_sec); | 770 | (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset; |
771 | |||
682 | /* | 772 | /* |
683 | * The interface expects us to write an even number signaling that the | 773 | * The interface expects us to write an even number signaling that the |
684 | * update is finished. Since the guest won't see the intermediate | 774 | * update is finished. Since the guest won't see the intermediate |
@@ -836,6 +926,38 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
836 | return 0; | 926 | return 0; |
837 | } | 927 | } |
838 | 928 | ||
929 | static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data) | ||
930 | { | ||
931 | struct kvm *kvm = vcpu->kvm; | ||
932 | int lm = is_long_mode(vcpu); | ||
933 | u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64 | ||
934 | : (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32; | ||
935 | u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64 | ||
936 | : kvm->arch.xen_hvm_config.blob_size_32; | ||
937 | u32 page_num = data & ~PAGE_MASK; | ||
938 | u64 page_addr = data & PAGE_MASK; | ||
939 | u8 *page; | ||
940 | int r; | ||
941 | |||
942 | r = -E2BIG; | ||
943 | if (page_num >= blob_size) | ||
944 | goto out; | ||
945 | r = -ENOMEM; | ||
946 | page = kzalloc(PAGE_SIZE, GFP_KERNEL); | ||
947 | if (!page) | ||
948 | goto out; | ||
949 | r = -EFAULT; | ||
950 | if (copy_from_user(page, blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE)) | ||
951 | goto out_free; | ||
952 | if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE)) | ||
953 | goto out_free; | ||
954 | r = 0; | ||
955 | out_free: | ||
956 | kfree(page); | ||
957 | out: | ||
958 | return r; | ||
959 | } | ||
960 | |||
839 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | 961 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) |
840 | { | 962 | { |
841 | switch (msr) { | 963 | switch (msr) { |
@@ -951,6 +1073,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
951 | "0x%x data 0x%llx\n", msr, data); | 1073 | "0x%x data 0x%llx\n", msr, data); |
952 | break; | 1074 | break; |
953 | default: | 1075 | default: |
1076 | if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) | ||
1077 | return xen_hvm_config(vcpu, data); | ||
954 | if (!ignore_msrs) { | 1078 | if (!ignore_msrs) { |
955 | pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", | 1079 | pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", |
956 | msr, data); | 1080 | msr, data); |
@@ -1225,6 +1349,9 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
1225 | case KVM_CAP_PIT2: | 1349 | case KVM_CAP_PIT2: |
1226 | case KVM_CAP_PIT_STATE2: | 1350 | case KVM_CAP_PIT_STATE2: |
1227 | case KVM_CAP_SET_IDENTITY_MAP_ADDR: | 1351 | case KVM_CAP_SET_IDENTITY_MAP_ADDR: |
1352 | case KVM_CAP_XEN_HVM: | ||
1353 | case KVM_CAP_ADJUST_CLOCK: | ||
1354 | case KVM_CAP_VCPU_EVENTS: | ||
1228 | r = 1; | 1355 | r = 1; |
1229 | break; | 1356 | break; |
1230 | case KVM_CAP_COALESCED_MMIO: | 1357 | case KVM_CAP_COALESCED_MMIO: |
@@ -1239,8 +1366,8 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
1239 | case KVM_CAP_NR_MEMSLOTS: | 1366 | case KVM_CAP_NR_MEMSLOTS: |
1240 | r = KVM_MEMORY_SLOTS; | 1367 | r = KVM_MEMORY_SLOTS; |
1241 | break; | 1368 | break; |
1242 | case KVM_CAP_PV_MMU: | 1369 | case KVM_CAP_PV_MMU: /* obsolete */ |
1243 | r = !tdp_enabled; | 1370 | r = 0; |
1244 | break; | 1371 | break; |
1245 | case KVM_CAP_IOMMU: | 1372 | case KVM_CAP_IOMMU: |
1246 | r = iommu_found(); | 1373 | r = iommu_found(); |
@@ -1327,6 +1454,12 @@ out: | |||
1327 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | 1454 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
1328 | { | 1455 | { |
1329 | kvm_x86_ops->vcpu_load(vcpu, cpu); | 1456 | kvm_x86_ops->vcpu_load(vcpu, cpu); |
1457 | if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) { | ||
1458 | unsigned long khz = cpufreq_quick_get(cpu); | ||
1459 | if (!khz) | ||
1460 | khz = tsc_khz; | ||
1461 | per_cpu(cpu_tsc_khz, cpu) = khz; | ||
1462 | } | ||
1330 | kvm_request_guest_time_update(vcpu); | 1463 | kvm_request_guest_time_update(vcpu); |
1331 | } | 1464 | } |
1332 | 1465 | ||
@@ -1760,6 +1893,61 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu, | |||
1760 | return 0; | 1893 | return 0; |
1761 | } | 1894 | } |
1762 | 1895 | ||
1896 | static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | ||
1897 | struct kvm_vcpu_events *events) | ||
1898 | { | ||
1899 | vcpu_load(vcpu); | ||
1900 | |||
1901 | events->exception.injected = vcpu->arch.exception.pending; | ||
1902 | events->exception.nr = vcpu->arch.exception.nr; | ||
1903 | events->exception.has_error_code = vcpu->arch.exception.has_error_code; | ||
1904 | events->exception.error_code = vcpu->arch.exception.error_code; | ||
1905 | |||
1906 | events->interrupt.injected = vcpu->arch.interrupt.pending; | ||
1907 | events->interrupt.nr = vcpu->arch.interrupt.nr; | ||
1908 | events->interrupt.soft = vcpu->arch.interrupt.soft; | ||
1909 | |||
1910 | events->nmi.injected = vcpu->arch.nmi_injected; | ||
1911 | events->nmi.pending = vcpu->arch.nmi_pending; | ||
1912 | events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu); | ||
1913 | |||
1914 | events->sipi_vector = vcpu->arch.sipi_vector; | ||
1915 | |||
1916 | events->flags = 0; | ||
1917 | |||
1918 | vcpu_put(vcpu); | ||
1919 | } | ||
1920 | |||
1921 | static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | ||
1922 | struct kvm_vcpu_events *events) | ||
1923 | { | ||
1924 | if (events->flags) | ||
1925 | return -EINVAL; | ||
1926 | |||
1927 | vcpu_load(vcpu); | ||
1928 | |||
1929 | vcpu->arch.exception.pending = events->exception.injected; | ||
1930 | vcpu->arch.exception.nr = events->exception.nr; | ||
1931 | vcpu->arch.exception.has_error_code = events->exception.has_error_code; | ||
1932 | vcpu->arch.exception.error_code = events->exception.error_code; | ||
1933 | |||
1934 | vcpu->arch.interrupt.pending = events->interrupt.injected; | ||
1935 | vcpu->arch.interrupt.nr = events->interrupt.nr; | ||
1936 | vcpu->arch.interrupt.soft = events->interrupt.soft; | ||
1937 | if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm)) | ||
1938 | kvm_pic_clear_isr_ack(vcpu->kvm); | ||
1939 | |||
1940 | vcpu->arch.nmi_injected = events->nmi.injected; | ||
1941 | vcpu->arch.nmi_pending = events->nmi.pending; | ||
1942 | kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked); | ||
1943 | |||
1944 | vcpu->arch.sipi_vector = events->sipi_vector; | ||
1945 | |||
1946 | vcpu_put(vcpu); | ||
1947 | |||
1948 | return 0; | ||
1949 | } | ||
1950 | |||
1763 | long kvm_arch_vcpu_ioctl(struct file *filp, | 1951 | long kvm_arch_vcpu_ioctl(struct file *filp, |
1764 | unsigned int ioctl, unsigned long arg) | 1952 | unsigned int ioctl, unsigned long arg) |
1765 | { | 1953 | { |
@@ -1770,6 +1958,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
1770 | 1958 | ||
1771 | switch (ioctl) { | 1959 | switch (ioctl) { |
1772 | case KVM_GET_LAPIC: { | 1960 | case KVM_GET_LAPIC: { |
1961 | r = -EINVAL; | ||
1962 | if (!vcpu->arch.apic) | ||
1963 | goto out; | ||
1773 | lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); | 1964 | lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); |
1774 | 1965 | ||
1775 | r = -ENOMEM; | 1966 | r = -ENOMEM; |
@@ -1785,6 +1976,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
1785 | break; | 1976 | break; |
1786 | } | 1977 | } |
1787 | case KVM_SET_LAPIC: { | 1978 | case KVM_SET_LAPIC: { |
1979 | r = -EINVAL; | ||
1980 | if (!vcpu->arch.apic) | ||
1981 | goto out; | ||
1788 | lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); | 1982 | lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); |
1789 | r = -ENOMEM; | 1983 | r = -ENOMEM; |
1790 | if (!lapic) | 1984 | if (!lapic) |
@@ -1911,6 +2105,27 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
1911 | r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); | 2105 | r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); |
1912 | break; | 2106 | break; |
1913 | } | 2107 | } |
2108 | case KVM_GET_VCPU_EVENTS: { | ||
2109 | struct kvm_vcpu_events events; | ||
2110 | |||
2111 | kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events); | ||
2112 | |||
2113 | r = -EFAULT; | ||
2114 | if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events))) | ||
2115 | break; | ||
2116 | r = 0; | ||
2117 | break; | ||
2118 | } | ||
2119 | case KVM_SET_VCPU_EVENTS: { | ||
2120 | struct kvm_vcpu_events events; | ||
2121 | |||
2122 | r = -EFAULT; | ||
2123 | if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events))) | ||
2124 | break; | ||
2125 | |||
2126 | r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events); | ||
2127 | break; | ||
2128 | } | ||
1914 | default: | 2129 | default: |
1915 | r = -EINVAL; | 2130 | r = -EINVAL; |
1916 | } | 2131 | } |
@@ -2039,9 +2254,7 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) | |||
2039 | sizeof(struct kvm_pic_state)); | 2254 | sizeof(struct kvm_pic_state)); |
2040 | break; | 2255 | break; |
2041 | case KVM_IRQCHIP_IOAPIC: | 2256 | case KVM_IRQCHIP_IOAPIC: |
2042 | memcpy(&chip->chip.ioapic, | 2257 | r = kvm_get_ioapic(kvm, &chip->chip.ioapic); |
2043 | ioapic_irqchip(kvm), | ||
2044 | sizeof(struct kvm_ioapic_state)); | ||
2045 | break; | 2258 | break; |
2046 | default: | 2259 | default: |
2047 | r = -EINVAL; | 2260 | r = -EINVAL; |
@@ -2071,11 +2284,7 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) | |||
2071 | spin_unlock(&pic_irqchip(kvm)->lock); | 2284 | spin_unlock(&pic_irqchip(kvm)->lock); |
2072 | break; | 2285 | break; |
2073 | case KVM_IRQCHIP_IOAPIC: | 2286 | case KVM_IRQCHIP_IOAPIC: |
2074 | mutex_lock(&kvm->irq_lock); | 2287 | r = kvm_set_ioapic(kvm, &chip->chip.ioapic); |
2075 | memcpy(ioapic_irqchip(kvm), | ||
2076 | &chip->chip.ioapic, | ||
2077 | sizeof(struct kvm_ioapic_state)); | ||
2078 | mutex_unlock(&kvm->irq_lock); | ||
2079 | break; | 2288 | break; |
2080 | default: | 2289 | default: |
2081 | r = -EINVAL; | 2290 | r = -EINVAL; |
@@ -2183,7 +2392,7 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
2183 | { | 2392 | { |
2184 | struct kvm *kvm = filp->private_data; | 2393 | struct kvm *kvm = filp->private_data; |
2185 | void __user *argp = (void __user *)arg; | 2394 | void __user *argp = (void __user *)arg; |
2186 | int r = -EINVAL; | 2395 | int r = -ENOTTY; |
2187 | /* | 2396 | /* |
2188 | * This union makes it completely explicit to gcc-3.x | 2397 | * This union makes it completely explicit to gcc-3.x |
2189 | * that these two variables' stack usage should be | 2398 | * that these two variables' stack usage should be |
@@ -2245,25 +2454,39 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
2245 | if (r) | 2454 | if (r) |
2246 | goto out; | 2455 | goto out; |
2247 | break; | 2456 | break; |
2248 | case KVM_CREATE_IRQCHIP: | 2457 | case KVM_CREATE_IRQCHIP: { |
2458 | struct kvm_pic *vpic; | ||
2459 | |||
2460 | mutex_lock(&kvm->lock); | ||
2461 | r = -EEXIST; | ||
2462 | if (kvm->arch.vpic) | ||
2463 | goto create_irqchip_unlock; | ||
2249 | r = -ENOMEM; | 2464 | r = -ENOMEM; |
2250 | kvm->arch.vpic = kvm_create_pic(kvm); | 2465 | vpic = kvm_create_pic(kvm); |
2251 | if (kvm->arch.vpic) { | 2466 | if (vpic) { |
2252 | r = kvm_ioapic_init(kvm); | 2467 | r = kvm_ioapic_init(kvm); |
2253 | if (r) { | 2468 | if (r) { |
2254 | kfree(kvm->arch.vpic); | 2469 | kfree(vpic); |
2255 | kvm->arch.vpic = NULL; | 2470 | goto create_irqchip_unlock; |
2256 | goto out; | ||
2257 | } | 2471 | } |
2258 | } else | 2472 | } else |
2259 | goto out; | 2473 | goto create_irqchip_unlock; |
2474 | smp_wmb(); | ||
2475 | kvm->arch.vpic = vpic; | ||
2476 | smp_wmb(); | ||
2260 | r = kvm_setup_default_irq_routing(kvm); | 2477 | r = kvm_setup_default_irq_routing(kvm); |
2261 | if (r) { | 2478 | if (r) { |
2479 | mutex_lock(&kvm->irq_lock); | ||
2262 | kfree(kvm->arch.vpic); | 2480 | kfree(kvm->arch.vpic); |
2263 | kfree(kvm->arch.vioapic); | 2481 | kfree(kvm->arch.vioapic); |
2264 | goto out; | 2482 | kvm->arch.vpic = NULL; |
2483 | kvm->arch.vioapic = NULL; | ||
2484 | mutex_unlock(&kvm->irq_lock); | ||
2265 | } | 2485 | } |
2486 | create_irqchip_unlock: | ||
2487 | mutex_unlock(&kvm->lock); | ||
2266 | break; | 2488 | break; |
2489 | } | ||
2267 | case KVM_CREATE_PIT: | 2490 | case KVM_CREATE_PIT: |
2268 | u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY; | 2491 | u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY; |
2269 | goto create_pit; | 2492 | goto create_pit; |
@@ -2293,10 +2516,8 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
2293 | goto out; | 2516 | goto out; |
2294 | if (irqchip_in_kernel(kvm)) { | 2517 | if (irqchip_in_kernel(kvm)) { |
2295 | __s32 status; | 2518 | __s32 status; |
2296 | mutex_lock(&kvm->irq_lock); | ||
2297 | status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, | 2519 | status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, |
2298 | irq_event.irq, irq_event.level); | 2520 | irq_event.irq, irq_event.level); |
2299 | mutex_unlock(&kvm->irq_lock); | ||
2300 | if (ioctl == KVM_IRQ_LINE_STATUS) { | 2521 | if (ioctl == KVM_IRQ_LINE_STATUS) { |
2301 | irq_event.status = status; | 2522 | irq_event.status = status; |
2302 | if (copy_to_user(argp, &irq_event, | 2523 | if (copy_to_user(argp, &irq_event, |
@@ -2422,6 +2643,55 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
2422 | r = 0; | 2643 | r = 0; |
2423 | break; | 2644 | break; |
2424 | } | 2645 | } |
2646 | case KVM_XEN_HVM_CONFIG: { | ||
2647 | r = -EFAULT; | ||
2648 | if (copy_from_user(&kvm->arch.xen_hvm_config, argp, | ||
2649 | sizeof(struct kvm_xen_hvm_config))) | ||
2650 | goto out; | ||
2651 | r = -EINVAL; | ||
2652 | if (kvm->arch.xen_hvm_config.flags) | ||
2653 | goto out; | ||
2654 | r = 0; | ||
2655 | break; | ||
2656 | } | ||
2657 | case KVM_SET_CLOCK: { | ||
2658 | struct timespec now; | ||
2659 | struct kvm_clock_data user_ns; | ||
2660 | u64 now_ns; | ||
2661 | s64 delta; | ||
2662 | |||
2663 | r = -EFAULT; | ||
2664 | if (copy_from_user(&user_ns, argp, sizeof(user_ns))) | ||
2665 | goto out; | ||
2666 | |||
2667 | r = -EINVAL; | ||
2668 | if (user_ns.flags) | ||
2669 | goto out; | ||
2670 | |||
2671 | r = 0; | ||
2672 | ktime_get_ts(&now); | ||
2673 | now_ns = timespec_to_ns(&now); | ||
2674 | delta = user_ns.clock - now_ns; | ||
2675 | kvm->arch.kvmclock_offset = delta; | ||
2676 | break; | ||
2677 | } | ||
2678 | case KVM_GET_CLOCK: { | ||
2679 | struct timespec now; | ||
2680 | struct kvm_clock_data user_ns; | ||
2681 | u64 now_ns; | ||
2682 | |||
2683 | ktime_get_ts(&now); | ||
2684 | now_ns = timespec_to_ns(&now); | ||
2685 | user_ns.clock = kvm->arch.kvmclock_offset + now_ns; | ||
2686 | user_ns.flags = 0; | ||
2687 | |||
2688 | r = -EFAULT; | ||
2689 | if (copy_to_user(argp, &user_ns, sizeof(user_ns))) | ||
2690 | goto out; | ||
2691 | r = 0; | ||
2692 | break; | ||
2693 | } | ||
2694 | |||
2425 | default: | 2695 | default: |
2426 | ; | 2696 | ; |
2427 | } | 2697 | } |
@@ -2434,7 +2704,8 @@ static void kvm_init_msr_list(void) | |||
2434 | u32 dummy[2]; | 2704 | u32 dummy[2]; |
2435 | unsigned i, j; | 2705 | unsigned i, j; |
2436 | 2706 | ||
2437 | for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) { | 2707 | /* skip the first msrs in the list. KVM-specific */ |
2708 | for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) { | ||
2438 | if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0) | 2709 | if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0) |
2439 | continue; | 2710 | continue; |
2440 | if (j < i) | 2711 | if (j < i) |
@@ -2758,13 +3029,13 @@ static void cache_all_regs(struct kvm_vcpu *vcpu) | |||
2758 | } | 3029 | } |
2759 | 3030 | ||
2760 | int emulate_instruction(struct kvm_vcpu *vcpu, | 3031 | int emulate_instruction(struct kvm_vcpu *vcpu, |
2761 | struct kvm_run *run, | ||
2762 | unsigned long cr2, | 3032 | unsigned long cr2, |
2763 | u16 error_code, | 3033 | u16 error_code, |
2764 | int emulation_type) | 3034 | int emulation_type) |
2765 | { | 3035 | { |
2766 | int r, shadow_mask; | 3036 | int r, shadow_mask; |
2767 | struct decode_cache *c; | 3037 | struct decode_cache *c; |
3038 | struct kvm_run *run = vcpu->run; | ||
2768 | 3039 | ||
2769 | kvm_clear_exception_queue(vcpu); | 3040 | kvm_clear_exception_queue(vcpu); |
2770 | vcpu->arch.mmio_fault_cr2 = cr2; | 3041 | vcpu->arch.mmio_fault_cr2 = cr2; |
@@ -2784,7 +3055,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
2784 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | 3055 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); |
2785 | 3056 | ||
2786 | vcpu->arch.emulate_ctxt.vcpu = vcpu; | 3057 | vcpu->arch.emulate_ctxt.vcpu = vcpu; |
2787 | vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); | 3058 | vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); |
2788 | vcpu->arch.emulate_ctxt.mode = | 3059 | vcpu->arch.emulate_ctxt.mode = |
2789 | (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) | 3060 | (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) |
2790 | ? X86EMUL_MODE_REAL : cs_l | 3061 | ? X86EMUL_MODE_REAL : cs_l |
@@ -2862,7 +3133,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
2862 | return EMULATE_DO_MMIO; | 3133 | return EMULATE_DO_MMIO; |
2863 | } | 3134 | } |
2864 | 3135 | ||
2865 | kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); | 3136 | kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); |
2866 | 3137 | ||
2867 | if (vcpu->mmio_is_write) { | 3138 | if (vcpu->mmio_is_write) { |
2868 | vcpu->mmio_needed = 0; | 3139 | vcpu->mmio_needed = 0; |
@@ -2970,8 +3241,7 @@ static int pio_string_write(struct kvm_vcpu *vcpu) | |||
2970 | return r; | 3241 | return r; |
2971 | } | 3242 | } |
2972 | 3243 | ||
2973 | int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | 3244 | int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port) |
2974 | int size, unsigned port) | ||
2975 | { | 3245 | { |
2976 | unsigned long val; | 3246 | unsigned long val; |
2977 | 3247 | ||
@@ -3000,7 +3270,7 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | |||
3000 | } | 3270 | } |
3001 | EXPORT_SYMBOL_GPL(kvm_emulate_pio); | 3271 | EXPORT_SYMBOL_GPL(kvm_emulate_pio); |
3002 | 3272 | ||
3003 | int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | 3273 | int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, |
3004 | int size, unsigned long count, int down, | 3274 | int size, unsigned long count, int down, |
3005 | gva_t address, int rep, unsigned port) | 3275 | gva_t address, int rep, unsigned port) |
3006 | { | 3276 | { |
@@ -3073,9 +3343,6 @@ static void bounce_off(void *info) | |||
3073 | /* nothing */ | 3343 | /* nothing */ |
3074 | } | 3344 | } |
3075 | 3345 | ||
3076 | static unsigned int ref_freq; | ||
3077 | static unsigned long tsc_khz_ref; | ||
3078 | |||
3079 | static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, | 3346 | static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, |
3080 | void *data) | 3347 | void *data) |
3081 | { | 3348 | { |
@@ -3084,14 +3351,11 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va | |||
3084 | struct kvm_vcpu *vcpu; | 3351 | struct kvm_vcpu *vcpu; |
3085 | int i, send_ipi = 0; | 3352 | int i, send_ipi = 0; |
3086 | 3353 | ||
3087 | if (!ref_freq) | ||
3088 | ref_freq = freq->old; | ||
3089 | |||
3090 | if (val == CPUFREQ_PRECHANGE && freq->old > freq->new) | 3354 | if (val == CPUFREQ_PRECHANGE && freq->old > freq->new) |
3091 | return 0; | 3355 | return 0; |
3092 | if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new) | 3356 | if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new) |
3093 | return 0; | 3357 | return 0; |
3094 | per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); | 3358 | per_cpu(cpu_tsc_khz, freq->cpu) = freq->new; |
3095 | 3359 | ||
3096 | spin_lock(&kvm_lock); | 3360 | spin_lock(&kvm_lock); |
3097 | list_for_each_entry(kvm, &vm_list, vm_list) { | 3361 | list_for_each_entry(kvm, &vm_list, vm_list) { |
@@ -3128,9 +3392,28 @@ static struct notifier_block kvmclock_cpufreq_notifier_block = { | |||
3128 | .notifier_call = kvmclock_cpufreq_notifier | 3392 | .notifier_call = kvmclock_cpufreq_notifier |
3129 | }; | 3393 | }; |
3130 | 3394 | ||
3395 | static void kvm_timer_init(void) | ||
3396 | { | ||
3397 | int cpu; | ||
3398 | |||
3399 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { | ||
3400 | cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, | ||
3401 | CPUFREQ_TRANSITION_NOTIFIER); | ||
3402 | for_each_online_cpu(cpu) { | ||
3403 | unsigned long khz = cpufreq_get(cpu); | ||
3404 | if (!khz) | ||
3405 | khz = tsc_khz; | ||
3406 | per_cpu(cpu_tsc_khz, cpu) = khz; | ||
3407 | } | ||
3408 | } else { | ||
3409 | for_each_possible_cpu(cpu) | ||
3410 | per_cpu(cpu_tsc_khz, cpu) = tsc_khz; | ||
3411 | } | ||
3412 | } | ||
3413 | |||
3131 | int kvm_arch_init(void *opaque) | 3414 | int kvm_arch_init(void *opaque) |
3132 | { | 3415 | { |
3133 | int r, cpu; | 3416 | int r; |
3134 | struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque; | 3417 | struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque; |
3135 | 3418 | ||
3136 | if (kvm_x86_ops) { | 3419 | if (kvm_x86_ops) { |
@@ -3162,13 +3445,7 @@ int kvm_arch_init(void *opaque) | |||
3162 | kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, | 3445 | kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, |
3163 | PT_DIRTY_MASK, PT64_NX_MASK, 0); | 3446 | PT_DIRTY_MASK, PT64_NX_MASK, 0); |
3164 | 3447 | ||
3165 | for_each_possible_cpu(cpu) | 3448 | kvm_timer_init(); |
3166 | per_cpu(cpu_tsc_khz, cpu) = tsc_khz; | ||
3167 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { | ||
3168 | tsc_khz_ref = tsc_khz; | ||
3169 | cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, | ||
3170 | CPUFREQ_TRANSITION_NOTIFIER); | ||
3171 | } | ||
3172 | 3449 | ||
3173 | return 0; | 3450 | return 0; |
3174 | 3451 | ||
@@ -3296,7 +3573,7 @@ void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, | |||
3296 | unsigned long *rflags) | 3573 | unsigned long *rflags) |
3297 | { | 3574 | { |
3298 | kvm_lmsw(vcpu, msw); | 3575 | kvm_lmsw(vcpu, msw); |
3299 | *rflags = kvm_x86_ops->get_rflags(vcpu); | 3576 | *rflags = kvm_get_rflags(vcpu); |
3300 | } | 3577 | } |
3301 | 3578 | ||
3302 | unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) | 3579 | unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) |
@@ -3334,7 +3611,7 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, | |||
3334 | switch (cr) { | 3611 | switch (cr) { |
3335 | case 0: | 3612 | case 0: |
3336 | kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val)); | 3613 | kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val)); |
3337 | *rflags = kvm_x86_ops->get_rflags(vcpu); | 3614 | *rflags = kvm_get_rflags(vcpu); |
3338 | break; | 3615 | break; |
3339 | case 2: | 3616 | case 2: |
3340 | vcpu->arch.cr2 = val; | 3617 | vcpu->arch.cr2 = val; |
@@ -3454,18 +3731,18 @@ EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); | |||
3454 | * | 3731 | * |
3455 | * No need to exit to userspace if we already have an interrupt queued. | 3732 | * No need to exit to userspace if we already have an interrupt queued. |
3456 | */ | 3733 | */ |
3457 | static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, | 3734 | static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu) |
3458 | struct kvm_run *kvm_run) | ||
3459 | { | 3735 | { |
3460 | return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) && | 3736 | return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) && |
3461 | kvm_run->request_interrupt_window && | 3737 | vcpu->run->request_interrupt_window && |
3462 | kvm_arch_interrupt_allowed(vcpu)); | 3738 | kvm_arch_interrupt_allowed(vcpu)); |
3463 | } | 3739 | } |
3464 | 3740 | ||
3465 | static void post_kvm_run_save(struct kvm_vcpu *vcpu, | 3741 | static void post_kvm_run_save(struct kvm_vcpu *vcpu) |
3466 | struct kvm_run *kvm_run) | ||
3467 | { | 3742 | { |
3468 | kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0; | 3743 | struct kvm_run *kvm_run = vcpu->run; |
3744 | |||
3745 | kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0; | ||
3469 | kvm_run->cr8 = kvm_get_cr8(vcpu); | 3746 | kvm_run->cr8 = kvm_get_cr8(vcpu); |
3470 | kvm_run->apic_base = kvm_get_apic_base(vcpu); | 3747 | kvm_run->apic_base = kvm_get_apic_base(vcpu); |
3471 | if (irqchip_in_kernel(vcpu->kvm)) | 3748 | if (irqchip_in_kernel(vcpu->kvm)) |
@@ -3526,7 +3803,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu) | |||
3526 | kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr); | 3803 | kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr); |
3527 | } | 3804 | } |
3528 | 3805 | ||
3529 | static void inject_pending_event(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3806 | static void inject_pending_event(struct kvm_vcpu *vcpu) |
3530 | { | 3807 | { |
3531 | /* try to reinject previous events if any */ | 3808 | /* try to reinject previous events if any */ |
3532 | if (vcpu->arch.exception.pending) { | 3809 | if (vcpu->arch.exception.pending) { |
@@ -3562,11 +3839,11 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3562 | } | 3839 | } |
3563 | } | 3840 | } |
3564 | 3841 | ||
3565 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3842 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) |
3566 | { | 3843 | { |
3567 | int r; | 3844 | int r; |
3568 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && | 3845 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && |
3569 | kvm_run->request_interrupt_window; | 3846 | vcpu->run->request_interrupt_window; |
3570 | 3847 | ||
3571 | if (vcpu->requests) | 3848 | if (vcpu->requests) |
3572 | if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) | 3849 | if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) |
@@ -3587,12 +3864,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3587 | kvm_x86_ops->tlb_flush(vcpu); | 3864 | kvm_x86_ops->tlb_flush(vcpu); |
3588 | if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS, | 3865 | if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS, |
3589 | &vcpu->requests)) { | 3866 | &vcpu->requests)) { |
3590 | kvm_run->exit_reason = KVM_EXIT_TPR_ACCESS; | 3867 | vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS; |
3591 | r = 0; | 3868 | r = 0; |
3592 | goto out; | 3869 | goto out; |
3593 | } | 3870 | } |
3594 | if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) { | 3871 | if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) { |
3595 | kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; | 3872 | vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; |
3596 | r = 0; | 3873 | r = 0; |
3597 | goto out; | 3874 | goto out; |
3598 | } | 3875 | } |
@@ -3616,7 +3893,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3616 | goto out; | 3893 | goto out; |
3617 | } | 3894 | } |
3618 | 3895 | ||
3619 | inject_pending_event(vcpu, kvm_run); | 3896 | inject_pending_event(vcpu); |
3620 | 3897 | ||
3621 | /* enable NMI/IRQ window open exits if needed */ | 3898 | /* enable NMI/IRQ window open exits if needed */ |
3622 | if (vcpu->arch.nmi_pending) | 3899 | if (vcpu->arch.nmi_pending) |
@@ -3642,7 +3919,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3642 | } | 3919 | } |
3643 | 3920 | ||
3644 | trace_kvm_entry(vcpu->vcpu_id); | 3921 | trace_kvm_entry(vcpu->vcpu_id); |
3645 | kvm_x86_ops->run(vcpu, kvm_run); | 3922 | kvm_x86_ops->run(vcpu); |
3646 | 3923 | ||
3647 | /* | 3924 | /* |
3648 | * If the guest has used debug registers, at least dr7 | 3925 | * If the guest has used debug registers, at least dr7 |
@@ -3684,13 +3961,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3684 | 3961 | ||
3685 | kvm_lapic_sync_from_vapic(vcpu); | 3962 | kvm_lapic_sync_from_vapic(vcpu); |
3686 | 3963 | ||
3687 | r = kvm_x86_ops->handle_exit(kvm_run, vcpu); | 3964 | r = kvm_x86_ops->handle_exit(vcpu); |
3688 | out: | 3965 | out: |
3689 | return r; | 3966 | return r; |
3690 | } | 3967 | } |
3691 | 3968 | ||
3692 | 3969 | ||
3693 | static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3970 | static int __vcpu_run(struct kvm_vcpu *vcpu) |
3694 | { | 3971 | { |
3695 | int r; | 3972 | int r; |
3696 | 3973 | ||
@@ -3710,7 +3987,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3710 | r = 1; | 3987 | r = 1; |
3711 | while (r > 0) { | 3988 | while (r > 0) { |
3712 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) | 3989 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) |
3713 | r = vcpu_enter_guest(vcpu, kvm_run); | 3990 | r = vcpu_enter_guest(vcpu); |
3714 | else { | 3991 | else { |
3715 | up_read(&vcpu->kvm->slots_lock); | 3992 | up_read(&vcpu->kvm->slots_lock); |
3716 | kvm_vcpu_block(vcpu); | 3993 | kvm_vcpu_block(vcpu); |
@@ -3738,14 +4015,14 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3738 | if (kvm_cpu_has_pending_timer(vcpu)) | 4015 | if (kvm_cpu_has_pending_timer(vcpu)) |
3739 | kvm_inject_pending_timer_irqs(vcpu); | 4016 | kvm_inject_pending_timer_irqs(vcpu); |
3740 | 4017 | ||
3741 | if (dm_request_for_irq_injection(vcpu, kvm_run)) { | 4018 | if (dm_request_for_irq_injection(vcpu)) { |
3742 | r = -EINTR; | 4019 | r = -EINTR; |
3743 | kvm_run->exit_reason = KVM_EXIT_INTR; | 4020 | vcpu->run->exit_reason = KVM_EXIT_INTR; |
3744 | ++vcpu->stat.request_irq_exits; | 4021 | ++vcpu->stat.request_irq_exits; |
3745 | } | 4022 | } |
3746 | if (signal_pending(current)) { | 4023 | if (signal_pending(current)) { |
3747 | r = -EINTR; | 4024 | r = -EINTR; |
3748 | kvm_run->exit_reason = KVM_EXIT_INTR; | 4025 | vcpu->run->exit_reason = KVM_EXIT_INTR; |
3749 | ++vcpu->stat.signal_exits; | 4026 | ++vcpu->stat.signal_exits; |
3750 | } | 4027 | } |
3751 | if (need_resched()) { | 4028 | if (need_resched()) { |
@@ -3756,7 +4033,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3756 | } | 4033 | } |
3757 | 4034 | ||
3758 | up_read(&vcpu->kvm->slots_lock); | 4035 | up_read(&vcpu->kvm->slots_lock); |
3759 | post_kvm_run_save(vcpu, kvm_run); | 4036 | post_kvm_run_save(vcpu); |
3760 | 4037 | ||
3761 | vapic_exit(vcpu); | 4038 | vapic_exit(vcpu); |
3762 | 4039 | ||
@@ -3789,15 +4066,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3789 | if (r) | 4066 | if (r) |
3790 | goto out; | 4067 | goto out; |
3791 | } | 4068 | } |
3792 | #if CONFIG_HAS_IOMEM | ||
3793 | if (vcpu->mmio_needed) { | 4069 | if (vcpu->mmio_needed) { |
3794 | memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); | 4070 | memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); |
3795 | vcpu->mmio_read_completed = 1; | 4071 | vcpu->mmio_read_completed = 1; |
3796 | vcpu->mmio_needed = 0; | 4072 | vcpu->mmio_needed = 0; |
3797 | 4073 | ||
3798 | down_read(&vcpu->kvm->slots_lock); | 4074 | down_read(&vcpu->kvm->slots_lock); |
3799 | r = emulate_instruction(vcpu, kvm_run, | 4075 | r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0, |
3800 | vcpu->arch.mmio_fault_cr2, 0, | ||
3801 | EMULTYPE_NO_DECODE); | 4076 | EMULTYPE_NO_DECODE); |
3802 | up_read(&vcpu->kvm->slots_lock); | 4077 | up_read(&vcpu->kvm->slots_lock); |
3803 | if (r == EMULATE_DO_MMIO) { | 4078 | if (r == EMULATE_DO_MMIO) { |
@@ -3808,12 +4083,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3808 | goto out; | 4083 | goto out; |
3809 | } | 4084 | } |
3810 | } | 4085 | } |
3811 | #endif | ||
3812 | if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) | 4086 | if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) |
3813 | kvm_register_write(vcpu, VCPU_REGS_RAX, | 4087 | kvm_register_write(vcpu, VCPU_REGS_RAX, |
3814 | kvm_run->hypercall.ret); | 4088 | kvm_run->hypercall.ret); |
3815 | 4089 | ||
3816 | r = __vcpu_run(vcpu, kvm_run); | 4090 | r = __vcpu_run(vcpu); |
3817 | 4091 | ||
3818 | out: | 4092 | out: |
3819 | if (vcpu->sigset_active) | 4093 | if (vcpu->sigset_active) |
@@ -3847,13 +4121,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
3847 | #endif | 4121 | #endif |
3848 | 4122 | ||
3849 | regs->rip = kvm_rip_read(vcpu); | 4123 | regs->rip = kvm_rip_read(vcpu); |
3850 | regs->rflags = kvm_x86_ops->get_rflags(vcpu); | 4124 | regs->rflags = kvm_get_rflags(vcpu); |
3851 | |||
3852 | /* | ||
3853 | * Don't leak debug flags in case they were set for guest debugging | ||
3854 | */ | ||
3855 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | ||
3856 | regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); | ||
3857 | 4125 | ||
3858 | vcpu_put(vcpu); | 4126 | vcpu_put(vcpu); |
3859 | 4127 | ||
@@ -3881,12 +4149,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
3881 | kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13); | 4149 | kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13); |
3882 | kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14); | 4150 | kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14); |
3883 | kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15); | 4151 | kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15); |
3884 | |||
3885 | #endif | 4152 | #endif |
3886 | 4153 | ||
3887 | kvm_rip_write(vcpu, regs->rip); | 4154 | kvm_rip_write(vcpu, regs->rip); |
3888 | kvm_x86_ops->set_rflags(vcpu, regs->rflags); | 4155 | kvm_set_rflags(vcpu, regs->rflags); |
3889 | |||
3890 | 4156 | ||
3891 | vcpu->arch.exception.pending = false; | 4157 | vcpu->arch.exception.pending = false; |
3892 | 4158 | ||
@@ -4105,7 +4371,7 @@ static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg) | |||
4105 | { | 4371 | { |
4106 | return (seg != VCPU_SREG_LDTR) && | 4372 | return (seg != VCPU_SREG_LDTR) && |
4107 | (seg != VCPU_SREG_TR) && | 4373 | (seg != VCPU_SREG_TR) && |
4108 | (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_VM); | 4374 | (kvm_get_rflags(vcpu) & X86_EFLAGS_VM); |
4109 | } | 4375 | } |
4110 | 4376 | ||
4111 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | 4377 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, |
@@ -4133,7 +4399,7 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu, | |||
4133 | { | 4399 | { |
4134 | tss->cr3 = vcpu->arch.cr3; | 4400 | tss->cr3 = vcpu->arch.cr3; |
4135 | tss->eip = kvm_rip_read(vcpu); | 4401 | tss->eip = kvm_rip_read(vcpu); |
4136 | tss->eflags = kvm_x86_ops->get_rflags(vcpu); | 4402 | tss->eflags = kvm_get_rflags(vcpu); |
4137 | tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX); | 4403 | tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX); |
4138 | tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); | 4404 | tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); |
4139 | tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX); | 4405 | tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX); |
@@ -4157,7 +4423,7 @@ static int load_state_from_tss32(struct kvm_vcpu *vcpu, | |||
4157 | kvm_set_cr3(vcpu, tss->cr3); | 4423 | kvm_set_cr3(vcpu, tss->cr3); |
4158 | 4424 | ||
4159 | kvm_rip_write(vcpu, tss->eip); | 4425 | kvm_rip_write(vcpu, tss->eip); |
4160 | kvm_x86_ops->set_rflags(vcpu, tss->eflags | 2); | 4426 | kvm_set_rflags(vcpu, tss->eflags | 2); |
4161 | 4427 | ||
4162 | kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax); | 4428 | kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax); |
4163 | kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx); | 4429 | kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx); |
@@ -4195,7 +4461,7 @@ static void save_state_to_tss16(struct kvm_vcpu *vcpu, | |||
4195 | struct tss_segment_16 *tss) | 4461 | struct tss_segment_16 *tss) |
4196 | { | 4462 | { |
4197 | tss->ip = kvm_rip_read(vcpu); | 4463 | tss->ip = kvm_rip_read(vcpu); |
4198 | tss->flag = kvm_x86_ops->get_rflags(vcpu); | 4464 | tss->flag = kvm_get_rflags(vcpu); |
4199 | tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX); | 4465 | tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX); |
4200 | tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX); | 4466 | tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX); |
4201 | tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX); | 4467 | tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX); |
@@ -4210,14 +4476,13 @@ static void save_state_to_tss16(struct kvm_vcpu *vcpu, | |||
4210 | tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); | 4476 | tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); |
4211 | tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS); | 4477 | tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS); |
4212 | tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR); | 4478 | tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR); |
4213 | tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR); | ||
4214 | } | 4479 | } |
4215 | 4480 | ||
4216 | static int load_state_from_tss16(struct kvm_vcpu *vcpu, | 4481 | static int load_state_from_tss16(struct kvm_vcpu *vcpu, |
4217 | struct tss_segment_16 *tss) | 4482 | struct tss_segment_16 *tss) |
4218 | { | 4483 | { |
4219 | kvm_rip_write(vcpu, tss->ip); | 4484 | kvm_rip_write(vcpu, tss->ip); |
4220 | kvm_x86_ops->set_rflags(vcpu, tss->flag | 2); | 4485 | kvm_set_rflags(vcpu, tss->flag | 2); |
4221 | kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax); | 4486 | kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax); |
4222 | kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx); | 4487 | kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx); |
4223 | kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx); | 4488 | kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx); |
@@ -4363,8 +4628,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
4363 | } | 4628 | } |
4364 | 4629 | ||
4365 | if (reason == TASK_SWITCH_IRET) { | 4630 | if (reason == TASK_SWITCH_IRET) { |
4366 | u32 eflags = kvm_x86_ops->get_rflags(vcpu); | 4631 | u32 eflags = kvm_get_rflags(vcpu); |
4367 | kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT); | 4632 | kvm_set_rflags(vcpu, eflags & ~X86_EFLAGS_NT); |
4368 | } | 4633 | } |
4369 | 4634 | ||
4370 | /* set back link to prev task only if NT bit is set in eflags | 4635 | /* set back link to prev task only if NT bit is set in eflags |
@@ -4372,11 +4637,6 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
4372 | if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) | 4637 | if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) |
4373 | old_tss_sel = 0xffff; | 4638 | old_tss_sel = 0xffff; |
4374 | 4639 | ||
4375 | /* set back link to prev task only if NT bit is set in eflags | ||
4376 | note that old_tss_sel is not used afetr this point */ | ||
4377 | if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) | ||
4378 | old_tss_sel = 0xffff; | ||
4379 | |||
4380 | if (nseg_desc.type & 8) | 4640 | if (nseg_desc.type & 8) |
4381 | ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel, | 4641 | ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel, |
4382 | old_tss_base, &nseg_desc); | 4642 | old_tss_base, &nseg_desc); |
@@ -4385,8 +4645,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
4385 | old_tss_base, &nseg_desc); | 4645 | old_tss_base, &nseg_desc); |
4386 | 4646 | ||
4387 | if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { | 4647 | if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { |
4388 | u32 eflags = kvm_x86_ops->get_rflags(vcpu); | 4648 | u32 eflags = kvm_get_rflags(vcpu); |
4389 | kvm_x86_ops->set_rflags(vcpu, eflags | X86_EFLAGS_NT); | 4649 | kvm_set_rflags(vcpu, eflags | X86_EFLAGS_NT); |
4390 | } | 4650 | } |
4391 | 4651 | ||
4392 | if (reason != TASK_SWITCH_IRET) { | 4652 | if (reason != TASK_SWITCH_IRET) { |
@@ -4438,8 +4698,10 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
4438 | 4698 | ||
4439 | mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4; | 4699 | mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4; |
4440 | kvm_x86_ops->set_cr4(vcpu, sregs->cr4); | 4700 | kvm_x86_ops->set_cr4(vcpu, sregs->cr4); |
4441 | if (!is_long_mode(vcpu) && is_pae(vcpu)) | 4701 | if (!is_long_mode(vcpu) && is_pae(vcpu)) { |
4442 | load_pdptrs(vcpu, vcpu->arch.cr3); | 4702 | load_pdptrs(vcpu, vcpu->arch.cr3); |
4703 | mmu_reset_needed = 1; | ||
4704 | } | ||
4443 | 4705 | ||
4444 | if (mmu_reset_needed) | 4706 | if (mmu_reset_needed) |
4445 | kvm_mmu_reset_context(vcpu); | 4707 | kvm_mmu_reset_context(vcpu); |
@@ -4480,12 +4742,32 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
4480 | int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | 4742 | int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, |
4481 | struct kvm_guest_debug *dbg) | 4743 | struct kvm_guest_debug *dbg) |
4482 | { | 4744 | { |
4745 | unsigned long rflags; | ||
4483 | int i, r; | 4746 | int i, r; |
4484 | 4747 | ||
4485 | vcpu_load(vcpu); | 4748 | vcpu_load(vcpu); |
4486 | 4749 | ||
4487 | if ((dbg->control & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) == | 4750 | if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) { |
4488 | (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) { | 4751 | r = -EBUSY; |
4752 | if (vcpu->arch.exception.pending) | ||
4753 | goto unlock_out; | ||
4754 | if (dbg->control & KVM_GUESTDBG_INJECT_DB) | ||
4755 | kvm_queue_exception(vcpu, DB_VECTOR); | ||
4756 | else | ||
4757 | kvm_queue_exception(vcpu, BP_VECTOR); | ||
4758 | } | ||
4759 | |||
4760 | /* | ||
4761 | * Read rflags as long as potentially injected trace flags are still | ||
4762 | * filtered out. | ||
4763 | */ | ||
4764 | rflags = kvm_get_rflags(vcpu); | ||
4765 | |||
4766 | vcpu->guest_debug = dbg->control; | ||
4767 | if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE)) | ||
4768 | vcpu->guest_debug = 0; | ||
4769 | |||
4770 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { | ||
4489 | for (i = 0; i < KVM_NR_DB_REGS; ++i) | 4771 | for (i = 0; i < KVM_NR_DB_REGS; ++i) |
4490 | vcpu->arch.eff_db[i] = dbg->arch.debugreg[i]; | 4772 | vcpu->arch.eff_db[i] = dbg->arch.debugreg[i]; |
4491 | vcpu->arch.switch_db_regs = | 4773 | vcpu->arch.switch_db_regs = |
@@ -4496,13 +4778,23 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | |||
4496 | vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); | 4778 | vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); |
4497 | } | 4779 | } |
4498 | 4780 | ||
4499 | r = kvm_x86_ops->set_guest_debug(vcpu, dbg); | 4781 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { |
4782 | vcpu->arch.singlestep_cs = | ||
4783 | get_segment_selector(vcpu, VCPU_SREG_CS); | ||
4784 | vcpu->arch.singlestep_rip = kvm_rip_read(vcpu); | ||
4785 | } | ||
4786 | |||
4787 | /* | ||
4788 | * Trigger an rflags update that will inject or remove the trace | ||
4789 | * flags. | ||
4790 | */ | ||
4791 | kvm_set_rflags(vcpu, rflags); | ||
4792 | |||
4793 | kvm_x86_ops->set_guest_debug(vcpu, dbg); | ||
4500 | 4794 | ||
4501 | if (dbg->control & KVM_GUESTDBG_INJECT_DB) | 4795 | r = 0; |
4502 | kvm_queue_exception(vcpu, DB_VECTOR); | ||
4503 | else if (dbg->control & KVM_GUESTDBG_INJECT_BP) | ||
4504 | kvm_queue_exception(vcpu, BP_VECTOR); | ||
4505 | 4796 | ||
4797 | unlock_out: | ||
4506 | vcpu_put(vcpu); | 4798 | vcpu_put(vcpu); |
4507 | 4799 | ||
4508 | return r; | 4800 | return r; |
@@ -4703,14 +4995,26 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) | |||
4703 | return kvm_x86_ops->vcpu_reset(vcpu); | 4995 | return kvm_x86_ops->vcpu_reset(vcpu); |
4704 | } | 4996 | } |
4705 | 4997 | ||
4706 | void kvm_arch_hardware_enable(void *garbage) | 4998 | int kvm_arch_hardware_enable(void *garbage) |
4707 | { | 4999 | { |
4708 | kvm_x86_ops->hardware_enable(garbage); | 5000 | /* |
5001 | * Since this may be called from a hotplug notification, | ||
5002 | * we can't get the CPU frequency directly. | ||
5003 | */ | ||
5004 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { | ||
5005 | int cpu = raw_smp_processor_id(); | ||
5006 | per_cpu(cpu_tsc_khz, cpu) = 0; | ||
5007 | } | ||
5008 | |||
5009 | kvm_shared_msr_cpu_online(); | ||
5010 | |||
5011 | return kvm_x86_ops->hardware_enable(garbage); | ||
4709 | } | 5012 | } |
4710 | 5013 | ||
4711 | void kvm_arch_hardware_disable(void *garbage) | 5014 | void kvm_arch_hardware_disable(void *garbage) |
4712 | { | 5015 | { |
4713 | kvm_x86_ops->hardware_disable(garbage); | 5016 | kvm_x86_ops->hardware_disable(garbage); |
5017 | drop_user_return_notifiers(garbage); | ||
4714 | } | 5018 | } |
4715 | 5019 | ||
4716 | int kvm_arch_hardware_setup(void) | 5020 | int kvm_arch_hardware_setup(void) |
@@ -4948,8 +5252,36 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) | |||
4948 | return kvm_x86_ops->interrupt_allowed(vcpu); | 5252 | return kvm_x86_ops->interrupt_allowed(vcpu); |
4949 | } | 5253 | } |
4950 | 5254 | ||
5255 | unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) | ||
5256 | { | ||
5257 | unsigned long rflags; | ||
5258 | |||
5259 | rflags = kvm_x86_ops->get_rflags(vcpu); | ||
5260 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | ||
5261 | rflags &= ~(unsigned long)(X86_EFLAGS_TF | X86_EFLAGS_RF); | ||
5262 | return rflags; | ||
5263 | } | ||
5264 | EXPORT_SYMBOL_GPL(kvm_get_rflags); | ||
5265 | |||
5266 | void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | ||
5267 | { | ||
5268 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && | ||
5269 | vcpu->arch.singlestep_cs == | ||
5270 | get_segment_selector(vcpu, VCPU_SREG_CS) && | ||
5271 | vcpu->arch.singlestep_rip == kvm_rip_read(vcpu)) | ||
5272 | rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF; | ||
5273 | kvm_x86_ops->set_rflags(vcpu, rflags); | ||
5274 | } | ||
5275 | EXPORT_SYMBOL_GPL(kvm_set_rflags); | ||
5276 | |||
4951 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); | 5277 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); |
4952 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); | 5278 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); |
4953 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); | 5279 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); |
4954 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr); | 5280 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr); |
4955 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr); | 5281 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr); |
5282 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun); | ||
5283 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit); | ||
5284 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject); | ||
5285 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit); | ||
5286 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); | ||
5287 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); | ||
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 73ffd5536f62..d406c5239019 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
@@ -146,10 +146,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
146 | use_gbpages = direct_gbpages; | 146 | use_gbpages = direct_gbpages; |
147 | #endif | 147 | #endif |
148 | 148 | ||
149 | set_nx(); | ||
150 | if (nx_enabled) | ||
151 | printk(KERN_INFO "NX (Execute Disable) protection: active\n"); | ||
152 | |||
153 | /* Enable PSE if available */ | 149 | /* Enable PSE if available */ |
154 | if (cpu_has_pse) | 150 | if (cpu_has_pse) |
155 | set_in_cr4(X86_CR4_PSE); | 151 | set_in_cr4(X86_CR4_PSE); |
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 30938c1d8d5d..c973f8e2a6cf 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -412,7 +412,7 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base) | |||
412 | pkmap_page_table = pte; | 412 | pkmap_page_table = pte; |
413 | } | 413 | } |
414 | 414 | ||
415 | static void __init add_one_highpage_init(struct page *page, int pfn) | 415 | static void __init add_one_highpage_init(struct page *page) |
416 | { | 416 | { |
417 | ClearPageReserved(page); | 417 | ClearPageReserved(page); |
418 | init_page_count(page); | 418 | init_page_count(page); |
@@ -445,7 +445,7 @@ static int __init add_highpages_work_fn(unsigned long start_pfn, | |||
445 | if (!pfn_valid(node_pfn)) | 445 | if (!pfn_valid(node_pfn)) |
446 | continue; | 446 | continue; |
447 | page = pfn_to_page(node_pfn); | 447 | page = pfn_to_page(node_pfn); |
448 | add_one_highpage_init(page, node_pfn); | 448 | add_one_highpage_init(page); |
449 | } | 449 | } |
450 | 450 | ||
451 | return 0; | 451 | return 0; |
@@ -703,8 +703,8 @@ void __init find_low_pfn_range(void) | |||
703 | } | 703 | } |
704 | 704 | ||
705 | #ifndef CONFIG_NEED_MULTIPLE_NODES | 705 | #ifndef CONFIG_NEED_MULTIPLE_NODES |
706 | void __init initmem_init(unsigned long start_pfn, | 706 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, |
707 | unsigned long end_pfn) | 707 | int acpi, int k8) |
708 | { | 708 | { |
709 | #ifdef CONFIG_HIGHMEM | 709 | #ifdef CONFIG_HIGHMEM |
710 | highstart_pfn = highend_pfn = max_pfn; | 710 | highstart_pfn = highend_pfn = max_pfn; |
@@ -997,7 +997,7 @@ static noinline int do_test_wp_bit(void) | |||
997 | const int rodata_test_data = 0xC3; | 997 | const int rodata_test_data = 0xC3; |
998 | EXPORT_SYMBOL_GPL(rodata_test_data); | 998 | EXPORT_SYMBOL_GPL(rodata_test_data); |
999 | 999 | ||
1000 | static int kernel_set_to_readonly; | 1000 | int kernel_set_to_readonly __read_mostly; |
1001 | 1001 | ||
1002 | void set_kernel_text_rw(void) | 1002 | void set_kernel_text_rw(void) |
1003 | { | 1003 | { |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 5a4398a6006b..5198b9bb34ef 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -568,7 +568,8 @@ kernel_physical_mapping_init(unsigned long start, | |||
568 | } | 568 | } |
569 | 569 | ||
570 | #ifndef CONFIG_NUMA | 570 | #ifndef CONFIG_NUMA |
571 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn) | 571 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, |
572 | int acpi, int k8) | ||
572 | { | 573 | { |
573 | unsigned long bootmap_size, bootmap; | 574 | unsigned long bootmap_size, bootmap; |
574 | 575 | ||
@@ -694,12 +695,12 @@ void __init mem_init(void) | |||
694 | const int rodata_test_data = 0xC3; | 695 | const int rodata_test_data = 0xC3; |
695 | EXPORT_SYMBOL_GPL(rodata_test_data); | 696 | EXPORT_SYMBOL_GPL(rodata_test_data); |
696 | 697 | ||
697 | static int kernel_set_to_readonly; | 698 | int kernel_set_to_readonly; |
698 | 699 | ||
699 | void set_kernel_text_rw(void) | 700 | void set_kernel_text_rw(void) |
700 | { | 701 | { |
701 | unsigned long start = PFN_ALIGN(_stext); | 702 | unsigned long start = PFN_ALIGN(_text); |
702 | unsigned long end = PFN_ALIGN(__start_rodata); | 703 | unsigned long end = PFN_ALIGN(__stop___ex_table); |
703 | 704 | ||
704 | if (!kernel_set_to_readonly) | 705 | if (!kernel_set_to_readonly) |
705 | return; | 706 | return; |
@@ -707,13 +708,18 @@ void set_kernel_text_rw(void) | |||
707 | pr_debug("Set kernel text: %lx - %lx for read write\n", | 708 | pr_debug("Set kernel text: %lx - %lx for read write\n", |
708 | start, end); | 709 | start, end); |
709 | 710 | ||
711 | /* | ||
712 | * Make the kernel identity mapping for text RW. Kernel text | ||
713 | * mapping will always be RO. Refer to the comment in | ||
714 | * static_protections() in pageattr.c | ||
715 | */ | ||
710 | set_memory_rw(start, (end - start) >> PAGE_SHIFT); | 716 | set_memory_rw(start, (end - start) >> PAGE_SHIFT); |
711 | } | 717 | } |
712 | 718 | ||
713 | void set_kernel_text_ro(void) | 719 | void set_kernel_text_ro(void) |
714 | { | 720 | { |
715 | unsigned long start = PFN_ALIGN(_stext); | 721 | unsigned long start = PFN_ALIGN(_text); |
716 | unsigned long end = PFN_ALIGN(__start_rodata); | 722 | unsigned long end = PFN_ALIGN(__stop___ex_table); |
717 | 723 | ||
718 | if (!kernel_set_to_readonly) | 724 | if (!kernel_set_to_readonly) |
719 | return; | 725 | return; |
@@ -721,14 +727,21 @@ void set_kernel_text_ro(void) | |||
721 | pr_debug("Set kernel text: %lx - %lx for read only\n", | 727 | pr_debug("Set kernel text: %lx - %lx for read only\n", |
722 | start, end); | 728 | start, end); |
723 | 729 | ||
730 | /* | ||
731 | * Set the kernel identity mapping for text RO. | ||
732 | */ | ||
724 | set_memory_ro(start, (end - start) >> PAGE_SHIFT); | 733 | set_memory_ro(start, (end - start) >> PAGE_SHIFT); |
725 | } | 734 | } |
726 | 735 | ||
727 | void mark_rodata_ro(void) | 736 | void mark_rodata_ro(void) |
728 | { | 737 | { |
729 | unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata); | 738 | unsigned long start = PFN_ALIGN(_text); |
730 | unsigned long rodata_start = | 739 | unsigned long rodata_start = |
731 | ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; | 740 | ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; |
741 | unsigned long end = (unsigned long) &__end_rodata_hpage_align; | ||
742 | unsigned long text_end = PAGE_ALIGN((unsigned long) &__stop___ex_table); | ||
743 | unsigned long rodata_end = PAGE_ALIGN((unsigned long) &__end_rodata); | ||
744 | unsigned long data_start = (unsigned long) &_sdata; | ||
732 | 745 | ||
733 | printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", | 746 | printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", |
734 | (end - start) >> 10); | 747 | (end - start) >> 10); |
@@ -751,6 +764,14 @@ void mark_rodata_ro(void) | |||
751 | printk(KERN_INFO "Testing CPA: again\n"); | 764 | printk(KERN_INFO "Testing CPA: again\n"); |
752 | set_memory_ro(start, (end-start) >> PAGE_SHIFT); | 765 | set_memory_ro(start, (end-start) >> PAGE_SHIFT); |
753 | #endif | 766 | #endif |
767 | |||
768 | free_init_pages("unused kernel memory", | ||
769 | (unsigned long) page_address(virt_to_page(text_end)), | ||
770 | (unsigned long) | ||
771 | page_address(virt_to_page(rodata_start))); | ||
772 | free_init_pages("unused kernel memory", | ||
773 | (unsigned long) page_address(virt_to_page(rodata_end)), | ||
774 | (unsigned long) page_address(virt_to_page(data_start))); | ||
754 | } | 775 | } |
755 | 776 | ||
756 | #endif | 777 | #endif |
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 2feb9bdedaaf..c246d259822d 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c | |||
@@ -281,30 +281,6 @@ void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size) | |||
281 | } | 281 | } |
282 | EXPORT_SYMBOL(ioremap_cache); | 282 | EXPORT_SYMBOL(ioremap_cache); |
283 | 283 | ||
284 | static void __iomem *ioremap_default(resource_size_t phys_addr, | ||
285 | unsigned long size) | ||
286 | { | ||
287 | unsigned long flags; | ||
288 | void __iomem *ret; | ||
289 | int err; | ||
290 | |||
291 | /* | ||
292 | * - WB for WB-able memory and no other conflicting mappings | ||
293 | * - UC_MINUS for non-WB-able memory with no other conflicting mappings | ||
294 | * - Inherit from confliting mappings otherwise | ||
295 | */ | ||
296 | err = reserve_memtype(phys_addr, phys_addr + size, | ||
297 | _PAGE_CACHE_WB, &flags); | ||
298 | if (err < 0) | ||
299 | return NULL; | ||
300 | |||
301 | ret = __ioremap_caller(phys_addr, size, flags, | ||
302 | __builtin_return_address(0)); | ||
303 | |||
304 | free_memtype(phys_addr, phys_addr + size); | ||
305 | return ret; | ||
306 | } | ||
307 | |||
308 | void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size, | 284 | void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size, |
309 | unsigned long prot_val) | 285 | unsigned long prot_val) |
310 | { | 286 | { |
@@ -380,7 +356,7 @@ void *xlate_dev_mem_ptr(unsigned long phys) | |||
380 | if (page_is_ram(start >> PAGE_SHIFT)) | 356 | if (page_is_ram(start >> PAGE_SHIFT)) |
381 | return __va(phys); | 357 | return __va(phys); |
382 | 358 | ||
383 | addr = (void __force *)ioremap_default(start, PAGE_SIZE); | 359 | addr = (void __force *)ioremap_cache(start, PAGE_SIZE); |
384 | if (addr) | 360 | if (addr) |
385 | addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK)); | 361 | addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK)); |
386 | 362 | ||
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c index 268f8255280f..970ed579d4e4 100644 --- a/arch/x86/mm/k8topology_64.c +++ b/arch/x86/mm/k8topology_64.c | |||
@@ -24,6 +24,9 @@ | |||
24 | #include <asm/apic.h> | 24 | #include <asm/apic.h> |
25 | #include <asm/k8.h> | 25 | #include <asm/k8.h> |
26 | 26 | ||
27 | static struct bootnode __initdata nodes[8]; | ||
28 | static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE; | ||
29 | |||
27 | static __init int find_northbridge(void) | 30 | static __init int find_northbridge(void) |
28 | { | 31 | { |
29 | int num; | 32 | int num; |
@@ -54,18 +57,6 @@ static __init void early_get_boot_cpu_id(void) | |||
54 | * need to get boot_cpu_id so can use that to create apicid_to_node | 57 | * need to get boot_cpu_id so can use that to create apicid_to_node |
55 | * in k8_scan_nodes() | 58 | * in k8_scan_nodes() |
56 | */ | 59 | */ |
57 | /* | ||
58 | * Find possible boot-time SMP configuration: | ||
59 | */ | ||
60 | #ifdef CONFIG_X86_MPPARSE | ||
61 | early_find_smp_config(); | ||
62 | #endif | ||
63 | #ifdef CONFIG_ACPI | ||
64 | /* | ||
65 | * Read APIC information from ACPI tables. | ||
66 | */ | ||
67 | early_acpi_boot_init(); | ||
68 | #endif | ||
69 | #ifdef CONFIG_X86_MPPARSE | 60 | #ifdef CONFIG_X86_MPPARSE |
70 | /* | 61 | /* |
71 | * get boot-time SMP configuration: | 62 | * get boot-time SMP configuration: |
@@ -76,12 +67,26 @@ static __init void early_get_boot_cpu_id(void) | |||
76 | early_init_lapic_mapping(); | 67 | early_init_lapic_mapping(); |
77 | } | 68 | } |
78 | 69 | ||
79 | int __init k8_scan_nodes(unsigned long start, unsigned long end) | 70 | int __init k8_get_nodes(struct bootnode *physnodes) |
80 | { | 71 | { |
81 | unsigned numnodes, cores, bits, apicid_base; | 72 | int i; |
73 | int ret = 0; | ||
74 | |||
75 | for_each_node_mask(i, nodes_parsed) { | ||
76 | physnodes[ret].start = nodes[i].start; | ||
77 | physnodes[ret].end = nodes[i].end; | ||
78 | ret++; | ||
79 | } | ||
80 | return ret; | ||
81 | } | ||
82 | |||
83 | int __init k8_numa_init(unsigned long start_pfn, unsigned long end_pfn) | ||
84 | { | ||
85 | unsigned long start = PFN_PHYS(start_pfn); | ||
86 | unsigned long end = PFN_PHYS(end_pfn); | ||
87 | unsigned numnodes; | ||
82 | unsigned long prevbase; | 88 | unsigned long prevbase; |
83 | struct bootnode nodes[8]; | 89 | int i, nb, found = 0; |
84 | int i, j, nb, found = 0; | ||
85 | u32 nodeid, reg; | 90 | u32 nodeid, reg; |
86 | 91 | ||
87 | if (!early_pci_allowed()) | 92 | if (!early_pci_allowed()) |
@@ -91,16 +96,15 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) | |||
91 | if (nb < 0) | 96 | if (nb < 0) |
92 | return nb; | 97 | return nb; |
93 | 98 | ||
94 | printk(KERN_INFO "Scanning NUMA topology in Northbridge %d\n", nb); | 99 | pr_info("Scanning NUMA topology in Northbridge %d\n", nb); |
95 | 100 | ||
96 | reg = read_pci_config(0, nb, 0, 0x60); | 101 | reg = read_pci_config(0, nb, 0, 0x60); |
97 | numnodes = ((reg >> 4) & 0xF) + 1; | 102 | numnodes = ((reg >> 4) & 0xF) + 1; |
98 | if (numnodes <= 1) | 103 | if (numnodes <= 1) |
99 | return -1; | 104 | return -1; |
100 | 105 | ||
101 | printk(KERN_INFO "Number of nodes %d\n", numnodes); | 106 | pr_info("Number of physical nodes %d\n", numnodes); |
102 | 107 | ||
103 | memset(&nodes, 0, sizeof(nodes)); | ||
104 | prevbase = 0; | 108 | prevbase = 0; |
105 | for (i = 0; i < 8; i++) { | 109 | for (i = 0; i < 8; i++) { |
106 | unsigned long base, limit; | 110 | unsigned long base, limit; |
@@ -111,28 +115,28 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) | |||
111 | nodeid = limit & 7; | 115 | nodeid = limit & 7; |
112 | if ((base & 3) == 0) { | 116 | if ((base & 3) == 0) { |
113 | if (i < numnodes) | 117 | if (i < numnodes) |
114 | printk("Skipping disabled node %d\n", i); | 118 | pr_info("Skipping disabled node %d\n", i); |
115 | continue; | 119 | continue; |
116 | } | 120 | } |
117 | if (nodeid >= numnodes) { | 121 | if (nodeid >= numnodes) { |
118 | printk("Ignoring excess node %d (%lx:%lx)\n", nodeid, | 122 | pr_info("Ignoring excess node %d (%lx:%lx)\n", nodeid, |
119 | base, limit); | 123 | base, limit); |
120 | continue; | 124 | continue; |
121 | } | 125 | } |
122 | 126 | ||
123 | if (!limit) { | 127 | if (!limit) { |
124 | printk(KERN_INFO "Skipping node entry %d (base %lx)\n", | 128 | pr_info("Skipping node entry %d (base %lx)\n", |
125 | i, base); | 129 | i, base); |
126 | continue; | 130 | continue; |
127 | } | 131 | } |
128 | if ((base >> 8) & 3 || (limit >> 8) & 3) { | 132 | if ((base >> 8) & 3 || (limit >> 8) & 3) { |
129 | printk(KERN_ERR "Node %d using interleaving mode %lx/%lx\n", | 133 | pr_err("Node %d using interleaving mode %lx/%lx\n", |
130 | nodeid, (base>>8)&3, (limit>>8) & 3); | 134 | nodeid, (base >> 8) & 3, (limit >> 8) & 3); |
131 | return -1; | 135 | return -1; |
132 | } | 136 | } |
133 | if (node_isset(nodeid, node_possible_map)) { | 137 | if (node_isset(nodeid, nodes_parsed)) { |
134 | printk(KERN_INFO "Node %d already present. Skipping\n", | 138 | pr_info("Node %d already present, skipping\n", |
135 | nodeid); | 139 | nodeid); |
136 | continue; | 140 | continue; |
137 | } | 141 | } |
138 | 142 | ||
@@ -141,8 +145,8 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) | |||
141 | limit |= (1<<24)-1; | 145 | limit |= (1<<24)-1; |
142 | limit++; | 146 | limit++; |
143 | 147 | ||
144 | if (limit > max_pfn << PAGE_SHIFT) | 148 | if (limit > end) |
145 | limit = max_pfn << PAGE_SHIFT; | 149 | limit = end; |
146 | if (limit <= base) | 150 | if (limit <= base) |
147 | continue; | 151 | continue; |
148 | 152 | ||
@@ -154,24 +158,24 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) | |||
154 | if (limit > end) | 158 | if (limit > end) |
155 | limit = end; | 159 | limit = end; |
156 | if (limit == base) { | 160 | if (limit == base) { |
157 | printk(KERN_ERR "Empty node %d\n", nodeid); | 161 | pr_err("Empty node %d\n", nodeid); |
158 | continue; | 162 | continue; |
159 | } | 163 | } |
160 | if (limit < base) { | 164 | if (limit < base) { |
161 | printk(KERN_ERR "Node %d bogus settings %lx-%lx.\n", | 165 | pr_err("Node %d bogus settings %lx-%lx.\n", |
162 | nodeid, base, limit); | 166 | nodeid, base, limit); |
163 | continue; | 167 | continue; |
164 | } | 168 | } |
165 | 169 | ||
166 | /* Could sort here, but punt for now. Should not happen anyroads. */ | 170 | /* Could sort here, but punt for now. Should not happen anyroads. */ |
167 | if (prevbase > base) { | 171 | if (prevbase > base) { |
168 | printk(KERN_ERR "Node map not sorted %lx,%lx\n", | 172 | pr_err("Node map not sorted %lx,%lx\n", |
169 | prevbase, base); | 173 | prevbase, base); |
170 | return -1; | 174 | return -1; |
171 | } | 175 | } |
172 | 176 | ||
173 | printk(KERN_INFO "Node %d MemBase %016lx Limit %016lx\n", | 177 | pr_info("Node %d MemBase %016lx Limit %016lx\n", |
174 | nodeid, base, limit); | 178 | nodeid, base, limit); |
175 | 179 | ||
176 | found++; | 180 | found++; |
177 | 181 | ||
@@ -180,18 +184,29 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) | |||
180 | 184 | ||
181 | prevbase = base; | 185 | prevbase = base; |
182 | 186 | ||
183 | node_set(nodeid, node_possible_map); | 187 | node_set(nodeid, nodes_parsed); |
184 | } | 188 | } |
185 | 189 | ||
186 | if (!found) | 190 | if (!found) |
187 | return -1; | 191 | return -1; |
192 | return 0; | ||
193 | } | ||
194 | |||
195 | int __init k8_scan_nodes(void) | ||
196 | { | ||
197 | unsigned int bits; | ||
198 | unsigned int cores; | ||
199 | unsigned int apicid_base; | ||
200 | int i; | ||
188 | 201 | ||
202 | BUG_ON(nodes_empty(nodes_parsed)); | ||
203 | node_possible_map = nodes_parsed; | ||
189 | memnode_shift = compute_hash_shift(nodes, 8, NULL); | 204 | memnode_shift = compute_hash_shift(nodes, 8, NULL); |
190 | if (memnode_shift < 0) { | 205 | if (memnode_shift < 0) { |
191 | printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n"); | 206 | pr_err("No NUMA node hash function found. Contact maintainer\n"); |
192 | return -1; | 207 | return -1; |
193 | } | 208 | } |
194 | printk(KERN_INFO "Using node hash shift of %d\n", memnode_shift); | 209 | pr_info("Using node hash shift of %d\n", memnode_shift); |
195 | 210 | ||
196 | /* use the coreid bits from early_identify_cpu */ | 211 | /* use the coreid bits from early_identify_cpu */ |
197 | bits = boot_cpu_data.x86_coreid_bits; | 212 | bits = boot_cpu_data.x86_coreid_bits; |
@@ -200,14 +215,12 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) | |||
200 | /* need to get boot_cpu_id early for system with apicid lifting */ | 215 | /* need to get boot_cpu_id early for system with apicid lifting */ |
201 | early_get_boot_cpu_id(); | 216 | early_get_boot_cpu_id(); |
202 | if (boot_cpu_physical_apicid > 0) { | 217 | if (boot_cpu_physical_apicid > 0) { |
203 | printk(KERN_INFO "BSP APIC ID: %02x\n", | 218 | pr_info("BSP APIC ID: %02x\n", boot_cpu_physical_apicid); |
204 | boot_cpu_physical_apicid); | ||
205 | apicid_base = boot_cpu_physical_apicid; | 219 | apicid_base = boot_cpu_physical_apicid; |
206 | } | 220 | } |
207 | 221 | ||
208 | for (i = 0; i < 8; i++) { | 222 | for_each_node_mask(i, node_possible_map) { |
209 | if (nodes[i].start == nodes[i].end) | 223 | int j; |
210 | continue; | ||
211 | 224 | ||
212 | e820_register_active_regions(i, | 225 | e820_register_active_regions(i, |
213 | nodes[i].start >> PAGE_SHIFT, | 226 | nodes[i].start >> PAGE_SHIFT, |
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index d2530062fe00..b20760ca7244 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c | |||
@@ -347,8 +347,8 @@ static void init_remap_allocator(int nid) | |||
347 | (ulong) node_remap_end_vaddr[nid]); | 347 | (ulong) node_remap_end_vaddr[nid]); |
348 | } | 348 | } |
349 | 349 | ||
350 | void __init initmem_init(unsigned long start_pfn, | 350 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, |
351 | unsigned long end_pfn) | 351 | int acpi, int k8) |
352 | { | 352 | { |
353 | int nid; | 353 | int nid; |
354 | long kva_target_pfn; | 354 | long kva_target_pfn; |
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 459913beac71..83bbc70d11bb 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -239,8 +239,14 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
239 | bootmap = early_node_mem(nodeid, bootmap_start, end, | 239 | bootmap = early_node_mem(nodeid, bootmap_start, end, |
240 | bootmap_pages<<PAGE_SHIFT, PAGE_SIZE); | 240 | bootmap_pages<<PAGE_SHIFT, PAGE_SIZE); |
241 | if (bootmap == NULL) { | 241 | if (bootmap == NULL) { |
242 | if (nodedata_phys < start || nodedata_phys >= end) | 242 | if (nodedata_phys < start || nodedata_phys >= end) { |
243 | free_bootmem(nodedata_phys, pgdat_size); | 243 | /* |
244 | * only need to free it if it is from other node | ||
245 | * bootmem | ||
246 | */ | ||
247 | if (nid != nodeid) | ||
248 | free_bootmem(nodedata_phys, pgdat_size); | ||
249 | } | ||
244 | node_data[nodeid] = NULL; | 250 | node_data[nodeid] = NULL; |
245 | return; | 251 | return; |
246 | } | 252 | } |
@@ -306,8 +312,71 @@ void __init numa_init_array(void) | |||
306 | 312 | ||
307 | #ifdef CONFIG_NUMA_EMU | 313 | #ifdef CONFIG_NUMA_EMU |
308 | /* Numa emulation */ | 314 | /* Numa emulation */ |
315 | static struct bootnode nodes[MAX_NUMNODES] __initdata; | ||
316 | static struct bootnode physnodes[MAX_NUMNODES] __initdata; | ||
309 | static char *cmdline __initdata; | 317 | static char *cmdline __initdata; |
310 | 318 | ||
319 | static int __init setup_physnodes(unsigned long start, unsigned long end, | ||
320 | int acpi, int k8) | ||
321 | { | ||
322 | int nr_nodes = 0; | ||
323 | int ret = 0; | ||
324 | int i; | ||
325 | |||
326 | #ifdef CONFIG_ACPI_NUMA | ||
327 | if (acpi) | ||
328 | nr_nodes = acpi_get_nodes(physnodes); | ||
329 | #endif | ||
330 | #ifdef CONFIG_K8_NUMA | ||
331 | if (k8) | ||
332 | nr_nodes = k8_get_nodes(physnodes); | ||
333 | #endif | ||
334 | /* | ||
335 | * Basic sanity checking on the physical node map: there may be errors | ||
336 | * if the SRAT or K8 incorrectly reported the topology or the mem= | ||
337 | * kernel parameter is used. | ||
338 | */ | ||
339 | for (i = 0; i < nr_nodes; i++) { | ||
340 | if (physnodes[i].start == physnodes[i].end) | ||
341 | continue; | ||
342 | if (physnodes[i].start > end) { | ||
343 | physnodes[i].end = physnodes[i].start; | ||
344 | continue; | ||
345 | } | ||
346 | if (physnodes[i].end < start) { | ||
347 | physnodes[i].start = physnodes[i].end; | ||
348 | continue; | ||
349 | } | ||
350 | if (physnodes[i].start < start) | ||
351 | physnodes[i].start = start; | ||
352 | if (physnodes[i].end > end) | ||
353 | physnodes[i].end = end; | ||
354 | } | ||
355 | |||
356 | /* | ||
357 | * Remove all nodes that have no memory or were truncated because of the | ||
358 | * limited address range. | ||
359 | */ | ||
360 | for (i = 0; i < nr_nodes; i++) { | ||
361 | if (physnodes[i].start == physnodes[i].end) | ||
362 | continue; | ||
363 | physnodes[ret].start = physnodes[i].start; | ||
364 | physnodes[ret].end = physnodes[i].end; | ||
365 | ret++; | ||
366 | } | ||
367 | |||
368 | /* | ||
369 | * If no physical topology was detected, a single node is faked to cover | ||
370 | * the entire address space. | ||
371 | */ | ||
372 | if (!ret) { | ||
373 | physnodes[ret].start = start; | ||
374 | physnodes[ret].end = end; | ||
375 | ret = 1; | ||
376 | } | ||
377 | return ret; | ||
378 | } | ||
379 | |||
311 | /* | 380 | /* |
312 | * Sets up nid to range from addr to addr + size. If the end | 381 | * Sets up nid to range from addr to addr + size. If the end |
313 | * boundary is greater than max_addr, then max_addr is used instead. | 382 | * boundary is greater than max_addr, then max_addr is used instead. |
@@ -315,11 +384,9 @@ static char *cmdline __initdata; | |||
315 | * allocation past addr and -1 otherwise. addr is adjusted to be at | 384 | * allocation past addr and -1 otherwise. addr is adjusted to be at |
316 | * the end of the node. | 385 | * the end of the node. |
317 | */ | 386 | */ |
318 | static int __init setup_node_range(int nid, struct bootnode *nodes, u64 *addr, | 387 | static int __init setup_node_range(int nid, u64 *addr, u64 size, u64 max_addr) |
319 | u64 size, u64 max_addr) | ||
320 | { | 388 | { |
321 | int ret = 0; | 389 | int ret = 0; |
322 | |||
323 | nodes[nid].start = *addr; | 390 | nodes[nid].start = *addr; |
324 | *addr += size; | 391 | *addr += size; |
325 | if (*addr >= max_addr) { | 392 | if (*addr >= max_addr) { |
@@ -335,12 +402,111 @@ static int __init setup_node_range(int nid, struct bootnode *nodes, u64 *addr, | |||
335 | } | 402 | } |
336 | 403 | ||
337 | /* | 404 | /* |
405 | * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr | ||
406 | * to max_addr. The return value is the number of nodes allocated. | ||
407 | */ | ||
408 | static int __init split_nodes_interleave(u64 addr, u64 max_addr, | ||
409 | int nr_phys_nodes, int nr_nodes) | ||
410 | { | ||
411 | nodemask_t physnode_mask = NODE_MASK_NONE; | ||
412 | u64 size; | ||
413 | int big; | ||
414 | int ret = 0; | ||
415 | int i; | ||
416 | |||
417 | if (nr_nodes <= 0) | ||
418 | return -1; | ||
419 | if (nr_nodes > MAX_NUMNODES) { | ||
420 | pr_info("numa=fake=%d too large, reducing to %d\n", | ||
421 | nr_nodes, MAX_NUMNODES); | ||
422 | nr_nodes = MAX_NUMNODES; | ||
423 | } | ||
424 | |||
425 | size = (max_addr - addr - e820_hole_size(addr, max_addr)) / nr_nodes; | ||
426 | /* | ||
427 | * Calculate the number of big nodes that can be allocated as a result | ||
428 | * of consolidating the remainder. | ||
429 | */ | ||
430 | big = ((size & ~FAKE_NODE_MIN_HASH_MASK) & nr_nodes) / | ||
431 | FAKE_NODE_MIN_SIZE; | ||
432 | |||
433 | size &= FAKE_NODE_MIN_HASH_MASK; | ||
434 | if (!size) { | ||
435 | pr_err("Not enough memory for each node. " | ||
436 | "NUMA emulation disabled.\n"); | ||
437 | return -1; | ||
438 | } | ||
439 | |||
440 | for (i = 0; i < nr_phys_nodes; i++) | ||
441 | if (physnodes[i].start != physnodes[i].end) | ||
442 | node_set(i, physnode_mask); | ||
443 | |||
444 | /* | ||
445 | * Continue to fill physical nodes with fake nodes until there is no | ||
446 | * memory left on any of them. | ||
447 | */ | ||
448 | while (nodes_weight(physnode_mask)) { | ||
449 | for_each_node_mask(i, physnode_mask) { | ||
450 | u64 end = physnodes[i].start + size; | ||
451 | u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN); | ||
452 | |||
453 | if (ret < big) | ||
454 | end += FAKE_NODE_MIN_SIZE; | ||
455 | |||
456 | /* | ||
457 | * Continue to add memory to this fake node if its | ||
458 | * non-reserved memory is less than the per-node size. | ||
459 | */ | ||
460 | while (end - physnodes[i].start - | ||
461 | e820_hole_size(physnodes[i].start, end) < size) { | ||
462 | end += FAKE_NODE_MIN_SIZE; | ||
463 | if (end > physnodes[i].end) { | ||
464 | end = physnodes[i].end; | ||
465 | break; | ||
466 | } | ||
467 | } | ||
468 | |||
469 | /* | ||
470 | * If there won't be at least FAKE_NODE_MIN_SIZE of | ||
471 | * non-reserved memory in ZONE_DMA32 for the next node, | ||
472 | * this one must extend to the boundary. | ||
473 | */ | ||
474 | if (end < dma32_end && dma32_end - end - | ||
475 | e820_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) | ||
476 | end = dma32_end; | ||
477 | |||
478 | /* | ||
479 | * If there won't be enough non-reserved memory for the | ||
480 | * next node, this one must extend to the end of the | ||
481 | * physical node. | ||
482 | */ | ||
483 | if (physnodes[i].end - end - | ||
484 | e820_hole_size(end, physnodes[i].end) < size) | ||
485 | end = physnodes[i].end; | ||
486 | |||
487 | /* | ||
488 | * Avoid allocating more nodes than requested, which can | ||
489 | * happen as a result of rounding down each node's size | ||
490 | * to FAKE_NODE_MIN_SIZE. | ||
491 | */ | ||
492 | if (nodes_weight(physnode_mask) + ret >= nr_nodes) | ||
493 | end = physnodes[i].end; | ||
494 | |||
495 | if (setup_node_range(ret++, &physnodes[i].start, | ||
496 | end - physnodes[i].start, | ||
497 | physnodes[i].end) < 0) | ||
498 | node_clear(i, physnode_mask); | ||
499 | } | ||
500 | } | ||
501 | return ret; | ||
502 | } | ||
503 | |||
504 | /* | ||
338 | * Splits num_nodes nodes up equally starting at node_start. The return value | 505 | * Splits num_nodes nodes up equally starting at node_start. The return value |
339 | * is the number of nodes split up and addr is adjusted to be at the end of the | 506 | * is the number of nodes split up and addr is adjusted to be at the end of the |
340 | * last node allocated. | 507 | * last node allocated. |
341 | */ | 508 | */ |
342 | static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr, | 509 | static int __init split_nodes_equally(u64 *addr, u64 max_addr, int node_start, |
343 | u64 max_addr, int node_start, | ||
344 | int num_nodes) | 510 | int num_nodes) |
345 | { | 511 | { |
346 | unsigned int big; | 512 | unsigned int big; |
@@ -388,7 +554,7 @@ static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr, | |||
388 | break; | 554 | break; |
389 | } | 555 | } |
390 | } | 556 | } |
391 | if (setup_node_range(i, nodes, addr, end - *addr, max_addr) < 0) | 557 | if (setup_node_range(i, addr, end - *addr, max_addr) < 0) |
392 | break; | 558 | break; |
393 | } | 559 | } |
394 | return i - node_start + 1; | 560 | return i - node_start + 1; |
@@ -399,12 +565,12 @@ static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr, | |||
399 | * always assigned to a final node and can be asymmetric. Returns the number of | 565 | * always assigned to a final node and can be asymmetric. Returns the number of |
400 | * nodes split. | 566 | * nodes split. |
401 | */ | 567 | */ |
402 | static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr, | 568 | static int __init split_nodes_by_size(u64 *addr, u64 max_addr, int node_start, |
403 | u64 max_addr, int node_start, u64 size) | 569 | u64 size) |
404 | { | 570 | { |
405 | int i = node_start; | 571 | int i = node_start; |
406 | size = (size << 20) & FAKE_NODE_MIN_HASH_MASK; | 572 | size = (size << 20) & FAKE_NODE_MIN_HASH_MASK; |
407 | while (!setup_node_range(i++, nodes, addr, size, max_addr)) | 573 | while (!setup_node_range(i++, addr, size, max_addr)) |
408 | ; | 574 | ; |
409 | return i - node_start; | 575 | return i - node_start; |
410 | } | 576 | } |
@@ -413,15 +579,15 @@ static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr, | |||
413 | * Sets up the system RAM area from start_pfn to last_pfn according to the | 579 | * Sets up the system RAM area from start_pfn to last_pfn according to the |
414 | * numa=fake command-line option. | 580 | * numa=fake command-line option. |
415 | */ | 581 | */ |
416 | static struct bootnode nodes[MAX_NUMNODES] __initdata; | 582 | static int __init numa_emulation(unsigned long start_pfn, |
417 | 583 | unsigned long last_pfn, int acpi, int k8) | |
418 | static int __init numa_emulation(unsigned long start_pfn, unsigned long last_pfn) | ||
419 | { | 584 | { |
420 | u64 size, addr = start_pfn << PAGE_SHIFT; | 585 | u64 size, addr = start_pfn << PAGE_SHIFT; |
421 | u64 max_addr = last_pfn << PAGE_SHIFT; | 586 | u64 max_addr = last_pfn << PAGE_SHIFT; |
422 | int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i; | 587 | int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i; |
588 | int num_phys_nodes; | ||
423 | 589 | ||
424 | memset(&nodes, 0, sizeof(nodes)); | 590 | num_phys_nodes = setup_physnodes(addr, max_addr, acpi, k8); |
425 | /* | 591 | /* |
426 | * If the numa=fake command-line is just a single number N, split the | 592 | * If the numa=fake command-line is just a single number N, split the |
427 | * system RAM into N fake nodes. | 593 | * system RAM into N fake nodes. |
@@ -429,7 +595,8 @@ static int __init numa_emulation(unsigned long start_pfn, unsigned long last_pfn | |||
429 | if (!strchr(cmdline, '*') && !strchr(cmdline, ',')) { | 595 | if (!strchr(cmdline, '*') && !strchr(cmdline, ',')) { |
430 | long n = simple_strtol(cmdline, NULL, 0); | 596 | long n = simple_strtol(cmdline, NULL, 0); |
431 | 597 | ||
432 | num_nodes = split_nodes_equally(nodes, &addr, max_addr, 0, n); | 598 | num_nodes = split_nodes_interleave(addr, max_addr, |
599 | num_phys_nodes, n); | ||
433 | if (num_nodes < 0) | 600 | if (num_nodes < 0) |
434 | return num_nodes; | 601 | return num_nodes; |
435 | goto out; | 602 | goto out; |
@@ -456,8 +623,8 @@ static int __init numa_emulation(unsigned long start_pfn, unsigned long last_pfn | |||
456 | size = ((u64)num << 20) & FAKE_NODE_MIN_HASH_MASK; | 623 | size = ((u64)num << 20) & FAKE_NODE_MIN_HASH_MASK; |
457 | if (size) | 624 | if (size) |
458 | for (i = 0; i < coeff; i++, num_nodes++) | 625 | for (i = 0; i < coeff; i++, num_nodes++) |
459 | if (setup_node_range(num_nodes, nodes, | 626 | if (setup_node_range(num_nodes, &addr, |
460 | &addr, size, max_addr) < 0) | 627 | size, max_addr) < 0) |
461 | goto done; | 628 | goto done; |
462 | if (!*cmdline) | 629 | if (!*cmdline) |
463 | break; | 630 | break; |
@@ -473,7 +640,7 @@ done: | |||
473 | if (addr < max_addr) { | 640 | if (addr < max_addr) { |
474 | if (coeff_flag && coeff < 0) { | 641 | if (coeff_flag && coeff < 0) { |
475 | /* Split remaining nodes into num-sized chunks */ | 642 | /* Split remaining nodes into num-sized chunks */ |
476 | num_nodes += split_nodes_by_size(nodes, &addr, max_addr, | 643 | num_nodes += split_nodes_by_size(&addr, max_addr, |
477 | num_nodes, num); | 644 | num_nodes, num); |
478 | goto out; | 645 | goto out; |
479 | } | 646 | } |
@@ -482,7 +649,7 @@ done: | |||
482 | /* Split remaining nodes into coeff chunks */ | 649 | /* Split remaining nodes into coeff chunks */ |
483 | if (coeff <= 0) | 650 | if (coeff <= 0) |
484 | break; | 651 | break; |
485 | num_nodes += split_nodes_equally(nodes, &addr, max_addr, | 652 | num_nodes += split_nodes_equally(&addr, max_addr, |
486 | num_nodes, coeff); | 653 | num_nodes, coeff); |
487 | break; | 654 | break; |
488 | case ',': | 655 | case ',': |
@@ -490,8 +657,8 @@ done: | |||
490 | break; | 657 | break; |
491 | default: | 658 | default: |
492 | /* Give one final node */ | 659 | /* Give one final node */ |
493 | setup_node_range(num_nodes, nodes, &addr, | 660 | setup_node_range(num_nodes, &addr, max_addr - addr, |
494 | max_addr - addr, max_addr); | 661 | max_addr); |
495 | num_nodes++; | 662 | num_nodes++; |
496 | } | 663 | } |
497 | } | 664 | } |
@@ -505,14 +672,10 @@ out: | |||
505 | } | 672 | } |
506 | 673 | ||
507 | /* | 674 | /* |
508 | * We need to vacate all active ranges that may have been registered by | 675 | * We need to vacate all active ranges that may have been registered for |
509 | * SRAT and set acpi_numa to -1 so that srat_disabled() always returns | 676 | * the e820 memory map. |
510 | * true. NUMA emulation has succeeded so we will not scan ACPI nodes. | ||
511 | */ | 677 | */ |
512 | remove_all_active_ranges(); | 678 | remove_all_active_ranges(); |
513 | #ifdef CONFIG_ACPI_NUMA | ||
514 | acpi_numa = -1; | ||
515 | #endif | ||
516 | for_each_node_mask(i, node_possible_map) { | 679 | for_each_node_mask(i, node_possible_map) { |
517 | e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, | 680 | e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, |
518 | nodes[i].end >> PAGE_SHIFT); | 681 | nodes[i].end >> PAGE_SHIFT); |
@@ -524,7 +687,8 @@ out: | |||
524 | } | 687 | } |
525 | #endif /* CONFIG_NUMA_EMU */ | 688 | #endif /* CONFIG_NUMA_EMU */ |
526 | 689 | ||
527 | void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn) | 690 | void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn, |
691 | int acpi, int k8) | ||
528 | { | 692 | { |
529 | int i; | 693 | int i; |
530 | 694 | ||
@@ -532,23 +696,22 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn) | |||
532 | nodes_clear(node_online_map); | 696 | nodes_clear(node_online_map); |
533 | 697 | ||
534 | #ifdef CONFIG_NUMA_EMU | 698 | #ifdef CONFIG_NUMA_EMU |
535 | if (cmdline && !numa_emulation(start_pfn, last_pfn)) | 699 | if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, k8)) |
536 | return; | 700 | return; |
537 | nodes_clear(node_possible_map); | 701 | nodes_clear(node_possible_map); |
538 | nodes_clear(node_online_map); | 702 | nodes_clear(node_online_map); |
539 | #endif | 703 | #endif |
540 | 704 | ||
541 | #ifdef CONFIG_ACPI_NUMA | 705 | #ifdef CONFIG_ACPI_NUMA |
542 | if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT, | 706 | if (!numa_off && acpi && !acpi_scan_nodes(start_pfn << PAGE_SHIFT, |
543 | last_pfn << PAGE_SHIFT)) | 707 | last_pfn << PAGE_SHIFT)) |
544 | return; | 708 | return; |
545 | nodes_clear(node_possible_map); | 709 | nodes_clear(node_possible_map); |
546 | nodes_clear(node_online_map); | 710 | nodes_clear(node_online_map); |
547 | #endif | 711 | #endif |
548 | 712 | ||
549 | #ifdef CONFIG_K8_NUMA | 713 | #ifdef CONFIG_K8_NUMA |
550 | if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, | 714 | if (!numa_off && k8 && !k8_scan_nodes()) |
551 | last_pfn<<PAGE_SHIFT)) | ||
552 | return; | 715 | return; |
553 | nodes_clear(node_possible_map); | 716 | nodes_clear(node_possible_map); |
554 | nodes_clear(node_online_map); | 717 | nodes_clear(node_online_map); |
@@ -601,6 +764,25 @@ static __init int numa_setup(char *opt) | |||
601 | early_param("numa", numa_setup); | 764 | early_param("numa", numa_setup); |
602 | 765 | ||
603 | #ifdef CONFIG_NUMA | 766 | #ifdef CONFIG_NUMA |
767 | |||
768 | static __init int find_near_online_node(int node) | ||
769 | { | ||
770 | int n, val; | ||
771 | int min_val = INT_MAX; | ||
772 | int best_node = -1; | ||
773 | |||
774 | for_each_online_node(n) { | ||
775 | val = node_distance(node, n); | ||
776 | |||
777 | if (val < min_val) { | ||
778 | min_val = val; | ||
779 | best_node = n; | ||
780 | } | ||
781 | } | ||
782 | |||
783 | return best_node; | ||
784 | } | ||
785 | |||
604 | /* | 786 | /* |
605 | * Setup early cpu_to_node. | 787 | * Setup early cpu_to_node. |
606 | * | 788 | * |
@@ -632,7 +814,7 @@ void __init init_cpu_to_node(void) | |||
632 | if (node == NUMA_NO_NODE) | 814 | if (node == NUMA_NO_NODE) |
633 | continue; | 815 | continue; |
634 | if (!node_online(node)) | 816 | if (!node_online(node)) |
635 | continue; | 817 | node = find_near_online_node(node); |
636 | numa_set_node(cpu, node); | 818 | numa_set_node(cpu, node); |
637 | } | 819 | } |
638 | } | 820 | } |
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index dd38bfbefd1f..1d4eb93d333c 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -279,6 +279,22 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, | |||
279 | __pa((unsigned long)__end_rodata) >> PAGE_SHIFT)) | 279 | __pa((unsigned long)__end_rodata) >> PAGE_SHIFT)) |
280 | pgprot_val(forbidden) |= _PAGE_RW; | 280 | pgprot_val(forbidden) |= _PAGE_RW; |
281 | 281 | ||
282 | #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) | ||
283 | /* | ||
284 | * Once the kernel maps the text as RO (kernel_set_to_readonly is set), | ||
285 | * kernel text mappings for the large page aligned text, rodata sections | ||
286 | * will be always read-only. For the kernel identity mappings covering | ||
287 | * the holes caused by this alignment can be anything that user asks. | ||
288 | * | ||
289 | * This will preserve the large page mappings for kernel text/data | ||
290 | * at no extra cost. | ||
291 | */ | ||
292 | if (kernel_set_to_readonly && | ||
293 | within(address, (unsigned long)_text, | ||
294 | (unsigned long)__end_rodata_hpage_align)) | ||
295 | pgprot_val(forbidden) |= _PAGE_RW; | ||
296 | #endif | ||
297 | |||
282 | prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); | 298 | prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); |
283 | 299 | ||
284 | return prot; | 300 | return prot; |
@@ -1069,12 +1085,18 @@ EXPORT_SYMBOL(set_memory_array_wb); | |||
1069 | 1085 | ||
1070 | int set_memory_x(unsigned long addr, int numpages) | 1086 | int set_memory_x(unsigned long addr, int numpages) |
1071 | { | 1087 | { |
1088 | if (!(__supported_pte_mask & _PAGE_NX)) | ||
1089 | return 0; | ||
1090 | |||
1072 | return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0); | 1091 | return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0); |
1073 | } | 1092 | } |
1074 | EXPORT_SYMBOL(set_memory_x); | 1093 | EXPORT_SYMBOL(set_memory_x); |
1075 | 1094 | ||
1076 | int set_memory_nx(unsigned long addr, int numpages) | 1095 | int set_memory_nx(unsigned long addr, int numpages) |
1077 | { | 1096 | { |
1097 | if (!(__supported_pte_mask & _PAGE_NX)) | ||
1098 | return 0; | ||
1099 | |||
1078 | return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0); | 1100 | return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0); |
1079 | } | 1101 | } |
1080 | EXPORT_SYMBOL(set_memory_nx); | 1102 | EXPORT_SYMBOL(set_memory_nx); |
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index e78cd0ec2bcf..66b55d6e69ed 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <asm/cacheflush.h> | 20 | #include <asm/cacheflush.h> |
21 | #include <asm/processor.h> | 21 | #include <asm/processor.h> |
22 | #include <asm/tlbflush.h> | 22 | #include <asm/tlbflush.h> |
23 | #include <asm/x86_init.h> | ||
23 | #include <asm/pgtable.h> | 24 | #include <asm/pgtable.h> |
24 | #include <asm/fcntl.h> | 25 | #include <asm/fcntl.h> |
25 | #include <asm/e820.h> | 26 | #include <asm/e820.h> |
@@ -355,9 +356,6 @@ static int free_ram_pages_type(u64 start, u64 end) | |||
355 | * - _PAGE_CACHE_UC_MINUS | 356 | * - _PAGE_CACHE_UC_MINUS |
356 | * - _PAGE_CACHE_UC | 357 | * - _PAGE_CACHE_UC |
357 | * | 358 | * |
358 | * req_type will have a special case value '-1', when requester want to inherit | ||
359 | * the memory type from mtrr (if WB), existing PAT, defaulting to UC_MINUS. | ||
360 | * | ||
361 | * If new_type is NULL, function will return an error if it cannot reserve the | 359 | * If new_type is NULL, function will return an error if it cannot reserve the |
362 | * region with req_type. If new_type is non-NULL, function will return | 360 | * region with req_type. If new_type is non-NULL, function will return |
363 | * available type in new_type in case of no error. In case of any error | 361 | * available type in new_type in case of no error. In case of any error |
@@ -377,9 +375,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
377 | if (!pat_enabled) { | 375 | if (!pat_enabled) { |
378 | /* This is identical to page table setting without PAT */ | 376 | /* This is identical to page table setting without PAT */ |
379 | if (new_type) { | 377 | if (new_type) { |
380 | if (req_type == -1) | 378 | if (req_type == _PAGE_CACHE_WC) |
381 | *new_type = _PAGE_CACHE_WB; | ||
382 | else if (req_type == _PAGE_CACHE_WC) | ||
383 | *new_type = _PAGE_CACHE_UC_MINUS; | 379 | *new_type = _PAGE_CACHE_UC_MINUS; |
384 | else | 380 | else |
385 | *new_type = req_type & _PAGE_CACHE_MASK; | 381 | *new_type = req_type & _PAGE_CACHE_MASK; |
@@ -388,7 +384,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
388 | } | 384 | } |
389 | 385 | ||
390 | /* Low ISA region is always mapped WB in page table. No need to track */ | 386 | /* Low ISA region is always mapped WB in page table. No need to track */ |
391 | if (is_ISA_range(start, end - 1)) { | 387 | if (x86_platform.is_untracked_pat_range(start, end)) { |
392 | if (new_type) | 388 | if (new_type) |
393 | *new_type = _PAGE_CACHE_WB; | 389 | *new_type = _PAGE_CACHE_WB; |
394 | return 0; | 390 | return 0; |
@@ -499,7 +495,7 @@ int free_memtype(u64 start, u64 end) | |||
499 | return 0; | 495 | return 0; |
500 | 496 | ||
501 | /* Low ISA region is always mapped WB. No need to track */ | 497 | /* Low ISA region is always mapped WB. No need to track */ |
502 | if (is_ISA_range(start, end - 1)) | 498 | if (x86_platform.is_untracked_pat_range(start, end)) |
503 | return 0; | 499 | return 0; |
504 | 500 | ||
505 | is_range_ram = pat_pagerange_is_ram(start, end); | 501 | is_range_ram = pat_pagerange_is_ram(start, end); |
@@ -582,7 +578,7 @@ static unsigned long lookup_memtype(u64 paddr) | |||
582 | int rettype = _PAGE_CACHE_WB; | 578 | int rettype = _PAGE_CACHE_WB; |
583 | struct memtype *entry; | 579 | struct memtype *entry; |
584 | 580 | ||
585 | if (is_ISA_range(paddr, paddr + PAGE_SIZE - 1)) | 581 | if (x86_platform.is_untracked_pat_range(paddr, paddr + PAGE_SIZE)) |
586 | return rettype; | 582 | return rettype; |
587 | 583 | ||
588 | if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) { | 584 | if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) { |
@@ -1018,8 +1014,10 @@ static const struct file_operations memtype_fops = { | |||
1018 | 1014 | ||
1019 | static int __init pat_memtype_list_init(void) | 1015 | static int __init pat_memtype_list_init(void) |
1020 | { | 1016 | { |
1021 | debugfs_create_file("pat_memtype_list", S_IRUSR, arch_debugfs_dir, | 1017 | if (pat_enabled) { |
1022 | NULL, &memtype_fops); | 1018 | debugfs_create_file("pat_memtype_list", S_IRUSR, |
1019 | arch_debugfs_dir, NULL, &memtype_fops); | ||
1020 | } | ||
1023 | return 0; | 1021 | return 0; |
1024 | } | 1022 | } |
1025 | 1023 | ||
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c index 513d8ed5d2ec..a3250aa34086 100644 --- a/arch/x86/mm/setup_nx.c +++ b/arch/x86/mm/setup_nx.c | |||
@@ -3,10 +3,8 @@ | |||
3 | #include <linux/init.h> | 3 | #include <linux/init.h> |
4 | 4 | ||
5 | #include <asm/pgtable.h> | 5 | #include <asm/pgtable.h> |
6 | #include <asm/proto.h> | ||
6 | 7 | ||
7 | int nx_enabled; | ||
8 | |||
9 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) | ||
10 | static int disable_nx __cpuinitdata; | 8 | static int disable_nx __cpuinitdata; |
11 | 9 | ||
12 | /* | 10 | /* |
@@ -22,48 +20,41 @@ static int __init noexec_setup(char *str) | |||
22 | if (!str) | 20 | if (!str) |
23 | return -EINVAL; | 21 | return -EINVAL; |
24 | if (!strncmp(str, "on", 2)) { | 22 | if (!strncmp(str, "on", 2)) { |
25 | __supported_pte_mask |= _PAGE_NX; | ||
26 | disable_nx = 0; | 23 | disable_nx = 0; |
27 | } else if (!strncmp(str, "off", 3)) { | 24 | } else if (!strncmp(str, "off", 3)) { |
28 | disable_nx = 1; | 25 | disable_nx = 1; |
29 | __supported_pte_mask &= ~_PAGE_NX; | ||
30 | } | 26 | } |
27 | x86_configure_nx(); | ||
31 | return 0; | 28 | return 0; |
32 | } | 29 | } |
33 | early_param("noexec", noexec_setup); | 30 | early_param("noexec", noexec_setup); |
34 | #endif | ||
35 | 31 | ||
36 | #ifdef CONFIG_X86_PAE | 32 | void __cpuinit x86_configure_nx(void) |
37 | void __init set_nx(void) | ||
38 | { | 33 | { |
39 | unsigned int v[4], l, h; | 34 | if (cpu_has_nx && !disable_nx) |
40 | 35 | __supported_pte_mask |= _PAGE_NX; | |
41 | if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) { | 36 | else |
42 | cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]); | 37 | __supported_pte_mask &= ~_PAGE_NX; |
38 | } | ||
43 | 39 | ||
44 | if ((v[3] & (1 << 20)) && !disable_nx) { | 40 | void __init x86_report_nx(void) |
45 | rdmsr(MSR_EFER, l, h); | 41 | { |
46 | l |= EFER_NX; | 42 | if (!cpu_has_nx) { |
47 | wrmsr(MSR_EFER, l, h); | 43 | printk(KERN_NOTICE "Notice: NX (Execute Disable) protection " |
48 | nx_enabled = 1; | 44 | "missing in CPU or disabled in BIOS!\n"); |
49 | __supported_pte_mask |= _PAGE_NX; | 45 | } else { |
46 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) | ||
47 | if (disable_nx) { | ||
48 | printk(KERN_INFO "NX (Execute Disable) protection: " | ||
49 | "disabled by kernel command line option\n"); | ||
50 | } else { | ||
51 | printk(KERN_INFO "NX (Execute Disable) protection: " | ||
52 | "active\n"); | ||
50 | } | 53 | } |
51 | } | ||
52 | } | ||
53 | #else | 54 | #else |
54 | void set_nx(void) | 55 | /* 32bit non-PAE kernel, NX cannot be used */ |
55 | { | 56 | printk(KERN_NOTICE "Notice: NX (Execute Disable) protection " |
56 | } | 57 | "cannot be enabled: non-PAE kernel!\n"); |
57 | #endif | 58 | #endif |
58 | 59 | } | |
59 | #ifdef CONFIG_X86_64 | ||
60 | void __cpuinit check_efer(void) | ||
61 | { | ||
62 | unsigned long efer; | ||
63 | |||
64 | rdmsrl(MSR_EFER, efer); | ||
65 | if (!(efer & EFER_NX) || disable_nx) | ||
66 | __supported_pte_mask &= ~_PAGE_NX; | ||
67 | } | 60 | } |
68 | #endif | ||
69 | |||
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 9d7ce96e5a5c..d89075489664 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c | |||
@@ -290,8 +290,6 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) | |||
290 | 290 | ||
291 | printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm, | 291 | printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm, |
292 | start, end); | 292 | start, end); |
293 | e820_register_active_regions(node, start >> PAGE_SHIFT, | ||
294 | end >> PAGE_SHIFT); | ||
295 | 293 | ||
296 | if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) { | 294 | if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) { |
297 | update_nodes_add(node, start, end); | 295 | update_nodes_add(node, start, end); |
@@ -338,6 +336,19 @@ static int __init nodes_cover_memory(const struct bootnode *nodes) | |||
338 | 336 | ||
339 | void __init acpi_numa_arch_fixup(void) {} | 337 | void __init acpi_numa_arch_fixup(void) {} |
340 | 338 | ||
339 | int __init acpi_get_nodes(struct bootnode *physnodes) | ||
340 | { | ||
341 | int i; | ||
342 | int ret = 0; | ||
343 | |||
344 | for_each_node_mask(i, nodes_parsed) { | ||
345 | physnodes[ret].start = nodes[i].start; | ||
346 | physnodes[ret].end = nodes[i].end; | ||
347 | ret++; | ||
348 | } | ||
349 | return ret; | ||
350 | } | ||
351 | |||
341 | /* Use the information discovered above to actually set up the nodes. */ | 352 | /* Use the information discovered above to actually set up the nodes. */ |
342 | int __init acpi_scan_nodes(unsigned long start, unsigned long end) | 353 | int __init acpi_scan_nodes(unsigned long start, unsigned long end) |
343 | { | 354 | { |
@@ -350,11 +361,6 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
350 | for (i = 0; i < MAX_NUMNODES; i++) | 361 | for (i = 0; i < MAX_NUMNODES; i++) |
351 | cutoff_node(i, start, end); | 362 | cutoff_node(i, start, end); |
352 | 363 | ||
353 | if (!nodes_cover_memory(nodes)) { | ||
354 | bad_srat(); | ||
355 | return -1; | ||
356 | } | ||
357 | |||
358 | memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks, | 364 | memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks, |
359 | memblk_nodeid); | 365 | memblk_nodeid); |
360 | if (memnode_shift < 0) { | 366 | if (memnode_shift < 0) { |
@@ -364,6 +370,14 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
364 | return -1; | 370 | return -1; |
365 | } | 371 | } |
366 | 372 | ||
373 | for_each_node_mask(i, nodes_parsed) | ||
374 | e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, | ||
375 | nodes[i].end >> PAGE_SHIFT); | ||
376 | if (!nodes_cover_memory(nodes)) { | ||
377 | bad_srat(); | ||
378 | return -1; | ||
379 | } | ||
380 | |||
367 | /* Account for nodes with cpus and no memory */ | 381 | /* Account for nodes with cpus and no memory */ |
368 | nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed); | 382 | nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed); |
369 | 383 | ||
@@ -454,7 +468,6 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes) | |||
454 | for (i = 0; i < num_nodes; i++) | 468 | for (i = 0; i < num_nodes; i++) |
455 | if (fake_nodes[i].start != fake_nodes[i].end) | 469 | if (fake_nodes[i].start != fake_nodes[i].end) |
456 | node_set(i, nodes_parsed); | 470 | node_set(i, nodes_parsed); |
457 | WARN_ON(!nodes_cover_memory(fake_nodes)); | ||
458 | } | 471 | } |
459 | 472 | ||
460 | static int null_slit_node_compare(int a, int b) | 473 | static int null_slit_node_compare(int a, int b) |
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 36fe08eeb5c3..65b58e4b0b8b 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c | |||
@@ -8,6 +8,7 @@ | |||
8 | 8 | ||
9 | #include <asm/tlbflush.h> | 9 | #include <asm/tlbflush.h> |
10 | #include <asm/mmu_context.h> | 10 | #include <asm/mmu_context.h> |
11 | #include <asm/cache.h> | ||
11 | #include <asm/apic.h> | 12 | #include <asm/apic.h> |
12 | #include <asm/uv/uv.h> | 13 | #include <asm/uv/uv.h> |
13 | 14 | ||
@@ -43,7 +44,7 @@ union smp_flush_state { | |||
43 | spinlock_t tlbstate_lock; | 44 | spinlock_t tlbstate_lock; |
44 | DECLARE_BITMAP(flush_cpumask, NR_CPUS); | 45 | DECLARE_BITMAP(flush_cpumask, NR_CPUS); |
45 | }; | 46 | }; |
46 | char pad[CONFIG_X86_INTERNODE_CACHE_BYTES]; | 47 | char pad[INTERNODE_CACHE_BYTES]; |
47 | } ____cacheline_internodealigned_in_smp; | 48 | } ____cacheline_internodealigned_in_smp; |
48 | 49 | ||
49 | /* State is put into the per CPU data section, but padded | 50 | /* State is put into the per CPU data section, but padded |
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 58bc00f68b12..02b442e92007 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c | |||
@@ -393,7 +393,6 @@ static ctl_table abi_table2[] = { | |||
393 | 393 | ||
394 | static ctl_table abi_root_table2[] = { | 394 | static ctl_table abi_root_table2[] = { |
395 | { | 395 | { |
396 | .ctl_name = CTL_ABI, | ||
397 | .procname = "abi", | 396 | .procname = "abi", |
398 | .mode = 0555, | 397 | .mode = 0555, |
399 | .child = abi_table2 | 398 | .child = abi_table2 |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index dfbf70e65860..c462cea8ef09 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -1093,10 +1093,8 @@ asmlinkage void __init xen_start_kernel(void) | |||
1093 | 1093 | ||
1094 | __supported_pte_mask |= _PAGE_IOMAP; | 1094 | __supported_pte_mask |= _PAGE_IOMAP; |
1095 | 1095 | ||
1096 | #ifdef CONFIG_X86_64 | ||
1097 | /* Work out if we support NX */ | 1096 | /* Work out if we support NX */ |
1098 | check_efer(); | 1097 | x86_configure_nx(); |
1099 | #endif | ||
1100 | 1098 | ||
1101 | xen_setup_features(); | 1099 | xen_setup_features(); |
1102 | 1100 | ||
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index fe03eeed7b48..738da0cb0d8b 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -73,7 +73,7 @@ static __cpuinit void cpu_bringup(void) | |||
73 | 73 | ||
74 | xen_setup_cpu_clockevents(); | 74 | xen_setup_cpu_clockevents(); |
75 | 75 | ||
76 | cpu_set(cpu, cpu_online_map); | 76 | set_cpu_online(cpu, true); |
77 | percpu_write(cpu_state, CPU_ONLINE); | 77 | percpu_write(cpu_state, CPU_ONLINE); |
78 | wmb(); | 78 | wmb(); |
79 | 79 | ||