Diffstat (limited to 'arch/x86')
111 files changed, 6138 insertions, 1698 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 184bc8872799..3ed5ad92b029 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -51,6 +51,7 @@ config X86
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_LZMA
+	select HAVE_KERNEL_XZ
 	select HAVE_KERNEL_LZO
 	select HAVE_HW_BREAKPOINT
 	select HAVE_MIXED_BREAKPOINTS_REGS
@@ -65,6 +66,7 @@ config X86
 	select HAVE_SPARSE_IRQ
 	select GENERIC_IRQ_PROBE
 	select GENERIC_PENDING_IRQ if SMP
+	select USE_GENERIC_SMP_HELPERS if SMP
 
 config INSTRUCTION_DECODER
 	def_bool (KPROBES || PERF_EVENTS)
@@ -203,10 +205,6 @@ config HAVE_INTEL_TXT
 	def_bool y
 	depends on EXPERIMENTAL && DMAR && ACPI
 
-config USE_GENERIC_SMP_HELPERS
-	def_bool y
-	depends on SMP
-
 config X86_32_SMP
 	def_bool y
 	depends on X86_32 && SMP
@@ -1936,13 +1934,19 @@ config PCI_MMCONFIG
 	depends on X86_64 && PCI && ACPI
 
 config PCI_CNB20LE_QUIRK
-	bool "Read CNB20LE Host Bridge Windows"
-	depends on PCI
+	bool "Read CNB20LE Host Bridge Windows" if EMBEDDED
+	default n
+	depends on PCI && EXPERIMENTAL
 	help
 	  Read the PCI windows out of the CNB20LE host bridge. This allows
 	  PCI hotplug to work on systems with the CNB20LE chipset which do
 	  not have ACPI.
 
+	  There's no public spec for this chipset, and this functionality
+	  is known to be incomplete.
+
+	  You should say N unless you know you need this.
+
 config DMAR
 	bool "Support for DMA Remapping Devices (EXPERIMENTAL)"
 	depends on PCI_MSI && ACPI && EXPERIMENTAL
@@ -2071,7 +2075,7 @@ config OLPC
 
 config OLPC_XO1
 	tristate "OLPC XO-1 support"
-	depends on OLPC && PCI
+	depends on OLPC && MFD_CS5535
 	---help---
 	  Add support for non-essential features of the OLPC XO-1 laptop.
 
@@ -2079,11 +2083,17 @@ config OLPC_OPENFIRMWARE
 	bool "Support for OLPC's Open Firmware"
 	depends on !X86_64 && !X86_PAE
 	default n
+	select OF
 	help
 	  This option adds support for the implementation of Open Firmware
 	  that is used on the OLPC XO-1 Children's Machine.
 	  If unsure, say N here.
 
+config OLPC_OPENFIRMWARE_DT
+	bool
+	default y if OLPC_OPENFIRMWARE && PROC_DEVICETREE
+	select OF_PROMTREE
+
 endif # X86_32
 
 config AMD_NB
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 2ac9069890cd..15588a0ef466 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -310,6 +310,9 @@ config X86_INTERNODE_CACHE_SHIFT
 config X86_CMPXCHG
 	def_bool X86_64 || (X86_32 && !M386)
 
+config CMPXCHG_LOCAL
+	def_bool X86_64 || (X86_32 && !M386)
+
 config X86_L1_CACHE_SHIFT
 	int
 	default "7" if MPENTIUM4 || MPSC
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 0c229551eead..09664efb9cee 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -4,7 +4,7 @@
 # create a compressed vmlinux image from the original vmlinux
 #
 
-targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma vmlinux.bin.lzo head_$(BITS).o misc.o string.o cmdline.o early_serial_console.o piggy.o
+targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma vmlinux.bin.xz vmlinux.bin.lzo head_$(BITS).o misc.o string.o cmdline.o early_serial_console.o piggy.o
 
 KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
 KBUILD_CFLAGS += -fno-strict-aliasing -fPIC
@@ -49,12 +49,15 @@ $(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE
 	$(call if_changed,bzip2)
 $(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE
 	$(call if_changed,lzma)
+$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE
+	$(call if_changed,xzkern)
 $(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE
 	$(call if_changed,lzo)
 
 suffix-$(CONFIG_KERNEL_GZIP) := gz
 suffix-$(CONFIG_KERNEL_BZIP2) := bz2
 suffix-$(CONFIG_KERNEL_LZMA) := lzma
+suffix-$(CONFIG_KERNEL_XZ) := xz
 suffix-$(CONFIG_KERNEL_LZO) := lzo
 
 quiet_cmd_mkpiggy = MKPIGGY $@
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 325c05294fc4..3a19d04cebeb 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -139,6 +139,10 @@ static int lines, cols;
 #include "../../../../lib/decompress_unlzma.c"
 #endif
 
+#ifdef CONFIG_KERNEL_XZ
+#include "../../../../lib/decompress_unxz.c"
+#endif
+
 #ifdef CONFIG_KERNEL_LZO
 #include "../../../../lib/decompress_unlzo.c"
 #endif
diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c
index 5c228129d175..646aa78ba5fd 100644
--- a/arch/x86/boot/compressed/mkpiggy.c
+++ b/arch/x86/boot/compressed/mkpiggy.c
@@ -74,7 +74,7 @@ int main(int argc, char *argv[])
 
 	offs = (olen > ilen) ? olen - ilen : 0;
 	offs += olen >> 12;	/* Add 8 bytes for each 32K block */
-	offs += 32*1024 + 18;	/* Add 32K + 18 bytes slack */
+	offs += 64*1024 + 128;	/* Add 64K + 128 bytes slack */
 	offs = (offs+4095) & ~4095; /* Round to a 4K boundary */
 
 	printf(".section \".rodata..compressed\",\"a\",@progbits\n");
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index ff16756a51c1..8fe2a4966b7a 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -9,6 +9,20 @@
  * Vinodh Gopal <vinodh.gopal@intel.com>
  * Kahraman Akdemir
  *
+ * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD
+ * interface for 64-bit kernels.
+ * Authors: Erdinc Ozturk (erdinc.ozturk@intel.com)
+ *          Aidan O'Mahony (aidan.o.mahony@intel.com)
+ *          Adrian Hoban <adrian.hoban@intel.com>
+ *          James Guilford (james.guilford@intel.com)
+ *          Gabriele Paoloni <gabriele.paoloni@intel.com>
+ *          Tadeusz Struk (tadeusz.struk@intel.com)
+ *          Wajdi Feghali (wajdi.k.feghali@intel.com)
+ * Copyright (c) 2010, Intel Corporation.
+ *
+ * Ported x86_64 version to x86:
+ * Author: Mathias Krause <minipli@googlemail.com>
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -18,8 +32,62 @@
 #include <linux/linkage.h>
 #include <asm/inst.h>
 
+#ifdef __x86_64__
+.data
+POLY:   .octa 0xC2000000000000000000000000000001
+TWOONE: .octa 0x00000001000000000000000000000001
+
+# order of these constants should not change.
+# more specifically, ALL_F should follow SHIFT_MASK,
+# and ZERO should follow ALL_F
+
+SHUF_MASK:  .octa 0x000102030405060708090A0B0C0D0E0F
+MASK1:      .octa 0x0000000000000000ffffffffffffffff
+MASK2:      .octa 0xffffffffffffffff0000000000000000
+SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100
+ALL_F:      .octa 0xffffffffffffffffffffffffffffffff
+ZERO:       .octa 0x00000000000000000000000000000000
+ONE:        .octa 0x00000000000000000000000000000001
+F_MIN_MASK: .octa 0xf1f2f3f4f5f6f7f8f9fafbfcfdfeff0
+dec:        .octa 0x1
+enc:        .octa 0x2
+
+
 .text
 
+
+#define STACK_OFFSET    8*3
+#define HashKey         16*0    // store HashKey <<1 mod poly here
+#define HashKey_2       16*1    // store HashKey^2 <<1 mod poly here
+#define HashKey_3       16*2    // store HashKey^3 <<1 mod poly here
+#define HashKey_4       16*3    // store HashKey^4 <<1 mod poly here
+#define HashKey_k       16*4    // store XOR of High 64 bits and Low 64
+                                // bits of HashKey <<1 mod poly here
+                                // (for Karatsuba purposes)
+#define HashKey_2_k     16*5    // store XOR of High 64 bits and Low 64
+                                // bits of HashKey^2 <<1 mod poly here
+                                // (for Karatsuba purposes)
+#define HashKey_3_k     16*6    // store XOR of High 64 bits and Low 64
+                                // bits of HashKey^3 <<1 mod poly here
+                                // (for Karatsuba purposes)
+#define HashKey_4_k     16*7    // store XOR of High 64 bits and Low 64
+                                // bits of HashKey^4 <<1 mod poly here
+                                // (for Karatsuba purposes)
+#define VARIABLE_OFFSET 16*8
+
+#define arg1 rdi
+#define arg2 rsi
+#define arg3 rdx
+#define arg4 rcx
+#define arg5 r8
+#define arg6 r9
+#define arg7 STACK_OFFSET+8(%r14)
+#define arg8 STACK_OFFSET+16(%r14)
+#define arg9 STACK_OFFSET+24(%r14)
+#define arg10 STACK_OFFSET+32(%r14)
+#endif
+
+
 #define STATE1	%xmm0
 #define STATE2	%xmm4
 #define STATE3	%xmm5
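The HashKey offsets above carve a 16*8-byte scratch area out of the stack frame. Purely for orientation, it can be pictured as the following hypothetical C struct; the asm only ever addresses it through the byte offsets defined above:

	#include <stdint.h>

	/* Hypothetical C view of the GCM scratch area (VARIABLE_OFFSET bytes);
	 * slot order mirrors the #define offsets above. */
	struct gcm_hash_scratch {
		uint8_t hash_key[16];     /* HashKey   << 1 (mod poly)            */
		uint8_t hash_key_2[16];   /* HashKey^2 << 1 (mod poly)            */
		uint8_t hash_key_3[16];   /* HashKey^3 << 1 (mod poly)            */
		uint8_t hash_key_4[16];   /* HashKey^4 << 1 (mod poly)            */
		uint8_t hash_key_k[16];   /* hi64 ^ lo64 of HashKey   (Karatsuba) */
		uint8_t hash_key_2_k[16]; /* hi64 ^ lo64 of HashKey^2 (Karatsuba) */
		uint8_t hash_key_3_k[16]; /* hi64 ^ lo64 of HashKey^3 (Karatsuba) */
		uint8_t hash_key_4_k[16]; /* hi64 ^ lo64 of HashKey^4 (Karatsuba) */
	};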
@@ -32,12 +100,16 @@
 #define IN	IN1
 #define KEY	%xmm2
 #define IV	%xmm3
+
 #define BSWAP_MASK %xmm10
 #define CTR	%xmm11
 #define INC	%xmm12
 
+#ifdef __x86_64__
+#define AREG	%rax
 #define KEYP	%rdi
 #define OUTP	%rsi
+#define UKEYP	OUTP
 #define INP	%rdx
 #define LEN	%rcx
 #define IVP	%r8
@@ -46,6 +118,1588 @@
 #define TKEYP	T1
 #define T2	%r11
 #define TCTR_LOW T2
+#else
+#define AREG	%eax
+#define KEYP	%edi
+#define OUTP	AREG
+#define UKEYP	OUTP
+#define INP	%edx
+#define LEN	%esi
+#define IVP	%ebp
+#define KLEN	%ebx
+#define T1	%ecx
+#define TKEYP	T1
+#endif
+
+
+#ifdef __x86_64__
+/* GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0)
+*
+*
+* Input: A and B (128-bits each, bit-reflected)
+* Output: C = A*B*x mod poly, (i.e. >>1 )
+* To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input
+* GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly.
+*
+*/
+.macro GHASH_MUL GH HK TMP1 TMP2 TMP3 TMP4 TMP5
+	movdqa \GH, \TMP1
+	pshufd $78, \GH, \TMP2
+	pshufd $78, \HK, \TMP3
+	pxor \GH, \TMP2 # TMP2 = a1+a0
+	pxor \HK, \TMP3 # TMP3 = b1+b0
+	PCLMULQDQ 0x11, \HK, \TMP1 # TMP1 = a1*b1
+	PCLMULQDQ 0x00, \HK, \GH # GH = a0*b0
+	PCLMULQDQ 0x00, \TMP3, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
+	pxor \GH, \TMP2
+	pxor \TMP1, \TMP2 # TMP2 = a1*b0 + a0*b1 (the middle term)
+	movdqa \TMP2, \TMP3
+	pslldq $8, \TMP3 # left shift TMP3 2 DWs
+	psrldq $8, \TMP2 # right shift TMP2 2 DWs
+	pxor \TMP3, \GH
+	pxor \TMP2, \TMP1 # TMP1:GH holds the result of GH*HK
+
+	# first phase of the reduction
+
+	movdqa \GH, \TMP2
+	movdqa \GH, \TMP3
+	movdqa \GH, \TMP4 # copy GH into TMP2, TMP3 and TMP4
+			  # in order to perform independent shifts
+	pslld $31, \TMP2 # packed left shift <<31
+	pslld $30, \TMP3 # packed left shift <<30
+	pslld $25, \TMP4 # packed left shift <<25
+	pxor \TMP3, \TMP2 # xor the shifted versions
+	pxor \TMP4, \TMP2
+	movdqa \TMP2, \TMP5
+	psrldq $4, \TMP5 # right shift TMP5 1 DW
+	pslldq $12, \TMP2 # left shift TMP2 3 DWs
+	pxor \TMP2, \GH
+
+	# second phase of the reduction
+
+	movdqa \GH,\TMP2 # copy GH into TMP2, TMP3 and TMP4
+			 # in order to perform independent shifts
+	movdqa \GH,\TMP3
+	movdqa \GH,\TMP4
+	psrld $1,\TMP2 # packed right shift >>1
+	psrld $2,\TMP3 # packed right shift >>2
+	psrld $7,\TMP4 # packed right shift >>7
+	pxor \TMP3,\TMP2 # xor the shifted versions
+	pxor \TMP4,\TMP2
+	pxor \TMP5, \TMP2
+	pxor \TMP2, \GH
+	pxor \TMP1, \GH # result is in GH
+.endm
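What GHASH_MUL computes can be stated compactly in C. Below is a minimal, unoptimized bit-at-a-time model of multiplication in GF(2^128) modulo x^128 + x^127 + x^126 + x^121 + 1 on GCM's bit-reflected representation; this is a hypothetical reference helper, shown only to make the math concrete, while the asm reaches the same result with PCLMULQDQ, Karatsuba and the two-phase shift reduction:

	#include <stdint.h>

	/* {hi, lo} = the 128-bit block, hi holding the first 64 bits. */
	typedef struct { uint64_t hi, lo; } be128;

	/* Textbook GHASH multiply (cf. NIST SP 800-38D, Algorithm 1):
	 * x = x * y in GF(2^128) with the reduction polynomial folded in
	 * one bit at a time via the reflected constant 0xE1 || 0^120. */
	static void ghash_mul(be128 *x, const be128 *y)
	{
		be128 z = { 0, 0 };
		be128 v = *y;
		int i;

		for (i = 0; i < 128; i++) {
			/* walk the bits of x from the most significant down */
			uint64_t bit = (i < 64) ? (x->hi >> (63 - i)) & 1
						: (x->lo >> (127 - i)) & 1;
			if (bit) {
				z.hi ^= v.hi;
				z.lo ^= v.lo;
			}
			/* v = v * x: shift right one bit in the reflected
			 * domain, folding in the polynomial on carry-out */
			if (v.lo & 1) {
				v.lo = (v.lo >> 1) | (v.hi << 63);
				v.hi = (v.hi >> 1) ^ 0xE100000000000000ULL;
			} else {
				v.lo = (v.lo >> 1) | (v.hi << 63);
				v.hi >>= 1;
			}
		}
		*x = z;
	}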
+
+/*
+* if a = number of total plaintext bytes
+* b = floor(a/16)
+* num_initial_blocks = b mod 4
+* encrypt the initial num_initial_blocks blocks and apply ghash on
+* the ciphertext
+* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers
+* are clobbered
+* arg1, %arg2, %arg3, %r14 are used as pointers only, not modified
+*/
+
+
+.macro INITIAL_BLOCKS_DEC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
+XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
+	mov arg7, %r10 # %r10 = AAD
+	mov arg8, %r12 # %r12 = aadLen
+	mov %r12, %r11
+	pxor %xmm\i, %xmm\i
+_get_AAD_loop\num_initial_blocks\operation:
+	movd (%r10), \TMP1
+	pslldq $12, \TMP1
+	psrldq $4, %xmm\i
+	pxor \TMP1, %xmm\i
+	add $4, %r10
+	sub $4, %r12
+	jne _get_AAD_loop\num_initial_blocks\operation
+	cmp $16, %r11
+	je _get_AAD_loop2_done\num_initial_blocks\operation
+	mov $16, %r12
+_get_AAD_loop2\num_initial_blocks\operation:
+	psrldq $4, %xmm\i
+	sub $4, %r12
+	cmp %r11, %r12
+	jne _get_AAD_loop2\num_initial_blocks\operation
+_get_AAD_loop2_done\num_initial_blocks\operation:
+	movdqa SHUF_MASK(%rip), %xmm14
+	PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
+
+	xor %r11, %r11 # initialise the data pointer offset to zero
+
+	# start AES for num_initial_blocks blocks
+
+	mov %arg5, %rax # %rax = *Y0
+	movdqu (%rax), \XMM0 # XMM0 = Y0
+	movdqa SHUF_MASK(%rip), %xmm14
+	PSHUFB_XMM %xmm14, \XMM0
+
+.if (\i == 5) || (\i == 6) || (\i == 7)
+.irpc index, \i_seq
+	paddd ONE(%rip), \XMM0 # INCR Y0
+	movdqa \XMM0, %xmm\index
+	movdqa SHUF_MASK(%rip), %xmm14
+	PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap
+
+.endr
+.irpc index, \i_seq
+	pxor 16*0(%arg1), %xmm\index
+.endr
+.irpc index, \i_seq
+	movaps 0x10(%rdi), \TMP1
+	AESENC \TMP1, %xmm\index # Round 1
+.endr
+.irpc index, \i_seq
+	movaps 0x20(%arg1), \TMP1
+	AESENC \TMP1, %xmm\index # Round 2
+.endr
+.irpc index, \i_seq
+	movaps 0x30(%arg1), \TMP1
+	AESENC \TMP1, %xmm\index # Round 3
+.endr
+.irpc index, \i_seq
+	movaps 0x40(%arg1), \TMP1
+	AESENC \TMP1, %xmm\index # Round 4
+.endr
+.irpc index, \i_seq
+	movaps 0x50(%arg1), \TMP1
+	AESENC \TMP1, %xmm\index # Round 5
+.endr
+.irpc index, \i_seq
+	movaps 0x60(%arg1), \TMP1
+	AESENC \TMP1, %xmm\index # Round 6
+.endr
+.irpc index, \i_seq
+	movaps 0x70(%arg1), \TMP1
+	AESENC \TMP1, %xmm\index # Round 7
+.endr
+.irpc index, \i_seq
+	movaps 0x80(%arg1), \TMP1
+	AESENC \TMP1, %xmm\index # Round 8
+.endr
+.irpc index, \i_seq
+	movaps 0x90(%arg1), \TMP1
+	AESENC \TMP1, %xmm\index # Round 9
+.endr
+.irpc index, \i_seq
+	movaps 0xa0(%arg1), \TMP1
+	AESENCLAST \TMP1, %xmm\index # Round 10
+.endr
+.irpc index, \i_seq
+	movdqu (%arg3 , %r11, 1), \TMP1
+	pxor \TMP1, %xmm\index
+	movdqu %xmm\index, (%arg2 , %r11, 1)
+	# write back plaintext/ciphertext for num_initial_blocks
+	add $16, %r11
+
+	movdqa \TMP1, %xmm\index
+	movdqa SHUF_MASK(%rip), %xmm14
+	PSHUFB_XMM %xmm14, %xmm\index
+
+	# prepare plaintext/ciphertext for GHASH computation
+.endr
+.endif
+	GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+	# apply GHASH on num_initial_blocks blocks
+
+.if \i == 5
+	pxor %xmm5, %xmm6
+	GHASH_MUL %xmm6, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+	pxor %xmm6, %xmm7
+	GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+	pxor %xmm7, %xmm8
+	GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+.elseif \i == 6
+	pxor %xmm6, %xmm7
+	GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+	pxor %xmm7, %xmm8
+	GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+.elseif \i == 7
+	pxor %xmm7, %xmm8
+	GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+.endif
+	cmp $64, %r13
+	jl _initial_blocks_done\num_initial_blocks\operation
+	# no need for precomputed values
+/*
+*
+* Precomputations for HashKey parallel with encryption of first 4 blocks.
+* HashKey_i_k holds XORed values of the low and high parts of the HashKey_i
+*/
+	paddd ONE(%rip), \XMM0 # INCR Y0
+	movdqa \XMM0, \XMM1
+	movdqa SHUF_MASK(%rip), %xmm14
+	PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
+
+	paddd ONE(%rip), \XMM0 # INCR Y0
+	movdqa \XMM0, \XMM2
+	movdqa SHUF_MASK(%rip), %xmm14
+	PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
+
+	paddd ONE(%rip), \XMM0 # INCR Y0
+	movdqa \XMM0, \XMM3
+	movdqa SHUF_MASK(%rip), %xmm14
+	PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
+
+	paddd ONE(%rip), \XMM0 # INCR Y0
+	movdqa \XMM0, \XMM4
+	movdqa SHUF_MASK(%rip), %xmm14
+	PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
+
+	pxor 16*0(%arg1), \XMM1
+	pxor 16*0(%arg1), \XMM2
+	pxor 16*0(%arg1), \XMM3
+	pxor 16*0(%arg1), \XMM4
+	movdqa \TMP3, \TMP5
+	pshufd $78, \TMP3, \TMP1
+	pxor \TMP3, \TMP1
+	movdqa \TMP1, HashKey_k(%rsp)
+	GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
+	# TMP5 = HashKey^2<<1 (mod poly)
+	movdqa \TMP5, HashKey_2(%rsp)
+	# HashKey_2 = HashKey^2<<1 (mod poly)
+	pshufd $78, \TMP5, \TMP1
+	pxor \TMP5, \TMP1
+	movdqa \TMP1, HashKey_2_k(%rsp)
+.irpc index, 1234 # do 4 rounds
+	movaps 0x10*\index(%arg1), \TMP1
+	AESENC \TMP1, \XMM1
+	AESENC \TMP1, \XMM2
+	AESENC \TMP1, \XMM3
+	AESENC \TMP1, \XMM4
+.endr
+	GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
+	# TMP5 = HashKey^3<<1 (mod poly)
+	movdqa \TMP5, HashKey_3(%rsp)
+	pshufd $78, \TMP5, \TMP1
+	pxor \TMP5, \TMP1
+	movdqa \TMP1, HashKey_3_k(%rsp)
+.irpc index, 56789 # do next 5 rounds
+	movaps 0x10*\index(%arg1), \TMP1
+	AESENC \TMP1, \XMM1
+	AESENC \TMP1, \XMM2
+	AESENC \TMP1, \XMM3
+	AESENC \TMP1, \XMM4
+.endr
+	GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
+	# TMP5 = HashKey^4<<1 (mod poly)
+	movdqa \TMP5, HashKey_4(%rsp)
+	pshufd $78, \TMP5, \TMP1
+	pxor \TMP5, \TMP1
+	movdqa \TMP1, HashKey_4_k(%rsp)
+	movaps 0xa0(%arg1), \TMP2
+	AESENCLAST \TMP2, \XMM1
+	AESENCLAST \TMP2, \XMM2
+	AESENCLAST \TMP2, \XMM3
+	AESENCLAST \TMP2, \XMM4
+	movdqu 16*0(%arg3 , %r11 , 1), \TMP1
+	pxor \TMP1, \XMM1
+	movdqu \XMM1, 16*0(%arg2 , %r11 , 1)
+	movdqa \TMP1, \XMM1
+	movdqu 16*1(%arg3 , %r11 , 1), \TMP1
+	pxor \TMP1, \XMM2
+	movdqu \XMM2, 16*1(%arg2 , %r11 , 1)
+	movdqa \TMP1, \XMM2
+	movdqu 16*2(%arg3 , %r11 , 1), \TMP1
+	pxor \TMP1, \XMM3
+	movdqu \XMM3, 16*2(%arg2 , %r11 , 1)
+	movdqa \TMP1, \XMM3
+	movdqu 16*3(%arg3 , %r11 , 1), \TMP1
+	pxor \TMP1, \XMM4
+	movdqu \XMM4, 16*3(%arg2 , %r11 , 1)
+	movdqa \TMP1, \XMM4
+	add $64, %r11
+	movdqa SHUF_MASK(%rip), %xmm14
+	PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
+	pxor \XMMDst, \XMM1
+	# combine GHASHed value with the corresponding ciphertext
+	movdqa SHUF_MASK(%rip), %xmm14
+	PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
+	movdqa SHUF_MASK(%rip), %xmm14
+	PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
+	movdqa SHUF_MASK(%rip), %xmm14
+	PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
+
+_initial_blocks_done\num_initial_blocks\operation:
+
+.endm
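The block-count bookkeeping described in the macro comment above, spelled out as a tiny standalone program (the byte count is an invented example):

	#include <stdio.h>

	/* With 'len' total plaintext bytes, b = floor(len/16) full blocks;
	 * the first b % 4 blocks are handled by INITIAL_BLOCKS_* before
	 * the 4-way parallel loop takes over. */
	int main(void)
	{
		unsigned long len = 100;                 /* sample byte count  */
		unsigned long full_blocks = len / 16;    /* b = floor(a/16)    */
		unsigned long initial = full_blocks % 4; /* num_initial_blocks */

		printf("%lu full blocks, %lu handled up front\n",
		       full_blocks, initial);
		return 0;
	}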
+
+
+/*
+* if a = number of total plaintext bytes
+* b = floor(a/16)
+* num_initial_blocks = b mod 4
+* encrypt the initial num_initial_blocks blocks and apply ghash on
+* the ciphertext
+* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers
+* are clobbered
+* arg1, %arg2, %arg3, %r14 are used as pointers only, not modified
+*/
+
+
+.macro INITIAL_BLOCKS_ENC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
+XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
+	mov arg7, %r10 # %r10 = AAD
+	mov arg8, %r12 # %r12 = aadLen
+	mov %r12, %r11
+	pxor %xmm\i, %xmm\i
+_get_AAD_loop\num_initial_blocks\operation:
+	movd (%r10), \TMP1
+	pslldq $12, \TMP1
+	psrldq $4, %xmm\i
+	pxor \TMP1, %xmm\i
+	add $4, %r10
+	sub $4, %r12
+	jne _get_AAD_loop\num_initial_blocks\operation
+	cmp $16, %r11
+	je _get_AAD_loop2_done\num_initial_blocks\operation
+	mov $16, %r12
+_get_AAD_loop2\num_initial_blocks\operation:
+	psrldq $4, %xmm\i
+	sub $4, %r12
+	cmp %r11, %r12
+	jne _get_AAD_loop2\num_initial_blocks\operation
+_get_AAD_loop2_done\num_initial_blocks\operation:
+	movdqa SHUF_MASK(%rip), %xmm14
+	PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
+
+	xor %r11, %r11 # initialise the data pointer offset to zero
+
+	# start AES for num_initial_blocks blocks
+
+	mov %arg5, %rax # %rax = *Y0
+	movdqu (%rax), \XMM0 # XMM0 = Y0
+	movdqa SHUF_MASK(%rip), %xmm14
+	PSHUFB_XMM %xmm14, \XMM0
+
+.if (\i == 5) || (\i == 6) || (\i == 7)
+.irpc index, \i_seq
+	paddd ONE(%rip), \XMM0 # INCR Y0
+	movdqa \XMM0, %xmm\index
+	movdqa SHUF_MASK(%rip), %xmm14
+	PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap
+
+.endr
+.irpc index, \i_seq
+	pxor 16*0(%arg1), %xmm\index
+.endr
+.irpc index, \i_seq
+	movaps 0x10(%rdi), \TMP1
+	AESENC \TMP1, %xmm\index # Round 1
+.endr
+.irpc index, \i_seq
+	movaps 0x20(%arg1), \TMP1
+	AESENC \TMP1, %xmm\index # Round 2
+.endr
+.irpc index, \i_seq
+	movaps 0x30(%arg1), \TMP1
+	AESENC \TMP1, %xmm\index # Round 3
+.endr
+.irpc index, \i_seq
+	movaps 0x40(%arg1), \TMP1
+	AESENC \TMP1, %xmm\index # Round 4
+.endr
+.irpc index, \i_seq
+	movaps 0x50(%arg1), \TMP1
+	AESENC \TMP1, %xmm\index # Round 5
+.endr
+.irpc index, \i_seq
+	movaps 0x60(%arg1), \TMP1
+	AESENC \TMP1, %xmm\index # Round 6
+.endr
+.irpc index, \i_seq
+	movaps 0x70(%arg1), \TMP1
+	AESENC \TMP1, %xmm\index # Round 7
+.endr
+.irpc index, \i_seq
+	movaps 0x80(%arg1), \TMP1
+	AESENC \TMP1, %xmm\index # Round 8
+.endr
+.irpc index, \i_seq
+	movaps 0x90(%arg1), \TMP1
+	AESENC \TMP1, %xmm\index # Round 9
+.endr
+.irpc index, \i_seq
+	movaps 0xa0(%arg1), \TMP1
+	AESENCLAST \TMP1, %xmm\index # Round 10
+.endr
+.irpc index, \i_seq
+	movdqu (%arg3 , %r11, 1), \TMP1
+	pxor \TMP1, %xmm\index
+	movdqu %xmm\index, (%arg2 , %r11, 1)
+	# write back plaintext/ciphertext for num_initial_blocks
+	add $16, %r11
+
+	movdqa SHUF_MASK(%rip), %xmm14
+	PSHUFB_XMM %xmm14, %xmm\index
+
+	# prepare plaintext/ciphertext for GHASH computation
+.endr
+.endif
+	GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+	# apply GHASH on num_initial_blocks blocks
+
+.if \i == 5
+	pxor %xmm5, %xmm6
+	GHASH_MUL %xmm6, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+	pxor %xmm6, %xmm7
+	GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+	pxor %xmm7, %xmm8
+	GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+.elseif \i == 6
+	pxor %xmm6, %xmm7
+	GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+	pxor %xmm7, %xmm8
+	GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+.elseif \i == 7
+	pxor %xmm7, %xmm8
+	GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+.endif
+	cmp $64, %r13
+	jl _initial_blocks_done\num_initial_blocks\operation
+	# no need for precomputed values
+/*
+*
+* Precomputations for HashKey parallel with encryption of first 4 blocks.
+* HashKey_i_k holds XORed values of the low and high parts of the HashKey_i
+*/
+	paddd ONE(%rip), \XMM0 # INCR Y0
+	movdqa \XMM0, \XMM1
+	movdqa SHUF_MASK(%rip), %xmm14
+	PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
+
+	paddd ONE(%rip), \XMM0 # INCR Y0
+	movdqa \XMM0, \XMM2
+	movdqa SHUF_MASK(%rip), %xmm14
+	PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
+
+	paddd ONE(%rip), \XMM0 # INCR Y0
+	movdqa \XMM0, \XMM3
+	movdqa SHUF_MASK(%rip), %xmm14
+	PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
+
+	paddd ONE(%rip), \XMM0 # INCR Y0
+	movdqa \XMM0, \XMM4
+	movdqa SHUF_MASK(%rip), %xmm14
+	PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
+
+	pxor 16*0(%arg1), \XMM1
+	pxor 16*0(%arg1), \XMM2
+	pxor 16*0(%arg1), \XMM3
+	pxor 16*0(%arg1), \XMM4
+	movdqa \TMP3, \TMP5
+	pshufd $78, \TMP3, \TMP1
+	pxor \TMP3, \TMP1
+	movdqa \TMP1, HashKey_k(%rsp)
+	GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
+	# TMP5 = HashKey^2<<1 (mod poly)
+	movdqa \TMP5, HashKey_2(%rsp)
+	# HashKey_2 = HashKey^2<<1 (mod poly)
+	pshufd $78, \TMP5, \TMP1
+	pxor \TMP5, \TMP1
+	movdqa \TMP1, HashKey_2_k(%rsp)
+.irpc index, 1234 # do 4 rounds
+	movaps 0x10*\index(%arg1), \TMP1
+	AESENC \TMP1, \XMM1
+	AESENC \TMP1, \XMM2
+	AESENC \TMP1, \XMM3
+	AESENC \TMP1, \XMM4
+.endr
+	GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
+	# TMP5 = HashKey^3<<1 (mod poly)
+	movdqa \TMP5, HashKey_3(%rsp)
+	pshufd $78, \TMP5, \TMP1
+	pxor \TMP5, \TMP1
+	movdqa \TMP1, HashKey_3_k(%rsp)
+.irpc index, 56789 # do next 5 rounds
+	movaps 0x10*\index(%arg1), \TMP1
+	AESENC \TMP1, \XMM1
+	AESENC \TMP1, \XMM2
+	AESENC \TMP1, \XMM3
+	AESENC \TMP1, \XMM4
+.endr
+	GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
+	# TMP5 = HashKey^4<<1 (mod poly)
+	movdqa \TMP5, HashKey_4(%rsp)
+	pshufd $78, \TMP5, \TMP1
+	pxor \TMP5, \TMP1
+	movdqa \TMP1, HashKey_4_k(%rsp)
+	movaps 0xa0(%arg1), \TMP2
+	AESENCLAST \TMP2, \XMM1
+	AESENCLAST \TMP2, \XMM2
+	AESENCLAST \TMP2, \XMM3
+	AESENCLAST \TMP2, \XMM4
+	movdqu 16*0(%arg3 , %r11 , 1), \TMP1
+	pxor \TMP1, \XMM1
+	movdqu 16*1(%arg3 , %r11 , 1), \TMP1
+	pxor \TMP1, \XMM2
+	movdqu 16*2(%arg3 , %r11 , 1), \TMP1
+	pxor \TMP1, \XMM3
+	movdqu 16*3(%arg3 , %r11 , 1), \TMP1
+	pxor \TMP1, \XMM4
+	movdqu \XMM1, 16*0(%arg2 , %r11 , 1)
+	movdqu \XMM2, 16*1(%arg2 , %r11 , 1)
+	movdqu \XMM3, 16*2(%arg2 , %r11 , 1)
+	movdqu \XMM4, 16*3(%arg2 , %r11 , 1)
+
+	add $64, %r11
+	movdqa SHUF_MASK(%rip), %xmm14
+	PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
+	pxor \XMMDst, \XMM1
+	# combine GHASHed value with the corresponding ciphertext
+	movdqa SHUF_MASK(%rip), %xmm14
+	PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
+	movdqa SHUF_MASK(%rip), %xmm14
+	PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
+	movdqa SHUF_MASK(%rip), %xmm14
+	PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
+
+_initial_blocks_done\num_initial_blocks\operation:
+
+.endm
+
+/*
+* encrypt 4 blocks at a time
+* ghash the 4 previously encrypted ciphertext blocks
+* arg1, %arg2, %arg3 are used as pointers only, not modified
+* %r11 is the data offset value
+*/
+.macro GHASH_4_ENCRYPT_4_PARALLEL_ENC TMP1 TMP2 TMP3 TMP4 TMP5 \
+TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
+
+	movdqa \XMM1, \XMM5
+	movdqa \XMM2, \XMM6
+	movdqa \XMM3, \XMM7
+	movdqa \XMM4, \XMM8
+
+	movdqa SHUF_MASK(%rip), %xmm15
+	# multiply TMP5 * HashKey using Karatsuba
+
+	movdqa \XMM5, \TMP4
+	pshufd $78, \XMM5, \TMP6
+	pxor \XMM5, \TMP6
+	paddd ONE(%rip), \XMM0 # INCR CNT
+	movdqa HashKey_4(%rsp), \TMP5
+	PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1
+	movdqa \XMM0, \XMM1
+	paddd ONE(%rip), \XMM0 # INCR CNT
+	movdqa \XMM0, \XMM2
+	paddd ONE(%rip), \XMM0 # INCR CNT
+	movdqa \XMM0, \XMM3
+	paddd ONE(%rip), \XMM0 # INCR CNT
+	movdqa \XMM0, \XMM4
+	PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap
+	PCLMULQDQ 0x00, \TMP5, \XMM5 # XMM5 = a0*b0
+	PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap
+	PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap
+	PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap
+
+	pxor (%arg1), \XMM1
+	pxor (%arg1), \XMM2
+	pxor (%arg1), \XMM3
+	pxor (%arg1), \XMM4
+	movdqa HashKey_4_k(%rsp), \TMP5
+	PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0)
+	movaps 0x10(%arg1), \TMP1
+	AESENC \TMP1, \XMM1 # Round 1
+	AESENC \TMP1, \XMM2
+	AESENC \TMP1, \XMM3
+	AESENC \TMP1, \XMM4
+	movaps 0x20(%arg1), \TMP1
+	AESENC \TMP1, \XMM1 # Round 2
+	AESENC \TMP1, \XMM2
+	AESENC \TMP1, \XMM3
+	AESENC \TMP1, \XMM4
+	movdqa \XMM6, \TMP1
+	pshufd $78, \XMM6, \TMP2
+	pxor \XMM6, \TMP2
+	movdqa HashKey_3(%rsp), \TMP5
+	PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1
+	movaps 0x30(%arg1), \TMP3
+	AESENC \TMP3, \XMM1 # Round 3
+	AESENC \TMP3, \XMM2
+	AESENC \TMP3, \XMM3
+	AESENC \TMP3, \XMM4
+	PCLMULQDQ 0x00, \TMP5, \XMM6 # XMM6 = a0*b0
+	movaps 0x40(%arg1), \TMP3
+	AESENC \TMP3, \XMM1 # Round 4
+	AESENC \TMP3, \XMM2
+	AESENC \TMP3, \XMM3
+	AESENC \TMP3, \XMM4
+	movdqa HashKey_3_k(%rsp), \TMP5
+	PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
+	movaps 0x50(%arg1), \TMP3
+	AESENC \TMP3, \XMM1 # Round 5
+	AESENC \TMP3, \XMM2
+	AESENC \TMP3, \XMM3
+	AESENC \TMP3, \XMM4
+	pxor \TMP1, \TMP4
+	# accumulate the results in TMP4:XMM5, TMP6 holds the middle part
+	pxor \XMM6, \XMM5
+	pxor \TMP2, \TMP6
+	movdqa \XMM7, \TMP1
+	pshufd $78, \XMM7, \TMP2
+	pxor \XMM7, \TMP2
+	movdqa HashKey_2(%rsp ), \TMP5
+
+	# Multiply TMP5 * HashKey using Karatsuba
+
+	PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
+	movaps 0x60(%arg1), \TMP3
+	AESENC \TMP3, \XMM1 # Round 6
+	AESENC \TMP3, \XMM2
+	AESENC \TMP3, \XMM3
+	AESENC \TMP3, \XMM4
+	PCLMULQDQ 0x00, \TMP5, \XMM7 # XMM7 = a0*b0
+	movaps 0x70(%arg1), \TMP3
+	AESENC \TMP3, \XMM1 # Round 7
+	AESENC \TMP3, \XMM2
+	AESENC \TMP3, \XMM3
+	AESENC \TMP3, \XMM4
+	movdqa HashKey_2_k(%rsp), \TMP5
+	PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
+	movaps 0x80(%arg1), \TMP3
+	AESENC \TMP3, \XMM1 # Round 8
+	AESENC \TMP3, \XMM2
+	AESENC \TMP3, \XMM3
+	AESENC \TMP3, \XMM4
+	pxor \TMP1, \TMP4
+	# accumulate the results in TMP4:XMM5, TMP6 holds the middle part
+	pxor \XMM7, \XMM5
+	pxor \TMP2, \TMP6
+
+	# Multiply XMM8 * HashKey
+	# XMM8 and TMP5 hold the values for the two operands
+
+	movdqa \XMM8, \TMP1
+	pshufd $78, \XMM8, \TMP2
+	pxor \XMM8, \TMP2
+	movdqa HashKey(%rsp), \TMP5
+	PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
+	movaps 0x90(%arg1), \TMP3
+	AESENC \TMP3, \XMM1 # Round 9
+	AESENC \TMP3, \XMM2
+	AESENC \TMP3, \XMM3
+	AESENC \TMP3, \XMM4
+	PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0
+	movaps 0xa0(%arg1), \TMP3
+	AESENCLAST \TMP3, \XMM1 # Round 10
+	AESENCLAST \TMP3, \XMM2
+	AESENCLAST \TMP3, \XMM3
+	AESENCLAST \TMP3, \XMM4
+	movdqa HashKey_k(%rsp), \TMP5
+	PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
+	movdqu (%arg3,%r11,1), \TMP3
+	pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK
+	movdqu 16(%arg3,%r11,1), \TMP3
+	pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK
+	movdqu 32(%arg3,%r11,1), \TMP3
+	pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK
+	movdqu 48(%arg3,%r11,1), \TMP3
+	pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK
+	movdqu \XMM1, (%arg2,%r11,1) # Write to the ciphertext buffer
+	movdqu \XMM2, 16(%arg2,%r11,1) # Write to the ciphertext buffer
+	movdqu \XMM3, 32(%arg2,%r11,1) # Write to the ciphertext buffer
+	movdqu \XMM4, 48(%arg2,%r11,1) # Write to the ciphertext buffer
+	PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap
+	PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap
+	PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap
+	PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap
+
+	pxor \TMP4, \TMP1
+	pxor \XMM8, \XMM5
+	pxor \TMP6, \TMP2
+	pxor \TMP1, \TMP2
+	pxor \XMM5, \TMP2
+	movdqa \TMP2, \TMP3
+	pslldq $8, \TMP3 # left shift TMP3 2 DWs
+	psrldq $8, \TMP2 # right shift TMP2 2 DWs
+	pxor \TMP3, \XMM5
+	pxor \TMP2, \TMP1 # accumulate the results in TMP1:XMM5
+
+	# first phase of reduction
+
+	movdqa \XMM5, \TMP2
+	movdqa \XMM5, \TMP3
+	movdqa \XMM5, \TMP4
+	# move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently
+	pslld $31, \TMP2 # packed left shift << 31
+	pslld $30, \TMP3 # packed left shift << 30
+	pslld $25, \TMP4 # packed left shift << 25
+	pxor \TMP3, \TMP2 # xor the shifted versions
+	pxor \TMP4, \TMP2
+	movdqa \TMP2, \TMP5
+	psrldq $4, \TMP5 # right shift T5 1 DW
+	pslldq $12, \TMP2 # left shift T2 3 DWs
+	pxor \TMP2, \XMM5
+
+	# second phase of reduction
+
+	movdqa \XMM5,\TMP2 # make 3 copies of XMM5 into TMP2, TMP3, TMP4
+	movdqa \XMM5,\TMP3
+	movdqa \XMM5,\TMP4
+	psrld $1, \TMP2 # packed right shift >>1
+	psrld $2, \TMP3 # packed right shift >>2
+	psrld $7, \TMP4 # packed right shift >>7
+	pxor \TMP3,\TMP2 # xor the shifted versions
+	pxor \TMP4,\TMP2
+	pxor \TMP5, \TMP2
+	pxor \TMP2, \XMM5
+	pxor \TMP1, \XMM5 # result is in XMM5
+
+	pxor \XMM5, \XMM1
+.endm
+
+/*
+* decrypt 4 blocks at a time
+* ghash the 4 previously decrypted ciphertext blocks
+* arg1, %arg2, %arg3 are used as pointers only, not modified
+* %r11 is the data offset value
+*/
+.macro GHASH_4_ENCRYPT_4_PARALLEL_DEC TMP1 TMP2 TMP3 TMP4 TMP5 \
+TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
+
+	movdqa \XMM1, \XMM5
+	movdqa \XMM2, \XMM6
+	movdqa \XMM3, \XMM7
+	movdqa \XMM4, \XMM8
+
+	movdqa SHUF_MASK(%rip), %xmm15
+	# multiply TMP5 * HashKey using Karatsuba
+
+	movdqa \XMM5, \TMP4
+	pshufd $78, \XMM5, \TMP6
+	pxor \XMM5, \TMP6
+	paddd ONE(%rip), \XMM0 # INCR CNT
+	movdqa HashKey_4(%rsp), \TMP5
+	PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1
+	movdqa \XMM0, \XMM1
+	paddd ONE(%rip), \XMM0 # INCR CNT
+	movdqa \XMM0, \XMM2
+	paddd ONE(%rip), \XMM0 # INCR CNT
+	movdqa \XMM0, \XMM3
+	paddd ONE(%rip), \XMM0 # INCR CNT
+	movdqa \XMM0, \XMM4
+	PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap
+	PCLMULQDQ 0x00, \TMP5, \XMM5 # XMM5 = a0*b0
+	PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap
+	PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap
+	PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap
+
+	pxor (%arg1), \XMM1
+	pxor (%arg1), \XMM2
+	pxor (%arg1), \XMM3
+	pxor (%arg1), \XMM4
+	movdqa HashKey_4_k(%rsp), \TMP5
+	PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0)
+	movaps 0x10(%arg1), \TMP1
+	AESENC \TMP1, \XMM1 # Round 1
+	AESENC \TMP1, \XMM2
+	AESENC \TMP1, \XMM3
+	AESENC \TMP1, \XMM4
+	movaps 0x20(%arg1), \TMP1
+	AESENC \TMP1, \XMM1 # Round 2
+	AESENC \TMP1, \XMM2
+	AESENC \TMP1, \XMM3
+	AESENC \TMP1, \XMM4
+	movdqa \XMM6, \TMP1
+	pshufd $78, \XMM6, \TMP2
+	pxor \XMM6, \TMP2
+	movdqa HashKey_3(%rsp), \TMP5
+	PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1
+	movaps 0x30(%arg1), \TMP3
+	AESENC \TMP3, \XMM1 # Round 3
+	AESENC \TMP3, \XMM2
+	AESENC \TMP3, \XMM3
+	AESENC \TMP3, \XMM4
+	PCLMULQDQ 0x00, \TMP5, \XMM6 # XMM6 = a0*b0
+	movaps 0x40(%arg1), \TMP3
+	AESENC \TMP3, \XMM1 # Round 4
+	AESENC \TMP3, \XMM2
+	AESENC \TMP3, \XMM3
+	AESENC \TMP3, \XMM4
+	movdqa HashKey_3_k(%rsp), \TMP5
+	PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
+	movaps 0x50(%arg1), \TMP3
+	AESENC \TMP3, \XMM1 # Round 5
+	AESENC \TMP3, \XMM2
+	AESENC \TMP3, \XMM3
+	AESENC \TMP3, \XMM4
+	pxor \TMP1, \TMP4
+	# accumulate the results in TMP4:XMM5, TMP6 holds the middle part
+	pxor \XMM6, \XMM5
+	pxor \TMP2, \TMP6
+	movdqa \XMM7, \TMP1
+	pshufd $78, \XMM7, \TMP2
+	pxor \XMM7, \TMP2
+	movdqa HashKey_2(%rsp ), \TMP5
+
+	# Multiply TMP5 * HashKey using Karatsuba
+
+	PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
+	movaps 0x60(%arg1), \TMP3
+	AESENC \TMP3, \XMM1 # Round 6
+	AESENC \TMP3, \XMM2
+	AESENC \TMP3, \XMM3
+	AESENC \TMP3, \XMM4
+	PCLMULQDQ 0x00, \TMP5, \XMM7 # XMM7 = a0*b0
+	movaps 0x70(%arg1), \TMP3
+	AESENC \TMP3, \XMM1 # Round 7
+	AESENC \TMP3, \XMM2
+	AESENC \TMP3, \XMM3
+	AESENC \TMP3, \XMM4
+	movdqa HashKey_2_k(%rsp), \TMP5
+	PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
+	movaps 0x80(%arg1), \TMP3
+	AESENC \TMP3, \XMM1 # Round 8
+	AESENC \TMP3, \XMM2
+	AESENC \TMP3, \XMM3
+	AESENC \TMP3, \XMM4
+	pxor \TMP1, \TMP4
+	# accumulate the results in TMP4:XMM5, TMP6 holds the middle part
+	pxor \XMM7, \XMM5
+	pxor \TMP2, \TMP6
+
+	# Multiply XMM8 * HashKey
+	# XMM8 and TMP5 hold the values for the two operands
+
+	movdqa \XMM8, \TMP1
+	pshufd $78, \XMM8, \TMP2
+	pxor \XMM8, \TMP2
+	movdqa HashKey(%rsp), \TMP5
+	PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
+	movaps 0x90(%arg1), \TMP3
+	AESENC \TMP3, \XMM1 # Round 9
+	AESENC \TMP3, \XMM2
+	AESENC \TMP3, \XMM3
+	AESENC \TMP3, \XMM4
+	PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0
+	movaps 0xa0(%arg1), \TMP3
+	AESENCLAST \TMP3, \XMM1 # Round 10
+	AESENCLAST \TMP3, \XMM2
+	AESENCLAST \TMP3, \XMM3
+	AESENCLAST \TMP3, \XMM4
+	movdqa HashKey_k(%rsp), \TMP5
+	PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
+	movdqu (%arg3,%r11,1), \TMP3
+	pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK
+	movdqu \XMM1, (%arg2,%r11,1) # Write to plaintext buffer
+	movdqa \TMP3, \XMM1
+	movdqu 16(%arg3,%r11,1), \TMP3
+	pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK
+	movdqu \XMM2, 16(%arg2,%r11,1) # Write to plaintext buffer
+	movdqa \TMP3, \XMM2
+	movdqu 32(%arg3,%r11,1), \TMP3
+	pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK
+	movdqu \XMM3, 32(%arg2,%r11,1) # Write to plaintext buffer
+	movdqa \TMP3, \XMM3
+	movdqu 48(%arg3,%r11,1), \TMP3
+	pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK
+	movdqu \XMM4, 48(%arg2,%r11,1) # Write to plaintext buffer
+	movdqa \TMP3, \XMM4
+	PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap
+	PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap
+	PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap
+	PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap
+
+	pxor \TMP4, \TMP1
+	pxor \XMM8, \XMM5
+	pxor \TMP6, \TMP2
+	pxor \TMP1, \TMP2
+	pxor \XMM5, \TMP2
+	movdqa \TMP2, \TMP3
+	pslldq $8, \TMP3 # left shift TMP3 2 DWs
+	psrldq $8, \TMP2 # right shift TMP2 2 DWs
+	pxor \TMP3, \XMM5
+	pxor \TMP2, \TMP1 # accumulate the results in TMP1:XMM5
+
+	# first phase of reduction
+
+	movdqa \XMM5, \TMP2
+	movdqa \XMM5, \TMP3
+	movdqa \XMM5, \TMP4
+	# move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently
+	pslld $31, \TMP2 # packed left shift << 31
+	pslld $30, \TMP3 # packed left shift << 30
+	pslld $25, \TMP4 # packed left shift << 25
+	pxor \TMP3, \TMP2 # xor the shifted versions
+	pxor \TMP4, \TMP2
+	movdqa \TMP2, \TMP5
+	psrldq $4, \TMP5 # right shift T5 1 DW
+	pslldq $12, \TMP2 # left shift T2 3 DWs
+	pxor \TMP2, \XMM5
+
+	# second phase of reduction
+
+	movdqa \XMM5,\TMP2 # make 3 copies of XMM5 into TMP2, TMP3, TMP4
+	movdqa \XMM5,\TMP3
+	movdqa \XMM5,\TMP4
+	psrld $1, \TMP2 # packed right shift >>1
+	psrld $2, \TMP3 # packed right shift >>2
+	psrld $7, \TMP4 # packed right shift >>7
+	pxor \TMP3,\TMP2 # xor the shifted versions
+	pxor \TMP4,\TMP2
+	pxor \TMP5, \TMP2
+	pxor \TMP2, \XMM5
+	pxor \TMP1, \XMM5 # result is in XMM5
+
+	pxor \XMM5, \XMM1
+.endm
+
+/* GHASH the last 4 ciphertext blocks. */
+.macro GHASH_LAST_4 TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 \
+TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
+
+	# Multiply TMP6 * HashKey (using Karatsuba)
+
+	movdqa \XMM1, \TMP6
+	pshufd $78, \XMM1, \TMP2
+	pxor \XMM1, \TMP2
+	movdqa HashKey_4(%rsp), \TMP5
+	PCLMULQDQ 0x11, \TMP5, \TMP6 # TMP6 = a1*b1
+	PCLMULQDQ 0x00, \TMP5, \XMM1 # XMM1 = a0*b0
+	movdqa HashKey_4_k(%rsp), \TMP4
+	PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
+	movdqa \XMM1, \XMMDst
+	movdqa \TMP2, \XMM1 # result in TMP6, XMMDst, XMM1
+
+	# Multiply TMP1 * HashKey (using Karatsuba)
+
+	movdqa \XMM2, \TMP1
+	pshufd $78, \XMM2, \TMP2
+	pxor \XMM2, \TMP2
+	movdqa HashKey_3(%rsp), \TMP5
+	PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
+	PCLMULQDQ 0x00, \TMP5, \XMM2 # XMM2 = a0*b0
+	movdqa HashKey_3_k(%rsp), \TMP4
+	PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
+	pxor \TMP1, \TMP6
+	pxor \XMM2, \XMMDst
+	pxor \TMP2, \XMM1
+	# results accumulated in TMP6, XMMDst, XMM1
+
+	# Multiply TMP1 * HashKey (using Karatsuba)
+
+	movdqa \XMM3, \TMP1
+	pshufd $78, \XMM3, \TMP2
+	pxor \XMM3, \TMP2
+	movdqa HashKey_2(%rsp), \TMP5
+	PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
+	PCLMULQDQ 0x00, \TMP5, \XMM3 # XMM3 = a0*b0
+	movdqa HashKey_2_k(%rsp), \TMP4
+	PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
+	pxor \TMP1, \TMP6
+	pxor \XMM3, \XMMDst
+	pxor \TMP2, \XMM1 # results accumulated in TMP6, XMMDst, XMM1
+
+	# Multiply TMP1 * HashKey (using Karatsuba)
+	movdqa \XMM4, \TMP1
+	pshufd $78, \XMM4, \TMP2
+	pxor \XMM4, \TMP2
+	movdqa HashKey(%rsp), \TMP5
+	PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
+	PCLMULQDQ 0x00, \TMP5, \XMM4 # XMM4 = a0*b0
+	movdqa HashKey_k(%rsp), \TMP4
+	PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
+	pxor \TMP1, \TMP6
+	pxor \XMM4, \XMMDst
+	pxor \XMM1, \TMP2
+	pxor \TMP6, \TMP2
+	pxor \XMMDst, \TMP2
+	# middle section of the temp results combined as in Karatsuba algorithm
+	movdqa \TMP2, \TMP4
+	pslldq $8, \TMP4 # left shift TMP4 2 DWs
+	psrldq $8, \TMP2 # right shift TMP2 2 DWs
+	pxor \TMP4, \XMMDst
+	pxor \TMP2, \TMP6
+	# TMP6:XMMDst holds the result of the accumulated carry-less multiplications
+	# first phase of the reduction
+	movdqa \XMMDst, \TMP2
+	movdqa \XMMDst, \TMP3
+	movdqa \XMMDst, \TMP4
+	# move XMMDst into TMP2, TMP3, TMP4 in order to perform 3 shifts independently
+	pslld $31, \TMP2 # packed left shifting << 31
+	pslld $30, \TMP3 # packed left shifting << 30
+	pslld $25, \TMP4 # packed left shifting << 25
+	pxor \TMP3, \TMP2 # xor the shifted versions
+	pxor \TMP4, \TMP2
+	movdqa \TMP2, \TMP7
+	psrldq $4, \TMP7 # right shift TMP7 1 DW
+	pslldq $12, \TMP2 # left shift TMP2 3 DWs
+	pxor \TMP2, \XMMDst
+
+	# second phase of the reduction
+	movdqa \XMMDst, \TMP2
+	# make 3 copies of XMMDst for doing 3 shift operations
+	movdqa \XMMDst, \TMP3
+	movdqa \XMMDst, \TMP4
+	psrld $1, \TMP2 # packed right shift >> 1
+	psrld $2, \TMP3 # packed right shift >> 2
+	psrld $7, \TMP4 # packed right shift >> 7
+	pxor \TMP3, \TMP2 # xor the shifted versions
+	pxor \TMP4, \TMP2
+	pxor \TMP7, \TMP2
+	pxor \TMP2, \XMMDst
+	pxor \TMP6, \XMMDst # reduced result is in XMMDst
+.endm
+
+/* Encryption of a single block done */
+.macro ENCRYPT_SINGLE_BLOCK XMM0 TMP1
+
+	pxor (%arg1), \XMM0
+	movaps 16(%arg1), \TMP1
+	AESENC \TMP1, \XMM0
+	movaps 32(%arg1), \TMP1
+	AESENC \TMP1, \XMM0
+	movaps 48(%arg1), \TMP1
+	AESENC \TMP1, \XMM0
+	movaps 64(%arg1), \TMP1
+	AESENC \TMP1, \XMM0
+	movaps 80(%arg1), \TMP1
+	AESENC \TMP1, \XMM0
+	movaps 96(%arg1), \TMP1
+	AESENC \TMP1, \XMM0
+	movaps 112(%arg1), \TMP1
+	AESENC \TMP1, \XMM0
+	movaps 128(%arg1), \TMP1
+	AESENC \TMP1, \XMM0
+	movaps 144(%arg1), \TMP1
+	AESENC \TMP1, \XMM0
+	movaps 160(%arg1), \TMP1
+	AESENCLAST \TMP1, \XMM0
+.endm
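ENCRYPT_SINGLE_BLOCK is a plain ten-round AES-128 encryption over the pre-expanded schedule in arg1. For readers more at home with intrinsics, an equivalent sketch (illustrative only, not part of the patch; build with -maes):

	#include <wmmintrin.h>	/* AES-NI intrinsics */

	/* One AES-128 encryption over an 11-entry expanded key
	 * schedule rk[0..10], mirroring ENCRYPT_SINGLE_BLOCK. */
	static __m128i aes128_encrypt_block(const __m128i rk[11], __m128i block)
	{
		int round;

		block = _mm_xor_si128(block, rk[0]);	/* initial whitening */
		for (round = 1; round <= 9; round++)
			block = _mm_aesenc_si128(block, rk[round]);
		return _mm_aesenclast_si128(block, rk[10]);	/* round 10 */
	}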
1177 | |||
1178 | |||
1179 | /***************************************************************************** | ||
1180 | * void aesni_gcm_dec(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. | ||
1181 | * u8 *out, // Plaintext output. Encrypt in-place is allowed. | ||
1182 | * const u8 *in, // Ciphertext input | ||
1183 | * u64 plaintext_len, // Length of data in bytes for decryption. | ||
1184 | * u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association) | ||
1185 | * // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload) | ||
1186 | * // concatenated with 0x00000001. 16-byte aligned pointer. | ||
1187 | * u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary. | ||
1188 | * const u8 *aad, // Additional Authentication Data (AAD) | ||
1189 | * u64 aad_len, // Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 bytes | ||
1190 | * u8 *auth_tag, // Authenticated Tag output. The driver will compare this to the | ||
1191 | * // given authentication tag and only return the plaintext if they match. | ||
1192 | * u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 | ||
1193 | * // (most likely), 12 or 8. | ||
1194 | * | ||
1195 | * Assumptions: | ||
1196 | * | ||
1197 | * keys: | ||
1198 | * keys are pre-expanded and aligned to 16 bytes. we are using the first | ||
1199 | * set of 11 keys in the data structure void *aes_ctx | ||
1200 | * | ||
1201 | * iv: | ||
1202 | * 0 1 2 3 | ||
1203 | * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||
1204 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1205 | * | Salt (From the SA) | | ||
1206 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1207 | * | Initialization Vector | | ||
1208 | * | (This is the sequence number from IPSec header) | | ||
1209 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1210 | * | 0x1 | | ||
1211 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1212 | * | ||
1213 | * | ||
1214 | * | ||
1215 | * AAD: | ||
1216 | * AAD padded to 128 bits with 0 | ||
1217 | * for example, assume AAD is a u32 vector | ||
1218 | * | ||
1219 | * if AAD is 8 bytes: | ||
1220 | * AAD[3] = {A0, A1}; | ||
1221 | * padded AAD in xmm register = {A1 A0 0 0} | ||
1222 | * | ||
1223 | * 0 1 2 3 | ||
1224 | * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||
1225 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1226 | * | SPI (A1) | | ||
1227 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1228 | * | 32-bit Sequence Number (A0) | | ||
1229 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1230 | * | 0x0 | | ||
1231 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1232 | * | ||
1233 | * AAD Format with 32-bit Sequence Number | ||
1234 | * | ||
1235 | * if AAD is 12 bytes: | ||
1236 | * AAD[3] = {A0, A1, A2}; | ||
1237 | * padded AAD in xmm register = {A2 A1 A0 0} | ||
1238 | * | ||
1239 | * 0 1 2 3 | ||
1240 | * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||
1241 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1244 | * | SPI (A2) | | ||
1245 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1246 | * | 64-bit Extended Sequence Number {A1,A0} | | ||
1247 | * | | | ||
1248 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1249 | * | 0x0 | | ||
1250 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1251 | * | ||
1252 | * AAD Format with 64-bit Extended Sequence Number | ||
1253 | * | ||
1254 | * aadLen: | ||
1255 | * As defined by the spec, aadLen can only be 8 or 12 bytes. | ||
1256 | * The code also supports 16; any other size will fail. | ||
1257 | * | ||
1258 | * TLen: | ||
1259 | * As defined by the spec, TLen can only be 8, 12 or 16 bytes. | ||
1260 | * Any other size will fail. | ||
1261 | * | ||
1262 | * poly = x^128 + x^127 + x^126 + x^121 + 1 | ||
1263 | * | ||
1264 | *****************************************************************************/ | ||
1265 | |||
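The pre-counter block described above is simply salt || explicit IV || 0x00000001, exactly as the glue code assembles it. A minimal C sketch (build_j0 is an illustrative helper, not a kernel function):

#include <stdint.h>
#include <string.h>

/* j0 = 4-byte salt (from the SA) || 8-byte explicit IV || 0x00000001 */
static void build_j0(uint8_t j0[16], const uint8_t salt[4],
		     const uint8_t explicit_iv[8])
{
	memcpy(j0, salt, 4);
	memcpy(j0 + 4, explicit_iv, 8);
	j0[12] = 0x00;
	j0[13] = 0x00;
	j0[14] = 0x00;
	j0[15] = 0x01;	/* big-endian counter value 1 */
}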
1266 | ENTRY(aesni_gcm_dec) | ||
1267 | push %r12 | ||
1268 | push %r13 | ||
1269 | push %r14 | ||
1270 | mov %rsp, %r14 | ||
1271 | /* | ||
1272 | * states of %xmm registers %xmm6:%xmm15 not saved | ||
1273 | * all %xmm registers are clobbered | ||
1274 | */ | ||
1275 | sub $VARIABLE_OFFSET, %rsp | ||
1276 | and $~63, %rsp # align rsp to 64 bytes | ||
1277 | mov %arg6, %r12 | ||
1278 | movdqu (%r12), %xmm13 # %xmm13 = HashKey | ||
1279 | movdqa SHUF_MASK(%rip), %xmm2 | ||
1280 | PSHUFB_XMM %xmm2, %xmm13 | ||
1281 | |||
1282 | |||
1283 | # Precompute HashKey<<1 (mod poly) from the hash key (required for GHASH) | ||
1284 | |||
1285 | movdqa %xmm13, %xmm2 | ||
1286 | psllq $1, %xmm13 | ||
1287 | psrlq $63, %xmm2 | ||
1288 | movdqa %xmm2, %xmm1 | ||
1289 | pslldq $8, %xmm2 | ||
1290 | psrldq $8, %xmm1 | ||
1291 | por %xmm2, %xmm13 | ||
1292 | |||
1293 | # Reduction | ||
1294 | |||
1295 | pshufd $0x24, %xmm1, %xmm2 | ||
1296 | pcmpeqd TWOONE(%rip), %xmm2 | ||
1297 | pand POLY(%rip), %xmm2 | ||
1298 | pxor %xmm2, %xmm13 # %xmm13 holds the HashKey<<1 (mod poly) | ||
1299 | |||
1300 | |||
1301 | # Decrypt first few blocks | ||
1302 | |||
1303 | movdqa %xmm13, HashKey(%rsp) # store HashKey<<1 (mod poly) | ||
1304 | mov %arg4, %r13 # save the number of bytes of plaintext/ciphertext | ||
1305 | and $-16, %r13 # %r13 = %r13 - (%r13 mod 16) | ||
1306 | mov %r13, %r12 | ||
1307 | and $(3<<4), %r12 | ||
1308 | jz _initial_num_blocks_is_0_decrypt | ||
1309 | cmp $(2<<4), %r12 | ||
1310 | jb _initial_num_blocks_is_1_decrypt | ||
1311 | je _initial_num_blocks_is_2_decrypt | ||
1312 | _initial_num_blocks_is_3_decrypt: | ||
1313 | INITIAL_BLOCKS_DEC 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ | ||
1314 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, dec | ||
1315 | sub $48, %r13 | ||
1316 | jmp _initial_blocks_decrypted | ||
1317 | _initial_num_blocks_is_2_decrypt: | ||
1318 | INITIAL_BLOCKS_DEC 2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ | ||
1319 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, dec | ||
1320 | sub $32, %r13 | ||
1321 | jmp _initial_blocks_decrypted | ||
1322 | _initial_num_blocks_is_1_decrypt: | ||
1323 | INITIAL_BLOCKS_DEC 1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ | ||
1324 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, dec | ||
1325 | sub $16, %r13 | ||
1326 | jmp _initial_blocks_decrypted | ||
1327 | _initial_num_blocks_is_0_decrypt: | ||
1328 | INITIAL_BLOCKS_DEC 0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ | ||
1329 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, dec | ||
1330 | _initial_blocks_decrypted: | ||
1331 | cmp $0, %r13 | ||
1332 | je _zero_cipher_left_decrypt | ||
1333 | sub $64, %r13 | ||
1334 | je _four_cipher_left_decrypt | ||
1335 | _decrypt_by_4: | ||
1336 | GHASH_4_ENCRYPT_4_PARALLEL_DEC %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \ | ||
1337 | %xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, dec | ||
1338 | add $64, %r11 | ||
1339 | sub $64, %r13 | ||
1340 | jne _decrypt_by_4 | ||
1341 | _four_cipher_left_decrypt: | ||
1342 | GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \ | ||
1343 | %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8 | ||
1344 | _zero_cipher_left_decrypt: | ||
1345 | mov %arg4, %r13 | ||
1346 | and $15, %r13 # %r13 = arg4 (mod 16) | ||
1347 | je _multiple_of_16_bytes_decrypt | ||
1348 | |||
1349 | # Handle the last <16 byte block separately | ||
1350 | |||
1351 | paddd ONE(%rip), %xmm0 # increment CNT to get Yn | ||
1352 | movdqa SHUF_MASK(%rip), %xmm10 | ||
1353 | PSHUFB_XMM %xmm10, %xmm0 | ||
1354 | |||
1355 | ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Yn) | ||
1356 | sub $16, %r11 | ||
1357 | add %r13, %r11 | ||
1358 | 	movdqu (%arg3,%r11,1), %xmm1   # receive the last <16 byte block | ||
1359 | lea SHIFT_MASK+16(%rip), %r12 | ||
1360 | sub %r13, %r12 | ||
1361 | # adjust the shuffle mask pointer to be able to shift 16-%r13 bytes | ||
1362 | # (%r13 is the number of bytes in plaintext mod 16) | ||
1363 | movdqu (%r12), %xmm2 # get the appropriate shuffle mask | ||
1364 | 	PSHUFB_XMM %xmm2, %xmm1        # right shift 16-%r13 bytes | ||
1365 | |||
1366 | movdqa %xmm1, %xmm2 | ||
1367 | pxor %xmm1, %xmm0 # Ciphertext XOR E(K, Yn) | ||
1368 | movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 | ||
1369 | # get the appropriate mask to mask out top 16-%r13 bytes of %xmm0 | ||
1370 | pand %xmm1, %xmm0 # mask out top 16-%r13 bytes of %xmm0 | ||
1371 | pand %xmm1, %xmm2 | ||
1372 | movdqa SHUF_MASK(%rip), %xmm10 | ||
1373 | 	PSHUFB_XMM %xmm10, %xmm2 | ||
1374 | |||
1375 | pxor %xmm2, %xmm8 | ||
1376 | GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 | ||
1377 | # GHASH computation for the last <16 byte block | ||
1378 | sub %r13, %r11 | ||
1379 | add $16, %r11 | ||
1380 | |||
1381 | # output %r13 bytes | ||
1382 | MOVQ_R64_XMM %xmm0, %rax | ||
1383 | cmp $8, %r13 | ||
1384 | jle _less_than_8_bytes_left_decrypt | ||
1385 | mov %rax, (%arg2 , %r11, 1) | ||
1386 | add $8, %r11 | ||
1387 | psrldq $8, %xmm0 | ||
1388 | MOVQ_R64_XMM %xmm0, %rax | ||
1389 | sub $8, %r13 | ||
1390 | _less_than_8_bytes_left_decrypt: | ||
1391 | mov %al, (%arg2, %r11, 1) | ||
1392 | add $1, %r11 | ||
1393 | shr $8, %rax | ||
1394 | sub $1, %r13 | ||
1395 | jne _less_than_8_bytes_left_decrypt | ||
1396 | _multiple_of_16_bytes_decrypt: | ||
1397 | 	mov	arg8, %r12		  # %r12 = aadLen (number of bytes) | ||
1398 | shl $3, %r12 # convert into number of bits | ||
1399 | movd %r12d, %xmm15 # len(A) in %xmm15 | ||
1400 | 	shl	$3, %arg4		  # len(C) in bits (*8) | ||
1401 | MOVQ_R64_XMM %arg4, %xmm1 | ||
1402 | pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 | ||
1403 | pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C) | ||
1404 | pxor %xmm15, %xmm8 | ||
1405 | GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 | ||
1406 | # final GHASH computation | ||
1407 | movdqa SHUF_MASK(%rip), %xmm10 | ||
1408 | PSHUFB_XMM %xmm10, %xmm8 | ||
1409 | |||
1410 | mov %arg5, %rax # %rax = *Y0 | ||
1411 | movdqu (%rax), %xmm0 # %xmm0 = Y0 | ||
1412 | ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Y0) | ||
1413 | pxor %xmm8, %xmm0 | ||
1414 | _return_T_decrypt: | ||
1415 | mov arg9, %r10 # %r10 = authTag | ||
1416 | mov arg10, %r11 # %r11 = auth_tag_len | ||
1417 | cmp $16, %r11 | ||
1418 | je _T_16_decrypt | ||
1419 | cmp $12, %r11 | ||
1420 | je _T_12_decrypt | ||
1421 | _T_8_decrypt: | ||
1422 | MOVQ_R64_XMM %xmm0, %rax | ||
1423 | mov %rax, (%r10) | ||
1424 | jmp _return_T_done_decrypt | ||
1425 | _T_12_decrypt: | ||
1426 | MOVQ_R64_XMM %xmm0, %rax | ||
1427 | mov %rax, (%r10) | ||
1428 | psrldq $8, %xmm0 | ||
1429 | movd %xmm0, %eax | ||
1430 | mov %eax, 8(%r10) | ||
1431 | jmp _return_T_done_decrypt | ||
1432 | _T_16_decrypt: | ||
1433 | movdqu %xmm0, (%r10) | ||
1434 | _return_T_done_decrypt: | ||
1435 | mov %r14, %rsp | ||
1436 | pop %r14 | ||
1437 | pop %r13 | ||
1438 | pop %r12 | ||
1439 | ret | ||
1440 | |||
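Both entry points begin by computing HashKey<<1 (mod poly) with the psllq/psrlq/por shift sequence followed by the TWOONE/POLY reduction. A minimal C sketch of that step, assuming h[1] holds the high and h[0] the low 64 bits, with poly = x^128 + x^127 + x^126 + x^121 + 1 as stated in the header comments:

#include <stdint.h>

/* Double h in GF(2^128): shift the 128-bit value left by one and, if a
 * bit fell off the top, fold the polynomial back in (the POLY constant). */
static void ghash_double(uint64_t h[2])
{
	uint64_t carry = h[1] >> 63;

	h[1] = (h[1] << 1) | (h[0] >> 63);
	h[0] <<= 1;
	if (carry) {
		h[1] ^= 0xC200000000000000ULL;	/* bits 127, 126, 121 */
		h[0] ^= 0x0000000000000001ULL;	/* bit 0 */
	}
}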
1441 | |||
1442 | /***************************************************************************** | ||
1443 | * void aesni_gcm_enc(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. | ||
1444 | * u8 *out, // Ciphertext output. Encrypt in-place is allowed. | ||
1445 | * const u8 *in, // Plaintext input | ||
1446 | * u64 plaintext_len, // Length of data in bytes for encryption. | ||
1447 | * u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association) | ||
1448 | * // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload) | ||
1449 | * // concatenated with 0x00000001. 16-byte aligned pointer. | ||
1450 | * u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary. | ||
1451 | * const u8 *aad, // Additional Authentication Data (AAD) | ||
1452 | * u64 aad_len, // Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 bytes | ||
1453 | * u8 *auth_tag, // Authenticated Tag output. | ||
1454 | * u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 (most likely), | ||
1455 | * // 12 or 8. | ||
1456 | * | ||
1457 | * Assumptions: | ||
1458 | * | ||
1459 | * keys: | ||
1460 | * Keys are pre-expanded and aligned to 16 bytes. We are using the | ||
1461 | * first set of 11 keys in the data structure void *aes_ctx | ||
1462 | * | ||
1463 | * | ||
1464 | * iv: | ||
1465 | * 0 1 2 3 | ||
1466 | * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||
1467 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1468 | * | Salt (From the SA) | | ||
1469 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1470 | * | Initialization Vector | | ||
1471 | * | (This is the sequence number from IPSec header) | | ||
1472 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1473 | * | 0x1 | | ||
1474 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1475 | * | ||
1476 | * | ||
1477 | * | ||
1478 | * AAD: | ||
1479 | * AAD padded to 128 bits with 0 | ||
1480 | * for example, assume AAD is a u32 vector | ||
1481 | * | ||
1482 | * if AAD is 8 bytes: | ||
1483 | * AAD[2] = {A0, A1}; | ||
1484 | * padded AAD in xmm register = {A1 A0 0 0} | ||
1485 | * | ||
1486 | * 0 1 2 3 | ||
1487 | * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||
1488 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1489 | * | SPI (A1) | | ||
1490 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1491 | * | 32-bit Sequence Number (A0) | | ||
1492 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1493 | * | 0x0 | | ||
1494 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1495 | * | ||
1496 | * AAD Format with 32-bit Sequence Number | ||
1497 | * | ||
1498 | * if AAD is 12 bytes: | ||
1499 | * AAD[3] = {A0, A1, A2}; | ||
1500 | * padded AAD in xmm register = {A2 A1 A0 0} | ||
1501 | * | ||
1502 | * 0 1 2 3 | ||
1503 | * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||
1504 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1505 | * | SPI (A2) | | ||
1506 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1507 | * | 64-bit Extended Sequence Number {A1,A0} | | ||
1508 | * | | | ||
1509 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1510 | * | 0x0 | | ||
1511 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
1512 | * | ||
1513 | * AAD Format with 64-bit Extended Sequence Number | ||
1514 | * | ||
1515 | * aadLen: | ||
1516 | * As defined by the spec, aadLen can only be 8 or 12 bytes. | ||
1517 | * The code also supports 16; any other size will fail. | ||
1518 | * | ||
1519 | * TLen: | ||
1520 | * As defined by the spec, TLen can only be 8, 12 or 16 bytes. | ||
1521 | * Any other size will fail. | ||
1522 | * | ||
1523 | * poly = x^128 + x^127 + x^126 + x^121 + 1 | ||
1524 | ***************************************************************************/ | ||
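The AAD layouts in the header above all reduce to zero-padding an 8- or 12-byte AAD out to one 16-byte block. A trivial C sketch (pad_aad is illustrative):

#include <stdint.h>
#include <string.h>

/* AAD padded to 128 bits with 0; aad_len is 8 or 12 here. */
static void pad_aad(uint8_t block[16], const uint8_t *aad, size_t aad_len)
{
	memset(block, 0, 16);
	memcpy(block, aad, aad_len);
}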
1525 | ENTRY(aesni_gcm_enc) | ||
1526 | push %r12 | ||
1527 | push %r13 | ||
1528 | push %r14 | ||
1529 | mov %rsp, %r14 | ||
1530 | # | ||
1531 | # states of %xmm registers %xmm6:%xmm15 not saved | ||
1532 | # all %xmm registers are clobbered | ||
1533 | # | ||
1534 | sub $VARIABLE_OFFSET, %rsp | ||
1535 | and $~63, %rsp | ||
1536 | mov %arg6, %r12 | ||
1537 | movdqu (%r12), %xmm13 | ||
1538 | movdqa SHUF_MASK(%rip), %xmm2 | ||
1539 | PSHUFB_XMM %xmm2, %xmm13 | ||
1540 | |||
1541 | |||
1542 | # precompute HashKey<<1 mod poly from the HashKey (required for GHASH) | ||
1543 | |||
1544 | movdqa %xmm13, %xmm2 | ||
1545 | psllq $1, %xmm13 | ||
1546 | psrlq $63, %xmm2 | ||
1547 | movdqa %xmm2, %xmm1 | ||
1548 | pslldq $8, %xmm2 | ||
1549 | psrldq $8, %xmm1 | ||
1550 | por %xmm2, %xmm13 | ||
1551 | |||
1552 | # reduce HashKey<<1 | ||
1553 | |||
1554 | pshufd $0x24, %xmm1, %xmm2 | ||
1555 | pcmpeqd TWOONE(%rip), %xmm2 | ||
1556 | pand POLY(%rip), %xmm2 | ||
1557 | 	pxor	%xmm2, %xmm13             # %xmm13 holds the HashKey<<1 (mod poly) | ||
1558 | 	movdqa	%xmm13, HashKey(%rsp)     # store HashKey<<1 (mod poly) | ||
1559 | 	mov	%arg4, %r13               # save the number of bytes of plaintext/ciphertext | ||
1560 | 	and	$-16, %r13                # %r13 = %r13 - (%r13 mod 16) | ||
1561 | mov %r13, %r12 | ||
1562 | |||
1563 | # Encrypt first few blocks | ||
1564 | |||
1565 | and $(3<<4), %r12 | ||
1566 | jz _initial_num_blocks_is_0_encrypt | ||
1567 | cmp $(2<<4), %r12 | ||
1568 | jb _initial_num_blocks_is_1_encrypt | ||
1569 | je _initial_num_blocks_is_2_encrypt | ||
1570 | _initial_num_blocks_is_3_encrypt: | ||
1571 | INITIAL_BLOCKS_ENC 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ | ||
1572 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, enc | ||
1573 | sub $48, %r13 | ||
1574 | jmp _initial_blocks_encrypted | ||
1575 | _initial_num_blocks_is_2_encrypt: | ||
1576 | INITIAL_BLOCKS_ENC 2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ | ||
1577 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, enc | ||
1578 | sub $32, %r13 | ||
1579 | jmp _initial_blocks_encrypted | ||
1580 | _initial_num_blocks_is_1_encrypt: | ||
1581 | INITIAL_BLOCKS_ENC 1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ | ||
1582 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, enc | ||
1583 | sub $16, %r13 | ||
1584 | jmp _initial_blocks_encrypted | ||
1585 | _initial_num_blocks_is_0_encrypt: | ||
1586 | INITIAL_BLOCKS_ENC 0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ | ||
1587 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, enc | ||
1588 | _initial_blocks_encrypted: | ||
1589 | |||
1590 | # Main loop - Encrypt remaining blocks | ||
1591 | |||
1592 | cmp $0, %r13 | ||
1593 | je _zero_cipher_left_encrypt | ||
1594 | sub $64, %r13 | ||
1595 | je _four_cipher_left_encrypt | ||
1596 | _encrypt_by_4_encrypt: | ||
1597 | GHASH_4_ENCRYPT_4_PARALLEL_ENC %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \ | ||
1598 | %xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, enc | ||
1599 | add $64, %r11 | ||
1600 | sub $64, %r13 | ||
1601 | jne _encrypt_by_4_encrypt | ||
1602 | _four_cipher_left_encrypt: | ||
1603 | GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \ | ||
1604 | %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8 | ||
1605 | _zero_cipher_left_encrypt: | ||
1606 | mov %arg4, %r13 | ||
1607 | and $15, %r13 # %r13 = arg4 (mod 16) | ||
1608 | je _multiple_of_16_bytes_encrypt | ||
1609 | |||
1610 | 	# Handle the last <16 Byte block separately | ||
1611 | paddd ONE(%rip), %xmm0 # INCR CNT to get Yn | ||
1612 | movdqa SHUF_MASK(%rip), %xmm10 | ||
1613 | PSHUFB_XMM %xmm10, %xmm0 | ||
1614 | |||
1615 | ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn) | ||
1616 | sub $16, %r11 | ||
1617 | add %r13, %r11 | ||
1618 | movdqu (%arg3,%r11,1), %xmm1 # receive the last <16 byte blocks | ||
1619 | lea SHIFT_MASK+16(%rip), %r12 | ||
1620 | sub %r13, %r12 | ||
1621 | # adjust the shuffle mask pointer to be able to shift 16-r13 bytes | ||
1622 | # (%r13 is the number of bytes in plaintext mod 16) | ||
1623 | movdqu (%r12), %xmm2 # get the appropriate shuffle mask | ||
1624 | 	PSHUFB_XMM %xmm2, %xmm1        # shift right 16-r13 bytes | ||
1625 | pxor %xmm1, %xmm0 # Plaintext XOR Encrypt(K, Yn) | ||
1626 | movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 | ||
1627 | # get the appropriate mask to mask out top 16-r13 bytes of xmm0 | ||
1628 | pand %xmm1, %xmm0 # mask out top 16-r13 bytes of xmm0 | ||
1629 | movdqa SHUF_MASK(%rip), %xmm10 | ||
1630 | 	PSHUFB_XMM %xmm10, %xmm0 | ||
1631 | |||
1632 | pxor %xmm0, %xmm8 | ||
1633 | GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 | ||
1634 | # GHASH computation for the last <16 byte block | ||
1635 | sub %r13, %r11 | ||
1636 | add $16, %r11 | ||
1637 | PSHUFB_XMM %xmm10, %xmm1 | ||
1638 | |||
1639 | # shuffle xmm0 back to output as ciphertext | ||
1640 | |||
1641 | # Output %r13 bytes | ||
1642 | MOVQ_R64_XMM %xmm0, %rax | ||
1643 | cmp $8, %r13 | ||
1644 | jle _less_than_8_bytes_left_encrypt | ||
1645 | mov %rax, (%arg2 , %r11, 1) | ||
1646 | add $8, %r11 | ||
1647 | psrldq $8, %xmm0 | ||
1648 | MOVQ_R64_XMM %xmm0, %rax | ||
1649 | sub $8, %r13 | ||
1650 | _less_than_8_bytes_left_encrypt: | ||
1651 | mov %al, (%arg2, %r11, 1) | ||
1652 | add $1, %r11 | ||
1653 | shr $8, %rax | ||
1654 | sub $1, %r13 | ||
1655 | jne _less_than_8_bytes_left_encrypt | ||
1656 | _multiple_of_16_bytes_encrypt: | ||
1657 | 	mov	arg8, %r12		  # %r12 = aadLen (number of bytes) | ||
1658 | shl $3, %r12 | ||
1659 | movd %r12d, %xmm15 # len(A) in %xmm15 | ||
1660 | 	shl	$3, %arg4		  # len(C) in bits (*8) | ||
1661 | MOVQ_R64_XMM %arg4, %xmm1 | ||
1662 | pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 | ||
1663 | pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C) | ||
1664 | pxor %xmm15, %xmm8 | ||
1665 | GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 | ||
1666 | # final GHASH computation | ||
1667 | movdqa SHUF_MASK(%rip), %xmm10 | ||
1668 | PSHUFB_XMM %xmm10, %xmm8 # perform a 16 byte swap | ||
1669 | |||
1670 | mov %arg5, %rax # %rax = *Y0 | ||
1671 | movdqu (%rax), %xmm0 # %xmm0 = Y0 | ||
1672 | ENCRYPT_SINGLE_BLOCK %xmm0, %xmm15 # Encrypt(K, Y0) | ||
1673 | pxor %xmm8, %xmm0 | ||
1674 | _return_T_encrypt: | ||
1675 | mov arg9, %r10 # %r10 = authTag | ||
1676 | mov arg10, %r11 # %r11 = auth_tag_len | ||
1677 | cmp $16, %r11 | ||
1678 | je _T_16_encrypt | ||
1679 | cmp $12, %r11 | ||
1680 | je _T_12_encrypt | ||
1681 | _T_8_encrypt: | ||
1682 | MOVQ_R64_XMM %xmm0, %rax | ||
1683 | mov %rax, (%r10) | ||
1684 | jmp _return_T_done_encrypt | ||
1685 | _T_12_encrypt: | ||
1686 | MOVQ_R64_XMM %xmm0, %rax | ||
1687 | mov %rax, (%r10) | ||
1688 | psrldq $8, %xmm0 | ||
1689 | movd %xmm0, %eax | ||
1690 | mov %eax, 8(%r10) | ||
1691 | jmp _return_T_done_encrypt | ||
1692 | _T_16_encrypt: | ||
1693 | movdqu %xmm0, (%r10) | ||
1694 | _return_T_done_encrypt: | ||
1695 | mov %r14, %rsp | ||
1696 | pop %r14 | ||
1697 | pop %r13 | ||
1698 | pop %r12 | ||
1699 | ret | ||
1700 | |||
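The _multiple_of_16_bytes tails in both routines fold a final len(A)||len(C) block into GHASH, converting byte counts to bit counts with shl $3. A C sketch of that block (the asm additionally byte-swaps via SHUF_MASK; host byte order is used here for clarity):

#include <stdint.h>
#include <string.h>

/* Final GHASH block: 64-bit AAD bit length followed by the 64-bit
 * ciphertext bit length (shl $3 == multiply by 8). */
static void ghash_len_block(uint8_t block[16], uint64_t aad_bytes,
			    uint64_t text_bytes)
{
	uint64_t alen = aad_bytes * 8;
	uint64_t clen = text_bytes * 8;

	memcpy(block, &alen, 8);
	memcpy(block + 8, &clen, 8);
}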
1701 | #endif | ||
1702 | |||
49 | 1703 | ||
50 | _key_expansion_128: | 1704 | _key_expansion_128: |
51 | _key_expansion_256a: | 1705 | _key_expansion_256a: |
@@ -55,10 +1709,11 @@ _key_expansion_256a: | |||
55 | shufps $0b10001100, %xmm0, %xmm4 | 1709 | shufps $0b10001100, %xmm0, %xmm4 |
56 | pxor %xmm4, %xmm0 | 1710 | pxor %xmm4, %xmm0 |
57 | pxor %xmm1, %xmm0 | 1711 | pxor %xmm1, %xmm0 |
58 | movaps %xmm0, (%rcx) | 1712 | movaps %xmm0, (TKEYP) |
59 | add $0x10, %rcx | 1713 | add $0x10, TKEYP |
60 | ret | 1714 | ret |
61 | 1715 | ||
1716 | .align 4 | ||
62 | _key_expansion_192a: | 1717 | _key_expansion_192a: |
63 | pshufd $0b01010101, %xmm1, %xmm1 | 1718 | pshufd $0b01010101, %xmm1, %xmm1 |
64 | shufps $0b00010000, %xmm0, %xmm4 | 1719 | shufps $0b00010000, %xmm0, %xmm4 |
@@ -76,12 +1731,13 @@ _key_expansion_192a: | |||
76 | 1731 | ||
77 | movaps %xmm0, %xmm1 | 1732 | movaps %xmm0, %xmm1 |
78 | shufps $0b01000100, %xmm0, %xmm6 | 1733 | shufps $0b01000100, %xmm0, %xmm6 |
79 | movaps %xmm6, (%rcx) | 1734 | movaps %xmm6, (TKEYP) |
80 | shufps $0b01001110, %xmm2, %xmm1 | 1735 | shufps $0b01001110, %xmm2, %xmm1 |
81 | movaps %xmm1, 16(%rcx) | 1736 | movaps %xmm1, 0x10(TKEYP) |
82 | add $0x20, %rcx | 1737 | add $0x20, TKEYP |
83 | ret | 1738 | ret |
84 | 1739 | ||
1740 | .align 4 | ||
85 | _key_expansion_192b: | 1741 | _key_expansion_192b: |
86 | pshufd $0b01010101, %xmm1, %xmm1 | 1742 | pshufd $0b01010101, %xmm1, %xmm1 |
87 | shufps $0b00010000, %xmm0, %xmm4 | 1743 | shufps $0b00010000, %xmm0, %xmm4 |
@@ -96,10 +1752,11 @@ _key_expansion_192b: | |||
96 | pxor %xmm3, %xmm2 | 1752 | pxor %xmm3, %xmm2 |
97 | pxor %xmm5, %xmm2 | 1753 | pxor %xmm5, %xmm2 |
98 | 1754 | ||
99 | movaps %xmm0, (%rcx) | 1755 | movaps %xmm0, (TKEYP) |
100 | add $0x10, %rcx | 1756 | add $0x10, TKEYP |
101 | ret | 1757 | ret |
102 | 1758 | ||
1759 | .align 4 | ||
103 | _key_expansion_256b: | 1760 | _key_expansion_256b: |
104 | pshufd $0b10101010, %xmm1, %xmm1 | 1761 | pshufd $0b10101010, %xmm1, %xmm1 |
105 | shufps $0b00010000, %xmm2, %xmm4 | 1762 | shufps $0b00010000, %xmm2, %xmm4 |
@@ -107,8 +1764,8 @@ _key_expansion_256b: | |||
107 | shufps $0b10001100, %xmm2, %xmm4 | 1764 | shufps $0b10001100, %xmm2, %xmm4 |
108 | pxor %xmm4, %xmm2 | 1765 | pxor %xmm4, %xmm2 |
109 | pxor %xmm1, %xmm2 | 1766 | pxor %xmm1, %xmm2 |
110 | movaps %xmm2, (%rcx) | 1767 | movaps %xmm2, (TKEYP) |
111 | add $0x10, %rcx | 1768 | add $0x10, TKEYP |
112 | ret | 1769 | ret |
113 | 1770 | ||
114 | /* | 1771 | /* |
@@ -116,17 +1773,23 @@ _key_expansion_256b: | |||
116 | * unsigned int key_len) | 1773 | * unsigned int key_len) |
117 | */ | 1774 | */ |
118 | ENTRY(aesni_set_key) | 1775 | ENTRY(aesni_set_key) |
119 | movups (%rsi), %xmm0 # user key (first 16 bytes) | 1776 | #ifndef __x86_64__ |
120 | movaps %xmm0, (%rdi) | 1777 | pushl KEYP |
121 | lea 0x10(%rdi), %rcx # key addr | 1778 | movl 8(%esp), KEYP # ctx |
122 | movl %edx, 480(%rdi) | 1779 | movl 12(%esp), UKEYP # in_key |
1780 | movl 16(%esp), %edx # key_len | ||
1781 | #endif | ||
1782 | movups (UKEYP), %xmm0 # user key (first 16 bytes) | ||
1783 | movaps %xmm0, (KEYP) | ||
1784 | lea 0x10(KEYP), TKEYP # key addr | ||
1785 | movl %edx, 480(KEYP) | ||
123 | pxor %xmm4, %xmm4 # xmm4 is assumed 0 in _key_expansion_x | 1786 | pxor %xmm4, %xmm4 # xmm4 is assumed 0 in _key_expansion_x |
124 | cmp $24, %dl | 1787 | cmp $24, %dl |
125 | jb .Lenc_key128 | 1788 | jb .Lenc_key128 |
126 | je .Lenc_key192 | 1789 | je .Lenc_key192 |
127 | movups 0x10(%rsi), %xmm2 # other user key | 1790 | movups 0x10(UKEYP), %xmm2 # other user key |
128 | movaps %xmm2, (%rcx) | 1791 | movaps %xmm2, (TKEYP) |
129 | add $0x10, %rcx | 1792 | add $0x10, TKEYP |
130 | AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 | 1793 | AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 |
131 | call _key_expansion_256a | 1794 | call _key_expansion_256a |
132 | AESKEYGENASSIST 0x1 %xmm0 %xmm1 | 1795 | AESKEYGENASSIST 0x1 %xmm0 %xmm1 |
@@ -155,7 +1818,7 @@ ENTRY(aesni_set_key) | |||
155 | call _key_expansion_256a | 1818 | call _key_expansion_256a |
156 | jmp .Ldec_key | 1819 | jmp .Ldec_key |
157 | .Lenc_key192: | 1820 | .Lenc_key192: |
158 | movq 0x10(%rsi), %xmm2 # other user key | 1821 | movq 0x10(UKEYP), %xmm2 # other user key |
159 | AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 | 1822 | AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 |
160 | call _key_expansion_192a | 1823 | call _key_expansion_192a |
161 | AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2 | 1824 | AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2 |
@@ -195,33 +1858,47 @@ ENTRY(aesni_set_key) | |||
195 | AESKEYGENASSIST 0x36 %xmm0 %xmm1 # round 10 | 1858 | AESKEYGENASSIST 0x36 %xmm0 %xmm1 # round 10 |
196 | call _key_expansion_128 | 1859 | call _key_expansion_128 |
197 | .Ldec_key: | 1860 | .Ldec_key: |
198 | sub $0x10, %rcx | 1861 | sub $0x10, TKEYP |
199 | movaps (%rdi), %xmm0 | 1862 | movaps (KEYP), %xmm0 |
200 | movaps (%rcx), %xmm1 | 1863 | movaps (TKEYP), %xmm1 |
201 | movaps %xmm0, 240(%rcx) | 1864 | movaps %xmm0, 240(TKEYP) |
202 | movaps %xmm1, 240(%rdi) | 1865 | movaps %xmm1, 240(KEYP) |
203 | add $0x10, %rdi | 1866 | add $0x10, KEYP |
204 | lea 240-16(%rcx), %rsi | 1867 | lea 240-16(TKEYP), UKEYP |
205 | .align 4 | 1868 | .align 4 |
206 | .Ldec_key_loop: | 1869 | .Ldec_key_loop: |
207 | movaps (%rdi), %xmm0 | 1870 | movaps (KEYP), %xmm0 |
208 | AESIMC %xmm0 %xmm1 | 1871 | AESIMC %xmm0 %xmm1 |
209 | movaps %xmm1, (%rsi) | 1872 | movaps %xmm1, (UKEYP) |
210 | add $0x10, %rdi | 1873 | add $0x10, KEYP |
211 | sub $0x10, %rsi | 1874 | sub $0x10, UKEYP |
212 | cmp %rcx, %rdi | 1875 | cmp TKEYP, KEYP |
213 | jb .Ldec_key_loop | 1876 | jb .Ldec_key_loop |
214 | xor %rax, %rax | 1877 | xor AREG, AREG |
1878 | #ifndef __x86_64__ | ||
1879 | popl KEYP | ||
1880 | #endif | ||
215 | ret | 1881 | ret |
216 | 1882 | ||
217 | /* | 1883 | /* |
218 | * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) | 1884 | * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) |
219 | */ | 1885 | */ |
220 | ENTRY(aesni_enc) | 1886 | ENTRY(aesni_enc) |
1887 | #ifndef __x86_64__ | ||
1888 | pushl KEYP | ||
1889 | pushl KLEN | ||
1890 | movl 12(%esp), KEYP | ||
1891 | movl 16(%esp), OUTP | ||
1892 | movl 20(%esp), INP | ||
1893 | #endif | ||
221 | movl 480(KEYP), KLEN # key length | 1894 | movl 480(KEYP), KLEN # key length |
222 | movups (INP), STATE # input | 1895 | movups (INP), STATE # input |
223 | call _aesni_enc1 | 1896 | call _aesni_enc1 |
224 | movups STATE, (OUTP) # output | 1897 | movups STATE, (OUTP) # output |
1898 | #ifndef __x86_64__ | ||
1899 | popl KLEN | ||
1900 | popl KEYP | ||
1901 | #endif | ||
225 | ret | 1902 | ret |
226 | 1903 | ||
227 | /* | 1904 | /* |
@@ -236,6 +1913,7 @@ ENTRY(aesni_enc) | |||
236 | * KEY | 1913 | * KEY |
237 | * TKEYP (T1) | 1914 | * TKEYP (T1) |
238 | */ | 1915 | */ |
1916 | .align 4 | ||
239 | _aesni_enc1: | 1917 | _aesni_enc1: |
240 | movaps (KEYP), KEY # key | 1918 | movaps (KEYP), KEY # key |
241 | mov KEYP, TKEYP | 1919 | mov KEYP, TKEYP |
@@ -298,6 +1976,7 @@ _aesni_enc1: | |||
298 | * KEY | 1976 | * KEY |
299 | * TKEYP (T1) | 1977 | * TKEYP (T1) |
300 | */ | 1978 | */ |
1979 | .align 4 | ||
301 | _aesni_enc4: | 1980 | _aesni_enc4: |
302 | movaps (KEYP), KEY # key | 1981 | movaps (KEYP), KEY # key |
303 | mov KEYP, TKEYP | 1982 | mov KEYP, TKEYP |
@@ -391,11 +2070,22 @@ _aesni_enc4: | |||
391 | * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) | 2070 | * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) |
392 | */ | 2071 | */ |
393 | ENTRY(aesni_dec) | 2072 | ENTRY(aesni_dec) |
2073 | #ifndef __x86_64__ | ||
2074 | pushl KEYP | ||
2075 | pushl KLEN | ||
2076 | movl 12(%esp), KEYP | ||
2077 | movl 16(%esp), OUTP | ||
2078 | movl 20(%esp), INP | ||
2079 | #endif | ||
394 | mov 480(KEYP), KLEN # key length | 2080 | mov 480(KEYP), KLEN # key length |
395 | add $240, KEYP | 2081 | add $240, KEYP |
396 | movups (INP), STATE # input | 2082 | movups (INP), STATE # input |
397 | call _aesni_dec1 | 2083 | call _aesni_dec1 |
398 | movups STATE, (OUTP) #output | 2084 | movups STATE, (OUTP) #output |
2085 | #ifndef __x86_64__ | ||
2086 | popl KLEN | ||
2087 | popl KEYP | ||
2088 | #endif | ||
399 | ret | 2089 | ret |
400 | 2090 | ||
401 | /* | 2091 | /* |
@@ -410,6 +2100,7 @@ ENTRY(aesni_dec) | |||
410 | * KEY | 2100 | * KEY |
411 | * TKEYP (T1) | 2101 | * TKEYP (T1) |
412 | */ | 2102 | */ |
2103 | .align 4 | ||
413 | _aesni_dec1: | 2104 | _aesni_dec1: |
414 | movaps (KEYP), KEY # key | 2105 | movaps (KEYP), KEY # key |
415 | mov KEYP, TKEYP | 2106 | mov KEYP, TKEYP |
@@ -472,6 +2163,7 @@ _aesni_dec1: | |||
472 | * KEY | 2163 | * KEY |
473 | * TKEYP (T1) | 2164 | * TKEYP (T1) |
474 | */ | 2165 | */ |
2166 | .align 4 | ||
475 | _aesni_dec4: | 2167 | _aesni_dec4: |
476 | movaps (KEYP), KEY # key | 2168 | movaps (KEYP), KEY # key |
477 | mov KEYP, TKEYP | 2169 | mov KEYP, TKEYP |
@@ -566,6 +2258,15 @@ _aesni_dec4: | |||
566 | * size_t len) | 2258 | * size_t len) |
567 | */ | 2259 | */ |
568 | ENTRY(aesni_ecb_enc) | 2260 | ENTRY(aesni_ecb_enc) |
2261 | #ifndef __x86_64__ | ||
2262 | pushl LEN | ||
2263 | pushl KEYP | ||
2264 | pushl KLEN | ||
2265 | movl 16(%esp), KEYP | ||
2266 | movl 20(%esp), OUTP | ||
2267 | movl 24(%esp), INP | ||
2268 | movl 28(%esp), LEN | ||
2269 | #endif | ||
569 | test LEN, LEN # check length | 2270 | test LEN, LEN # check length |
570 | jz .Lecb_enc_ret | 2271 | jz .Lecb_enc_ret |
571 | mov 480(KEYP), KLEN | 2272 | mov 480(KEYP), KLEN |
@@ -602,6 +2303,11 @@ ENTRY(aesni_ecb_enc) | |||
602 | cmp $16, LEN | 2303 | cmp $16, LEN |
603 | jge .Lecb_enc_loop1 | 2304 | jge .Lecb_enc_loop1 |
604 | .Lecb_enc_ret: | 2305 | .Lecb_enc_ret: |
2306 | #ifndef __x86_64__ | ||
2307 | popl KLEN | ||
2308 | popl KEYP | ||
2309 | popl LEN | ||
2310 | #endif | ||
605 | ret | 2311 | ret |
606 | 2312 | ||
607 | /* | 2313 | /* |
@@ -609,6 +2315,15 @@ ENTRY(aesni_ecb_enc) | |||
609 | * size_t len); | 2315 | * size_t len); |
610 | */ | 2316 | */ |
611 | ENTRY(aesni_ecb_dec) | 2317 | ENTRY(aesni_ecb_dec) |
2318 | #ifndef __x86_64__ | ||
2319 | pushl LEN | ||
2320 | pushl KEYP | ||
2321 | pushl KLEN | ||
2322 | movl 16(%esp), KEYP | ||
2323 | movl 20(%esp), OUTP | ||
2324 | movl 24(%esp), INP | ||
2325 | movl 28(%esp), LEN | ||
2326 | #endif | ||
612 | test LEN, LEN | 2327 | test LEN, LEN |
613 | jz .Lecb_dec_ret | 2328 | jz .Lecb_dec_ret |
614 | mov 480(KEYP), KLEN | 2329 | mov 480(KEYP), KLEN |
@@ -646,6 +2361,11 @@ ENTRY(aesni_ecb_dec) | |||
646 | cmp $16, LEN | 2361 | cmp $16, LEN |
647 | jge .Lecb_dec_loop1 | 2362 | jge .Lecb_dec_loop1 |
648 | .Lecb_dec_ret: | 2363 | .Lecb_dec_ret: |
2364 | #ifndef __x86_64__ | ||
2365 | popl KLEN | ||
2366 | popl KEYP | ||
2367 | popl LEN | ||
2368 | #endif | ||
649 | ret | 2369 | ret |
650 | 2370 | ||
651 | /* | 2371 | /* |
@@ -653,6 +2373,17 @@ ENTRY(aesni_ecb_dec) | |||
653 | * size_t len, u8 *iv) | 2373 | * size_t len, u8 *iv) |
654 | */ | 2374 | */ |
655 | ENTRY(aesni_cbc_enc) | 2375 | ENTRY(aesni_cbc_enc) |
2376 | #ifndef __x86_64__ | ||
2377 | pushl IVP | ||
2378 | pushl LEN | ||
2379 | pushl KEYP | ||
2380 | pushl KLEN | ||
2381 | movl 20(%esp), KEYP | ||
2382 | movl 24(%esp), OUTP | ||
2383 | movl 28(%esp), INP | ||
2384 | movl 32(%esp), LEN | ||
2385 | movl 36(%esp), IVP | ||
2386 | #endif | ||
656 | cmp $16, LEN | 2387 | cmp $16, LEN |
657 | jb .Lcbc_enc_ret | 2388 | jb .Lcbc_enc_ret |
658 | mov 480(KEYP), KLEN | 2389 | mov 480(KEYP), KLEN |
@@ -670,6 +2401,12 @@ ENTRY(aesni_cbc_enc) | |||
670 | jge .Lcbc_enc_loop | 2401 | jge .Lcbc_enc_loop |
671 | movups STATE, (IVP) | 2402 | movups STATE, (IVP) |
672 | .Lcbc_enc_ret: | 2403 | .Lcbc_enc_ret: |
2404 | #ifndef __x86_64__ | ||
2405 | popl KLEN | ||
2406 | popl KEYP | ||
2407 | popl LEN | ||
2408 | popl IVP | ||
2409 | #endif | ||
673 | ret | 2410 | ret |
674 | 2411 | ||
675 | /* | 2412 | /* |
@@ -677,6 +2414,17 @@ ENTRY(aesni_cbc_enc) | |||
677 | * size_t len, u8 *iv) | 2414 | * size_t len, u8 *iv) |
678 | */ | 2415 | */ |
679 | ENTRY(aesni_cbc_dec) | 2416 | ENTRY(aesni_cbc_dec) |
2417 | #ifndef __x86_64__ | ||
2418 | pushl IVP | ||
2419 | pushl LEN | ||
2420 | pushl KEYP | ||
2421 | pushl KLEN | ||
2422 | movl 20(%esp), KEYP | ||
2423 | movl 24(%esp), OUTP | ||
2424 | movl 28(%esp), INP | ||
2425 | movl 32(%esp), LEN | ||
2426 | movl 36(%esp), IVP | ||
2427 | #endif | ||
680 | cmp $16, LEN | 2428 | cmp $16, LEN |
681 | jb .Lcbc_dec_just_ret | 2429 | jb .Lcbc_dec_just_ret |
682 | mov 480(KEYP), KLEN | 2430 | mov 480(KEYP), KLEN |
@@ -690,16 +2438,30 @@ ENTRY(aesni_cbc_dec) | |||
690 | movaps IN1, STATE1 | 2438 | movaps IN1, STATE1 |
691 | movups 0x10(INP), IN2 | 2439 | movups 0x10(INP), IN2 |
692 | movaps IN2, STATE2 | 2440 | movaps IN2, STATE2 |
2441 | #ifdef __x86_64__ | ||
693 | movups 0x20(INP), IN3 | 2442 | movups 0x20(INP), IN3 |
694 | movaps IN3, STATE3 | 2443 | movaps IN3, STATE3 |
695 | movups 0x30(INP), IN4 | 2444 | movups 0x30(INP), IN4 |
696 | movaps IN4, STATE4 | 2445 | movaps IN4, STATE4 |
2446 | #else | ||
2447 | movups 0x20(INP), IN1 | ||
2448 | movaps IN1, STATE3 | ||
2449 | movups 0x30(INP), IN2 | ||
2450 | movaps IN2, STATE4 | ||
2451 | #endif | ||
697 | call _aesni_dec4 | 2452 | call _aesni_dec4 |
698 | pxor IV, STATE1 | 2453 | pxor IV, STATE1 |
2454 | #ifdef __x86_64__ | ||
699 | pxor IN1, STATE2 | 2455 | pxor IN1, STATE2 |
700 | pxor IN2, STATE3 | 2456 | pxor IN2, STATE3 |
701 | pxor IN3, STATE4 | 2457 | pxor IN3, STATE4 |
702 | movaps IN4, IV | 2458 | movaps IN4, IV |
2459 | #else | ||
2460 | pxor (INP), STATE2 | ||
2461 | pxor 0x10(INP), STATE3 | ||
2462 | pxor IN1, STATE4 | ||
2463 | movaps IN2, IV | ||
2464 | #endif | ||
703 | movups STATE1, (OUTP) | 2465 | movups STATE1, (OUTP) |
704 | movups STATE2, 0x10(OUTP) | 2466 | movups STATE2, 0x10(OUTP) |
705 | movups STATE3, 0x20(OUTP) | 2467 | movups STATE3, 0x20(OUTP) |
@@ -727,8 +2489,15 @@ ENTRY(aesni_cbc_dec) | |||
727 | .Lcbc_dec_ret: | 2489 | .Lcbc_dec_ret: |
728 | movups IV, (IVP) | 2490 | movups IV, (IVP) |
729 | .Lcbc_dec_just_ret: | 2491 | .Lcbc_dec_just_ret: |
2492 | #ifndef __x86_64__ | ||
2493 | popl KLEN | ||
2494 | popl KEYP | ||
2495 | popl LEN | ||
2496 | popl IVP | ||
2497 | #endif | ||
730 | ret | 2498 | ret |
731 | 2499 | ||
2500 | #ifdef __x86_64__ | ||
732 | .align 16 | 2501 | .align 16 |
733 | .Lbswap_mask: | 2502 | .Lbswap_mask: |
734 | .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 | 2503 | .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 |
@@ -744,6 +2513,7 @@ ENTRY(aesni_cbc_dec) | |||
744 | * INC: == 1, in little endian | 2513 | * INC: == 1, in little endian |
745 | * BSWAP_MASK == endian swapping mask | 2514 | * BSWAP_MASK == endian swapping mask |
746 | */ | 2515 | */ |
2516 | .align 4 | ||
747 | _aesni_inc_init: | 2517 | _aesni_inc_init: |
748 | movaps .Lbswap_mask, BSWAP_MASK | 2518 | movaps .Lbswap_mask, BSWAP_MASK |
749 | movaps IV, CTR | 2519 | movaps IV, CTR |
@@ -768,6 +2538,7 @@ _aesni_inc_init: | |||
768 | * CTR: == output IV, in little endian | 2538 | * CTR: == output IV, in little endian |
769 | * TCTR_LOW: == lower qword of CTR | 2539 | * TCTR_LOW: == lower qword of CTR |
770 | */ | 2540 | */ |
2541 | .align 4 | ||
771 | _aesni_inc: | 2542 | _aesni_inc: |
772 | paddq INC, CTR | 2543 | paddq INC, CTR |
773 | add $1, TCTR_LOW | 2544 | add $1, TCTR_LOW |
@@ -839,3 +2610,4 @@ ENTRY(aesni_ctr_enc) | |||
839 | movups IV, (IVP) | 2610 | movups IV, (IVP) |
840 | .Lctr_enc_just_ret: | 2611 | .Lctr_enc_just_ret: |
841 | ret | 2612 | ret |
2613 | #endif | ||
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 2cb3dcc4490a..e1e60c7d5813 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c | |||
@@ -5,6 +5,14 @@ | |||
5 | * Copyright (C) 2008, Intel Corp. | 5 | * Copyright (C) 2008, Intel Corp. |
6 | * Author: Huang Ying <ying.huang@intel.com> | 6 | * Author: Huang Ying <ying.huang@intel.com> |
7 | * | 7 | * |
8 | * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD | ||
9 | * interface for 64-bit kernels. | ||
10 | * Authors: Adrian Hoban <adrian.hoban@intel.com> | ||
11 | * Gabriele Paoloni <gabriele.paoloni@intel.com> | ||
12 | * Tadeusz Struk (tadeusz.struk@intel.com) | ||
13 | * Aidan O'Mahony (aidan.o.mahony@intel.com) | ||
14 | * Copyright (c) 2010, Intel Corporation. | ||
15 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | 16 | * This program is free software; you can redistribute it and/or modify |
9 | * it under the terms of the GNU General Public License as published by | 17 | * it under the terms of the GNU General Public License as published by |
10 | * the Free Software Foundation; either version 2 of the License, or | 18 | * the Free Software Foundation; either version 2 of the License, or |
@@ -21,6 +29,10 @@ | |||
21 | #include <crypto/ctr.h> | 29 | #include <crypto/ctr.h> |
22 | #include <asm/i387.h> | 30 | #include <asm/i387.h> |
23 | #include <asm/aes.h> | 31 | #include <asm/aes.h> |
32 | #include <crypto/scatterwalk.h> | ||
33 | #include <crypto/internal/aead.h> | ||
34 | #include <linux/workqueue.h> | ||
35 | #include <linux/spinlock.h> | ||
24 | 36 | ||
25 | #if defined(CONFIG_CRYPTO_CTR) || defined(CONFIG_CRYPTO_CTR_MODULE) | 37 | #if defined(CONFIG_CRYPTO_CTR) || defined(CONFIG_CRYPTO_CTR_MODULE) |
26 | #define HAS_CTR | 38 | #define HAS_CTR |
@@ -42,8 +54,31 @@ struct async_aes_ctx { | |||
42 | struct cryptd_ablkcipher *cryptd_tfm; | 54 | struct cryptd_ablkcipher *cryptd_tfm; |
43 | }; | 55 | }; |
44 | 56 | ||
45 | #define AESNI_ALIGN 16 | 57 | /* This data is stored at the end of the crypto_tfm struct. |
58 |  * It is per-"session" data storage. | ||
59 |  * It needs to be 16-byte aligned. | ||
60 | */ | ||
61 | struct aesni_rfc4106_gcm_ctx { | ||
62 | u8 hash_subkey[16]; | ||
63 | struct crypto_aes_ctx aes_key_expanded; | ||
64 | u8 nonce[4]; | ||
65 | struct cryptd_aead *cryptd_tfm; | ||
66 | }; | ||
67 | |||
68 | struct aesni_gcm_set_hash_subkey_result { | ||
69 | int err; | ||
70 | struct completion completion; | ||
71 | }; | ||
72 | |||
73 | struct aesni_hash_subkey_req_data { | ||
74 | u8 iv[16]; | ||
75 | struct aesni_gcm_set_hash_subkey_result result; | ||
76 | struct scatterlist sg; | ||
77 | }; | ||
78 | |||
79 | #define AESNI_ALIGN (16) | ||
46 | #define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1)) | 80 | #define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1)) |
81 | #define RFC4106_HASH_SUBKEY_SIZE 16 | ||
47 | 82 | ||
48 | asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, | 83 | asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, |
49 | unsigned int key_len); | 84 | unsigned int key_len); |
@@ -59,9 +94,62 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out, | |||
59 | const u8 *in, unsigned int len, u8 *iv); | 94 | const u8 *in, unsigned int len, u8 *iv); |
60 | asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, | 95 | asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, |
61 | const u8 *in, unsigned int len, u8 *iv); | 96 | const u8 *in, unsigned int len, u8 *iv); |
97 | #ifdef CONFIG_X86_64 | ||
62 | asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, | 98 | asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, |
63 | const u8 *in, unsigned int len, u8 *iv); | 99 | const u8 *in, unsigned int len, u8 *iv); |
64 | 100 | ||
101 | /* asmlinkage void aesni_gcm_enc() | ||
102 | * void *ctx, AES Key schedule. Starts on a 16 byte boundary. | ||
103 | * u8 *out, Ciphertext output. Encrypt in-place is allowed. | ||
104 | * const u8 *in, Plaintext input | ||
105 | * unsigned long plaintext_len, Length of data in bytes for encryption. | ||
106 | * u8 *iv, Pre-counter block j0: 4 byte salt (from Security Association) | ||
107 | * concatenated with 8 byte Initialisation Vector (from IPSec ESP | ||
108 | * Payload) concatenated with 0x00000001. 16-byte aligned pointer. | ||
109 | * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary. | ||
110 | * const u8 *aad, Additional Authentication Data (AAD) | ||
111 | * unsigned long aad_len, Length of AAD in bytes. With RFC4106 this | ||
112 | * is going to be 8 or 12 bytes | ||
113 | * u8 *auth_tag, Authenticated Tag output. | ||
114 | * unsigned long auth_tag_len), Authenticated Tag Length in bytes. | ||
115 | * Valid values are 16 (most likely), 12 or 8. | ||
116 | */ | ||
117 | asmlinkage void aesni_gcm_enc(void *ctx, u8 *out, | ||
118 | const u8 *in, unsigned long plaintext_len, u8 *iv, | ||
119 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | ||
120 | u8 *auth_tag, unsigned long auth_tag_len); | ||
121 | |||
122 | /* asmlinkage void aesni_gcm_dec() | ||
123 | * void *ctx, AES Key schedule. Starts on a 16 byte boundary. | ||
124 | * u8 *out, Plaintext output. Decrypt in-place is allowed. | ||
125 | * const u8 *in, Ciphertext input | ||
126 | * unsigned long ciphertext_len, Length of data in bytes for decryption. | ||
127 | * u8 *iv, Pre-counter block j0: 4 byte salt (from Security Association) | ||
128 | * concatenated with 8 byte Initialisation Vector (from IPSec ESP | ||
129 | * Payload) concatenated with 0x00000001. 16-byte aligned pointer. | ||
130 | * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary. | ||
131 | * const u8 *aad, Additional Authentication Data (AAD) | ||
132 | * unsigned long aad_len, Length of AAD in bytes. With RFC4106 this is going | ||
133 | * to be 8 or 12 bytes | ||
134 | * u8 *auth_tag, Authenticated Tag output. | ||
135 | * unsigned long auth_tag_len) Authenticated Tag Length in bytes. | ||
136 | * Valid values are 16 (most likely), 12 or 8. | ||
137 | */ | ||
138 | asmlinkage void aesni_gcm_dec(void *ctx, u8 *out, | ||
139 | const u8 *in, unsigned long ciphertext_len, u8 *iv, | ||
140 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | ||
141 | u8 *auth_tag, unsigned long auth_tag_len); | ||
142 | |||
143 | static inline struct aesni_rfc4106_gcm_ctx * | ||
144 | aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm) | ||
145 | { | ||
146 | 	return (struct aesni_rfc4106_gcm_ctx *) | ||
147 | 		PTR_ALIGN((u8 *)crypto_tfm_ctx(crypto_aead_tfm(tfm)), | ||
148 | 			  AESNI_ALIGN); | ||
149 | } | ||
151 | #endif | ||
152 | |||
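Because the crypto core does not guarantee 16-byte alignment of the tfm context, the getter above over-allocates the context (see cra_ctxsize further down) and rounds the pointer up. A sketch of the arithmetic PTR_ALIGN performs, expanded by hand:

#include <stdint.h>

#define AESNI_ALIGN 16

/* Round p up to the next multiple of align (a power of two); this is
 * what PTR_ALIGN(p, AESNI_ALIGN) evaluates to. */
static inline void *ptr_align(void *p, uintptr_t align)
{
	return (void *)(((uintptr_t)p + align - 1) & ~(align - 1));
}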
65 | static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx) | 153 | static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx) |
66 | { | 154 | { |
67 | unsigned long addr = (unsigned long)raw_ctx; | 155 | unsigned long addr = (unsigned long)raw_ctx; |
@@ -324,6 +412,7 @@ static struct crypto_alg blk_cbc_alg = { | |||
324 | }, | 412 | }, |
325 | }; | 413 | }; |
326 | 414 | ||
415 | #ifdef CONFIG_X86_64 | ||
327 | static void ctr_crypt_final(struct crypto_aes_ctx *ctx, | 416 | static void ctr_crypt_final(struct crypto_aes_ctx *ctx, |
328 | struct blkcipher_walk *walk) | 417 | struct blkcipher_walk *walk) |
329 | { | 418 | { |
@@ -389,6 +478,7 @@ static struct crypto_alg blk_ctr_alg = { | |||
389 | }, | 478 | }, |
390 | }, | 479 | }, |
391 | }; | 480 | }; |
481 | #endif | ||
392 | 482 | ||
393 | static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, | 483 | static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, |
394 | unsigned int key_len) | 484 | unsigned int key_len) |
@@ -536,6 +626,7 @@ static struct crypto_alg ablk_cbc_alg = { | |||
536 | }, | 626 | }, |
537 | }; | 627 | }; |
538 | 628 | ||
629 | #ifdef CONFIG_X86_64 | ||
539 | static int ablk_ctr_init(struct crypto_tfm *tfm) | 630 | static int ablk_ctr_init(struct crypto_tfm *tfm) |
540 | { | 631 | { |
541 | struct cryptd_ablkcipher *cryptd_tfm; | 632 | struct cryptd_ablkcipher *cryptd_tfm; |
@@ -612,6 +703,7 @@ static struct crypto_alg ablk_rfc3686_ctr_alg = { | |||
612 | }, | 703 | }, |
613 | }; | 704 | }; |
614 | #endif | 705 | #endif |
706 | #endif | ||
615 | 707 | ||
616 | #ifdef HAS_LRW | 708 | #ifdef HAS_LRW |
617 | static int ablk_lrw_init(struct crypto_tfm *tfm) | 709 | static int ablk_lrw_init(struct crypto_tfm *tfm) |
@@ -730,6 +822,424 @@ static struct crypto_alg ablk_xts_alg = { | |||
730 | }; | 822 | }; |
731 | #endif | 823 | #endif |
732 | 824 | ||
825 | #ifdef CONFIG_X86_64 | ||
826 | static int rfc4106_init(struct crypto_tfm *tfm) | ||
827 | { | ||
828 | struct cryptd_aead *cryptd_tfm; | ||
829 | struct aesni_rfc4106_gcm_ctx *ctx = (struct aesni_rfc4106_gcm_ctx *) | ||
830 | PTR_ALIGN((u8 *)crypto_tfm_ctx(tfm), AESNI_ALIGN); | ||
831 | cryptd_tfm = cryptd_alloc_aead("__driver-gcm-aes-aesni", 0, 0); | ||
832 | if (IS_ERR(cryptd_tfm)) | ||
833 | return PTR_ERR(cryptd_tfm); | ||
834 | ctx->cryptd_tfm = cryptd_tfm; | ||
835 | tfm->crt_aead.reqsize = sizeof(struct aead_request) | ||
836 | + crypto_aead_reqsize(&cryptd_tfm->base); | ||
837 | return 0; | ||
838 | } | ||
839 | |||
840 | static void rfc4106_exit(struct crypto_tfm *tfm) | ||
841 | { | ||
842 | struct aesni_rfc4106_gcm_ctx *ctx = | ||
843 | (struct aesni_rfc4106_gcm_ctx *) | ||
844 | PTR_ALIGN((u8 *)crypto_tfm_ctx(tfm), AESNI_ALIGN); | ||
845 | if (!IS_ERR(ctx->cryptd_tfm)) | ||
846 | cryptd_free_aead(ctx->cryptd_tfm); | ||
847 | return; | ||
848 | } | ||
849 | |||
850 | static void | ||
851 | rfc4106_set_hash_subkey_done(struct crypto_async_request *req, int err) | ||
852 | { | ||
853 | struct aesni_gcm_set_hash_subkey_result *result = req->data; | ||
854 | |||
855 | if (err == -EINPROGRESS) | ||
856 | return; | ||
857 | result->err = err; | ||
858 | complete(&result->completion); | ||
859 | } | ||
860 | |||
861 | static int | ||
862 | rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len) | ||
863 | { | ||
864 | struct crypto_ablkcipher *ctr_tfm; | ||
865 | struct ablkcipher_request *req; | ||
866 | int ret = -EINVAL; | ||
867 | struct aesni_hash_subkey_req_data *req_data; | ||
868 | |||
869 | ctr_tfm = crypto_alloc_ablkcipher("ctr(aes)", 0, 0); | ||
870 | if (IS_ERR(ctr_tfm)) | ||
871 | return PTR_ERR(ctr_tfm); | ||
872 | |||
873 | crypto_ablkcipher_clear_flags(ctr_tfm, ~0); | ||
874 | |||
875 | ret = crypto_ablkcipher_setkey(ctr_tfm, key, key_len); | ||
876 | if (ret) { | ||
877 | crypto_free_ablkcipher(ctr_tfm); | ||
878 | return ret; | ||
879 | } | ||
880 | |||
881 | req = ablkcipher_request_alloc(ctr_tfm, GFP_KERNEL); | ||
882 | if (!req) { | ||
883 | crypto_free_ablkcipher(ctr_tfm); | ||
884 | return -EINVAL; | ||
885 | } | ||
886 | |||
887 | req_data = kmalloc(sizeof(*req_data), GFP_KERNEL); | ||
888 | if (!req_data) { | ||
889 | crypto_free_ablkcipher(ctr_tfm); | ||
890 | return -ENOMEM; | ||
891 | } | ||
892 | memset(req_data->iv, 0, sizeof(req_data->iv)); | ||
893 | |||
894 | /* Clear the data in the hash sub key container to zero.*/ | ||
895 | /* We want to cipher all zeros to create the hash sub key. */ | ||
896 | memset(hash_subkey, 0, RFC4106_HASH_SUBKEY_SIZE); | ||
897 | |||
898 | init_completion(&req_data->result.completion); | ||
899 | sg_init_one(&req_data->sg, hash_subkey, RFC4106_HASH_SUBKEY_SIZE); | ||
900 | ablkcipher_request_set_tfm(req, ctr_tfm); | ||
901 | ablkcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP | | ||
902 | CRYPTO_TFM_REQ_MAY_BACKLOG, | ||
903 | rfc4106_set_hash_subkey_done, | ||
904 | &req_data->result); | ||
905 | |||
906 | ablkcipher_request_set_crypt(req, &req_data->sg, | ||
907 | &req_data->sg, RFC4106_HASH_SUBKEY_SIZE, req_data->iv); | ||
908 | |||
909 | ret = crypto_ablkcipher_encrypt(req); | ||
910 | if (ret == -EINPROGRESS || ret == -EBUSY) { | ||
911 | ret = wait_for_completion_interruptible | ||
912 | (&req_data->result.completion); | ||
913 | if (!ret) | ||
914 | ret = req_data->result.err; | ||
915 | } | ||
916 | ablkcipher_request_free(req); | ||
917 | kfree(req_data); | ||
918 | crypto_free_ablkcipher(ctr_tfm); | ||
919 | return ret; | ||
920 | } | ||
921 | |||
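Functionally, the detour through ctr(aes) above computes the GHASH subkey H = E_K(0^128): one all-zero block encrypted in CTR mode with an all-zero counter block degenerates to a plain single-block encryption. A sketch assuming a hypothetical one-block helper aes_encrypt_block():

#include <stdint.h>

/* Hypothetical one-block AES primitive; the driver instead drives the
 * async ctr(aes) machinery to the same effect. */
void aes_encrypt_block(const void *key_schedule, uint8_t *dst,
		       const uint8_t *src);

static void derive_hash_subkey(const void *key_schedule, uint8_t h[16])
{
	static const uint8_t zeroes[16];

	aes_encrypt_block(key_schedule, h, zeroes);	/* H = E_K(0^128) */
}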
922 | static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key, | ||
923 | unsigned int key_len) | ||
924 | { | ||
925 | int ret = 0; | ||
926 | struct crypto_tfm *tfm = crypto_aead_tfm(parent); | ||
927 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent); | ||
928 | u8 *new_key_mem = NULL; | ||
929 | |||
930 | if (key_len < 4) { | ||
931 | crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); | ||
932 | return -EINVAL; | ||
933 | } | ||
934 | /*Account for 4 byte nonce at the end.*/ | ||
935 | key_len -= 4; | ||
936 | if (key_len != AES_KEYSIZE_128) { | ||
937 | crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); | ||
938 | return -EINVAL; | ||
939 | } | ||
940 | |||
941 | memcpy(ctx->nonce, key + key_len, sizeof(ctx->nonce)); | ||
942 | /*This must be on a 16 byte boundary!*/ | ||
943 | if ((unsigned long)(&(ctx->aes_key_expanded.key_enc[0])) % AESNI_ALIGN) | ||
944 | return -EINVAL; | ||
945 | |||
946 | if ((unsigned long)key % AESNI_ALIGN) { | ||
947 | 		/* key is not aligned: use an auxiliary aligned pointer */ | ||
948 | new_key_mem = kmalloc(key_len+AESNI_ALIGN, GFP_KERNEL); | ||
949 | if (!new_key_mem) | ||
950 | return -ENOMEM; | ||
951 | |||
952 | 		/* keep new_key_mem itself for the kfree() below */ | ||
953 | 		memcpy(PTR_ALIGN(new_key_mem, AESNI_ALIGN), key, key_len); | ||
954 | 		key = PTR_ALIGN(new_key_mem, AESNI_ALIGN); | ||
955 | } | ||
956 | |||
957 | if (!irq_fpu_usable()) | ||
958 | ret = crypto_aes_expand_key(&(ctx->aes_key_expanded), | ||
959 | key, key_len); | ||
960 | else { | ||
961 | kernel_fpu_begin(); | ||
962 | ret = aesni_set_key(&(ctx->aes_key_expanded), key, key_len); | ||
963 | kernel_fpu_end(); | ||
964 | } | ||
965 | /*This must be on a 16 byte boundary!*/ | ||
966 | if ((unsigned long)(&(ctx->hash_subkey[0])) % AESNI_ALIGN) { | ||
967 | ret = -EINVAL; | ||
968 | goto exit; | ||
969 | } | ||
970 | ret = rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len); | ||
971 | exit: | ||
972 | kfree(new_key_mem); | ||
973 | return ret; | ||
974 | } | ||
975 | |||
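The setkey above expects the 4-byte RFC 4106 salt appended to the raw AES key, so an AES-128 key arrives as 20 bytes of key material. An illustrative view of that layout (this struct is not part of the driver):

#include <stdint.h>

/* What rfc4106_set_key() consumes when key_len == 20: the last four
 * bytes are stripped into ctx->nonce before key expansion. */
struct rfc4106_key_blob {
	uint8_t aes_key[16];	/* expanded via aesni_set_key() */
	uint8_t salt[4];	/* copied into ctx->nonce */
};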
976 | /* This sets the Integrity Check Value (aka the authentication tag) length, | ||
977 |  * which can be 8, 12 or 16 bytes. */ | ||
978 | static int rfc4106_set_authsize(struct crypto_aead *parent, | ||
979 | unsigned int authsize) | ||
980 | { | ||
981 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent); | ||
982 | struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); | ||
983 | |||
984 | switch (authsize) { | ||
985 | case 8: | ||
986 | case 12: | ||
987 | case 16: | ||
988 | break; | ||
989 | default: | ||
990 | return -EINVAL; | ||
991 | } | ||
992 | crypto_aead_crt(parent)->authsize = authsize; | ||
993 | crypto_aead_crt(cryptd_child)->authsize = authsize; | ||
994 | return 0; | ||
995 | } | ||
996 | |||
997 | static int rfc4106_encrypt(struct aead_request *req) | ||
998 | { | ||
999 | int ret; | ||
1000 | struct crypto_aead *tfm = crypto_aead_reqtfm(req); | ||
1001 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); | ||
1002 | struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); | ||
1003 | |||
1004 | if (!irq_fpu_usable()) { | ||
1005 | struct aead_request *cryptd_req = | ||
1006 | (struct aead_request *) aead_request_ctx(req); | ||
1007 | memcpy(cryptd_req, req, sizeof(*req)); | ||
1008 | aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
1009 | return crypto_aead_encrypt(cryptd_req); | ||
1010 | } else { | ||
1011 | kernel_fpu_begin(); | ||
1012 | ret = cryptd_child->base.crt_aead.encrypt(req); | ||
1013 | kernel_fpu_end(); | ||
1014 | return ret; | ||
1015 | } | ||
1016 | } | ||
1017 | |||
1018 | static int rfc4106_decrypt(struct aead_request *req) | ||
1019 | { | ||
1020 | int ret; | ||
1021 | struct crypto_aead *tfm = crypto_aead_reqtfm(req); | ||
1022 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); | ||
1023 | struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); | ||
1024 | |||
1025 | if (!irq_fpu_usable()) { | ||
1026 | struct aead_request *cryptd_req = | ||
1027 | (struct aead_request *) aead_request_ctx(req); | ||
1028 | memcpy(cryptd_req, req, sizeof(*req)); | ||
1029 | aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
1030 | return crypto_aead_decrypt(cryptd_req); | ||
1031 | } else { | ||
1032 | kernel_fpu_begin(); | ||
1033 | ret = cryptd_child->base.crt_aead.decrypt(req); | ||
1034 | kernel_fpu_end(); | ||
1035 | return ret; | ||
1036 | } | ||
1037 | } | ||
1038 | |||
1039 | static struct crypto_alg rfc4106_alg = { | ||
1040 | .cra_name = "rfc4106(gcm(aes))", | ||
1041 | .cra_driver_name = "rfc4106-gcm-aesni", | ||
1042 | .cra_priority = 400, | ||
1043 | .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC, | ||
1044 | .cra_blocksize = 1, | ||
1045 | .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) + AESNI_ALIGN, | ||
1046 | .cra_alignmask = 0, | ||
1047 | .cra_type = &crypto_nivaead_type, | ||
1048 | .cra_module = THIS_MODULE, | ||
1049 | .cra_list = LIST_HEAD_INIT(rfc4106_alg.cra_list), | ||
1050 | .cra_init = rfc4106_init, | ||
1051 | .cra_exit = rfc4106_exit, | ||
1052 | .cra_u = { | ||
1053 | .aead = { | ||
1054 | .setkey = rfc4106_set_key, | ||
1055 | .setauthsize = rfc4106_set_authsize, | ||
1056 | .encrypt = rfc4106_encrypt, | ||
1057 | .decrypt = rfc4106_decrypt, | ||
1058 | .geniv = "seqiv", | ||
1059 | .ivsize = 8, | ||
1060 | .maxauthsize = 16, | ||
1061 | }, | ||
1062 | }, | ||
1063 | }; | ||
1064 | |||
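With this registration in place, callers reach the accelerated transform through the normal AEAD API. A minimal sketch (error handling elided; header location as in kernels of this era):

#include <linux/crypto.h>

static struct crypto_aead *get_rfc4106_tfm(void)
{
	/* The crypto core picks this driver when its cra_priority wins. */
	return crypto_alloc_aead("rfc4106(gcm(aes))", 0, 0);
}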
1065 | static int __driver_rfc4106_encrypt(struct aead_request *req) | ||
1066 | { | ||
1067 | u8 one_entry_in_sg = 0; | ||
1068 | u8 *src, *dst, *assoc; | ||
1069 | __be32 counter = cpu_to_be32(1); | ||
1070 | struct crypto_aead *tfm = crypto_aead_reqtfm(req); | ||
1071 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); | ||
1072 | void *aes_ctx = &(ctx->aes_key_expanded); | ||
1073 | unsigned long auth_tag_len = crypto_aead_authsize(tfm); | ||
1074 | u8 iv_tab[16+AESNI_ALIGN]; | ||
1075 | u8* iv = (u8 *) PTR_ALIGN((u8 *)iv_tab, AESNI_ALIGN); | ||
1076 | struct scatter_walk src_sg_walk; | ||
1077 | struct scatter_walk assoc_sg_walk; | ||
1078 | struct scatter_walk dst_sg_walk; | ||
1079 | unsigned int i; | ||
1080 | |||
1081 | 	/* Assuming we are supporting rfc4106 64-bit extended */ | ||
1082 | 	/* sequence numbers, the AAD length must be equal */ | ||
1083 | 	/* to 8 or 12 bytes. */ | ||
1084 | if (unlikely(req->assoclen != 8 && req->assoclen != 12)) | ||
1085 | return -EINVAL; | ||
1086 | 	/* Build the IV: 4-byte salt, 8-byte explicit IV, 32-bit counter */ | ||
1087 | for (i = 0; i < 4; i++) | ||
1088 | *(iv+i) = ctx->nonce[i]; | ||
1089 | for (i = 0; i < 8; i++) | ||
1090 | *(iv+4+i) = req->iv[i]; | ||
1091 | *((__be32 *)(iv+12)) = counter; | ||
1092 | |||
1093 | if ((sg_is_last(req->src)) && (sg_is_last(req->assoc))) { | ||
1094 | one_entry_in_sg = 1; | ||
1095 | scatterwalk_start(&src_sg_walk, req->src); | ||
1096 | scatterwalk_start(&assoc_sg_walk, req->assoc); | ||
1097 | src = scatterwalk_map(&src_sg_walk, 0); | ||
1098 | assoc = scatterwalk_map(&assoc_sg_walk, 0); | ||
1099 | dst = src; | ||
1100 | if (unlikely(req->src != req->dst)) { | ||
1101 | scatterwalk_start(&dst_sg_walk, req->dst); | ||
1102 | dst = scatterwalk_map(&dst_sg_walk, 0); | ||
1103 | } | ||
1104 | |||
1105 | } else { | ||
1106 | /* Allocate memory for src, dst, assoc */ | ||
1107 | src = kmalloc(req->cryptlen + auth_tag_len + req->assoclen, | ||
1108 | GFP_ATOMIC); | ||
1109 | if (unlikely(!src)) | ||
1110 | return -ENOMEM; | ||
1111 | assoc = (src + req->cryptlen + auth_tag_len); | ||
1112 | scatterwalk_map_and_copy(src, req->src, 0, req->cryptlen, 0); | ||
1113 | scatterwalk_map_and_copy(assoc, req->assoc, 0, | ||
1114 | req->assoclen, 0); | ||
1115 | dst = src; | ||
1116 | } | ||
1117 | |||
1118 | aesni_gcm_enc(aes_ctx, dst, src, (unsigned long)req->cryptlen, iv, | ||
1119 | ctx->hash_subkey, assoc, (unsigned long)req->assoclen, dst | ||
1120 | + ((unsigned long)req->cryptlen), auth_tag_len); | ||
1121 | |||
1122 | /* The authTag (aka the Integrity Check Value) needs to be written | ||
1123 | * back to the packet. */ | ||
1124 | if (one_entry_in_sg) { | ||
1125 | if (unlikely(req->src != req->dst)) { | ||
1126 | scatterwalk_unmap(dst, 0); | ||
1127 | scatterwalk_done(&dst_sg_walk, 0, 0); | ||
1128 | } | ||
1129 | scatterwalk_unmap(src, 0); | ||
1130 | scatterwalk_unmap(assoc, 0); | ||
1131 | scatterwalk_done(&src_sg_walk, 0, 0); | ||
1132 | scatterwalk_done(&assoc_sg_walk, 0, 0); | ||
1133 | } else { | ||
1134 | scatterwalk_map_and_copy(dst, req->dst, 0, | ||
1135 | req->cryptlen + auth_tag_len, 1); | ||
1136 | kfree(src); | ||
1137 | } | ||
1138 | return 0; | ||
1139 | } | ||
1140 | |||
1141 | static int __driver_rfc4106_decrypt(struct aead_request *req) | ||
1142 | { | ||
1143 | u8 one_entry_in_sg = 0; | ||
1144 | u8 *src, *dst, *assoc; | ||
1145 | unsigned long tempCipherLen = 0; | ||
1146 | __be32 counter = cpu_to_be32(1); | ||
1147 | int retval = 0; | ||
1148 | struct crypto_aead *tfm = crypto_aead_reqtfm(req); | ||
1149 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); | ||
1150 | void *aes_ctx = &(ctx->aes_key_expanded); | ||
1151 | unsigned long auth_tag_len = crypto_aead_authsize(tfm); | ||
1152 | u8 iv_and_authTag[32+AESNI_ALIGN]; | ||
1153 | u8 *iv = (u8 *) PTR_ALIGN((u8 *)iv_and_authTag, AESNI_ALIGN); | ||
1154 | u8 *authTag = iv + 16; | ||
1155 | struct scatter_walk src_sg_walk; | ||
1156 | struct scatter_walk assoc_sg_walk; | ||
1157 | struct scatter_walk dst_sg_walk; | ||
1158 | unsigned int i; | ||
1159 | |||
1160 | if (unlikely((req->cryptlen < auth_tag_len) || | ||
1161 | (req->assoclen != 8 && req->assoclen != 12))) | ||
1162 | return -EINVAL; | ||
1163 | /* Assuming we are supporting rfc4106 64-bit extended */ | ||
1164 | /* sequence numbers, we need the AAD length to be */ | ||
1165 | /* equal to 8 or 12 bytes (checked above). */ | ||
1166 | |||
1167 | tempCipherLen = (unsigned long)(req->cryptlen - auth_tag_len); | ||
1168 | /* Build the IV: nonce salt, explicit IV, then block counter */ | ||
1169 | for (i = 0; i < 4; i++) | ||
1170 | *(iv+i) = ctx->nonce[i]; | ||
1171 | for (i = 0; i < 8; i++) | ||
1172 | *(iv+4+i) = req->iv[i]; | ||
1173 | *((__be32 *)(iv+12)) = counter; | ||
1174 | |||
1175 | if ((sg_is_last(req->src)) && (sg_is_last(req->assoc))) { | ||
1176 | one_entry_in_sg = 1; | ||
1177 | scatterwalk_start(&src_sg_walk, req->src); | ||
1178 | scatterwalk_start(&assoc_sg_walk, req->assoc); | ||
1179 | src = scatterwalk_map(&src_sg_walk, 0); | ||
1180 | assoc = scatterwalk_map(&assoc_sg_walk, 0); | ||
1181 | dst = src; | ||
1182 | if (unlikely(req->src != req->dst)) { | ||
1183 | scatterwalk_start(&dst_sg_walk, req->dst); | ||
1184 | dst = scatterwalk_map(&dst_sg_walk, 0); | ||
1185 | } | ||
1186 | |||
1187 | } else { | ||
1188 | /* Allocate memory for src, dst, assoc */ | ||
1189 | src = kmalloc(req->cryptlen + req->assoclen, GFP_ATOMIC); | ||
1190 | if (!src) | ||
1191 | return -ENOMEM; | ||
1192 | assoc = (src + req->cryptlen); | ||
1193 | scatterwalk_map_and_copy(src, req->src, 0, req->cryptlen, 0); | ||
1194 | scatterwalk_map_and_copy(assoc, req->assoc, 0, | ||
1195 | req->assoclen, 0); | ||
1196 | dst = src; | ||
1197 | } | ||
1198 | |||
1199 | aesni_gcm_dec(aes_ctx, dst, src, tempCipherLen, iv, | ||
1200 | ctx->hash_subkey, assoc, (unsigned long)req->assoclen, | ||
1201 | authTag, auth_tag_len); | ||
1202 | |||
1203 | /* Compare generated tag with passed in tag. */ | ||
1204 | retval = memcmp(src + tempCipherLen, authTag, auth_tag_len) ? | ||
1205 | -EBADMSG : 0; | ||
1206 | |||
1207 | if (one_entry_in_sg) { | ||
1208 | if (unlikely(req->src != req->dst)) { | ||
1209 | scatterwalk_unmap(dst, 0); | ||
1210 | scatterwalk_done(&dst_sg_walk, 0, 0); | ||
1211 | } | ||
1212 | scatterwalk_unmap(src, 0); | ||
1213 | scatterwalk_unmap(assoc, 0); | ||
1214 | scatterwalk_done(&src_sg_walk, 0, 0); | ||
1215 | scatterwalk_done(&assoc_sg_walk, 0, 0); | ||
1216 | } else { | ||
1217 | scatterwalk_map_and_copy(dst, req->dst, 0, req->cryptlen, 1); | ||
1218 | kfree(src); | ||
1219 | } | ||
1220 | return retval; | ||
1221 | } | ||
1222 | |||
1223 | static struct crypto_alg __rfc4106_alg = { | ||
1224 | .cra_name = "__gcm-aes-aesni", | ||
1225 | .cra_driver_name = "__driver-gcm-aes-aesni", | ||
1226 | .cra_priority = 0, | ||
1227 | .cra_flags = CRYPTO_ALG_TYPE_AEAD, | ||
1228 | .cra_blocksize = 1, | ||
1229 | .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) + AESNI_ALIGN, | ||
1230 | .cra_alignmask = 0, | ||
1231 | .cra_type = &crypto_aead_type, | ||
1232 | .cra_module = THIS_MODULE, | ||
1233 | .cra_list = LIST_HEAD_INIT(__rfc4106_alg.cra_list), | ||
1234 | .cra_u = { | ||
1235 | .aead = { | ||
1236 | .encrypt = __driver_rfc4106_encrypt, | ||
1237 | .decrypt = __driver_rfc4106_decrypt, | ||
1238 | }, | ||
1239 | }, | ||
1240 | }; | ||
1241 | #endif | ||
1242 | |||
733 | static int __init aesni_init(void) | 1243 | static int __init aesni_init(void) |
734 | { | 1244 | { |
735 | int err; | 1245 | int err; |
@@ -738,6 +1248,7 @@ static int __init aesni_init(void) | |||
738 | printk(KERN_INFO "Intel AES-NI instructions are not detected.\n"); | 1248 | printk(KERN_INFO "Intel AES-NI instructions are not detected.\n"); |
739 | return -ENODEV; | 1249 | return -ENODEV; |
740 | } | 1250 | } |
1251 | |||
741 | if ((err = crypto_register_alg(&aesni_alg))) | 1252 | if ((err = crypto_register_alg(&aesni_alg))) |
742 | goto aes_err; | 1253 | goto aes_err; |
743 | if ((err = crypto_register_alg(&__aesni_alg))) | 1254 | if ((err = crypto_register_alg(&__aesni_alg))) |
@@ -746,18 +1257,24 @@ static int __init aesni_init(void) | |||
746 | goto blk_ecb_err; | 1257 | goto blk_ecb_err; |
747 | if ((err = crypto_register_alg(&blk_cbc_alg))) | 1258 | if ((err = crypto_register_alg(&blk_cbc_alg))) |
748 | goto blk_cbc_err; | 1259 | goto blk_cbc_err; |
749 | if ((err = crypto_register_alg(&blk_ctr_alg))) | ||
750 | goto blk_ctr_err; | ||
751 | if ((err = crypto_register_alg(&ablk_ecb_alg))) | 1260 | if ((err = crypto_register_alg(&ablk_ecb_alg))) |
752 | goto ablk_ecb_err; | 1261 | goto ablk_ecb_err; |
753 | if ((err = crypto_register_alg(&ablk_cbc_alg))) | 1262 | if ((err = crypto_register_alg(&ablk_cbc_alg))) |
754 | goto ablk_cbc_err; | 1263 | goto ablk_cbc_err; |
1264 | #ifdef CONFIG_X86_64 | ||
1265 | if ((err = crypto_register_alg(&blk_ctr_alg))) | ||
1266 | goto blk_ctr_err; | ||
755 | if ((err = crypto_register_alg(&ablk_ctr_alg))) | 1267 | if ((err = crypto_register_alg(&ablk_ctr_alg))) |
756 | goto ablk_ctr_err; | 1268 | goto ablk_ctr_err; |
1269 | if ((err = crypto_register_alg(&__rfc4106_alg))) | ||
1270 | goto __aead_gcm_err; | ||
1271 | if ((err = crypto_register_alg(&rfc4106_alg))) | ||
1272 | goto aead_gcm_err; | ||
757 | #ifdef HAS_CTR | 1273 | #ifdef HAS_CTR |
758 | if ((err = crypto_register_alg(&ablk_rfc3686_ctr_alg))) | 1274 | if ((err = crypto_register_alg(&ablk_rfc3686_ctr_alg))) |
759 | goto ablk_rfc3686_ctr_err; | 1275 | goto ablk_rfc3686_ctr_err; |
760 | #endif | 1276 | #endif |
1277 | #endif | ||
761 | #ifdef HAS_LRW | 1278 | #ifdef HAS_LRW |
762 | if ((err = crypto_register_alg(&ablk_lrw_alg))) | 1279 | if ((err = crypto_register_alg(&ablk_lrw_alg))) |
763 | goto ablk_lrw_err; | 1280 | goto ablk_lrw_err; |
@@ -770,7 +1287,6 @@ static int __init aesni_init(void) | |||
770 | if ((err = crypto_register_alg(&ablk_xts_alg))) | 1287 | if ((err = crypto_register_alg(&ablk_xts_alg))) |
771 | goto ablk_xts_err; | 1288 | goto ablk_xts_err; |
772 | #endif | 1289 | #endif |
773 | |||
774 | return err; | 1290 | return err; |
775 | 1291 | ||
776 | #ifdef HAS_XTS | 1292 | #ifdef HAS_XTS |
@@ -784,18 +1300,24 @@ ablk_pcbc_err: | |||
784 | crypto_unregister_alg(&ablk_lrw_alg); | 1300 | crypto_unregister_alg(&ablk_lrw_alg); |
785 | ablk_lrw_err: | 1301 | ablk_lrw_err: |
786 | #endif | 1302 | #endif |
1303 | #ifdef CONFIG_X86_64 | ||
787 | #ifdef HAS_CTR | 1304 | #ifdef HAS_CTR |
788 | crypto_unregister_alg(&ablk_rfc3686_ctr_alg); | 1305 | crypto_unregister_alg(&ablk_rfc3686_ctr_alg); |
789 | ablk_rfc3686_ctr_err: | 1306 | ablk_rfc3686_ctr_err: |
790 | #endif | 1307 | #endif |
1308 | crypto_unregister_alg(&rfc4106_alg); | ||
1309 | aead_gcm_err: | ||
1310 | crypto_unregister_alg(&__rfc4106_alg); | ||
1311 | __aead_gcm_err: | ||
791 | crypto_unregister_alg(&ablk_ctr_alg); | 1312 | crypto_unregister_alg(&ablk_ctr_alg); |
792 | ablk_ctr_err: | 1313 | ablk_ctr_err: |
1314 | crypto_unregister_alg(&blk_ctr_alg); | ||
1315 | blk_ctr_err: | ||
1316 | #endif | ||
793 | crypto_unregister_alg(&ablk_cbc_alg); | 1317 | crypto_unregister_alg(&ablk_cbc_alg); |
794 | ablk_cbc_err: | 1318 | ablk_cbc_err: |
795 | crypto_unregister_alg(&ablk_ecb_alg); | 1319 | crypto_unregister_alg(&ablk_ecb_alg); |
796 | ablk_ecb_err: | 1320 | ablk_ecb_err: |
797 | crypto_unregister_alg(&blk_ctr_alg); | ||
798 | blk_ctr_err: | ||
799 | crypto_unregister_alg(&blk_cbc_alg); | 1321 | crypto_unregister_alg(&blk_cbc_alg); |
800 | blk_cbc_err: | 1322 | blk_cbc_err: |
801 | crypto_unregister_alg(&blk_ecb_alg); | 1323 | crypto_unregister_alg(&blk_ecb_alg); |
@@ -818,13 +1340,17 @@ static void __exit aesni_exit(void) | |||
818 | #ifdef HAS_LRW | 1340 | #ifdef HAS_LRW |
819 | crypto_unregister_alg(&ablk_lrw_alg); | 1341 | crypto_unregister_alg(&ablk_lrw_alg); |
820 | #endif | 1342 | #endif |
1343 | #ifdef CONFIG_X86_64 | ||
821 | #ifdef HAS_CTR | 1344 | #ifdef HAS_CTR |
822 | crypto_unregister_alg(&ablk_rfc3686_ctr_alg); | 1345 | crypto_unregister_alg(&ablk_rfc3686_ctr_alg); |
823 | #endif | 1346 | #endif |
1347 | crypto_unregister_alg(&rfc4106_alg); | ||
1348 | crypto_unregister_alg(&__rfc4106_alg); | ||
824 | crypto_unregister_alg(&ablk_ctr_alg); | 1349 | crypto_unregister_alg(&ablk_ctr_alg); |
1350 | crypto_unregister_alg(&blk_ctr_alg); | ||
1351 | #endif | ||
825 | crypto_unregister_alg(&ablk_cbc_alg); | 1352 | crypto_unregister_alg(&ablk_cbc_alg); |
826 | crypto_unregister_alg(&ablk_ecb_alg); | 1353 | crypto_unregister_alg(&ablk_ecb_alg); |
827 | crypto_unregister_alg(&blk_ctr_alg); | ||
828 | crypto_unregister_alg(&blk_cbc_alg); | 1354 | crypto_unregister_alg(&blk_cbc_alg); |
829 | crypto_unregister_alg(&blk_ecb_alg); | 1355 | crypto_unregister_alg(&blk_ecb_alg); |
830 | crypto_unregister_alg(&__aesni_alg); | 1356 | crypto_unregister_alg(&__aesni_alg); |
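The rfc4106 transforms registered above are reached through the kernel's generic AEAD interface rather than by calling the glue routines directly. A minimal sketch of that caller path, assuming the AEAD API of this kernel generation (aead_request_set_assoc() is still present here); the key layout and scatterlist names are illustrative, not part of this patch:

#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/scatterlist.h>

/*
 * Sketch: one in-place encryption pass through the accelerated
 * rfc4106(gcm(aes)) transform. keylen covers the AES key plus the
 * 4-byte nonce salt consumed by the setkey routine; sg_src/sg_assoc
 * are scatterlists prepared by the caller (illustrative names).
 */
static int example_rfc4106_encrypt(struct scatterlist *sg_src,
				   struct scatterlist *sg_assoc,
				   unsigned int cryptlen, u8 *explicit_iv,
				   const u8 *key, unsigned int keylen)
{
	struct crypto_aead *tfm;
	struct aead_request *req;
	int err;

	tfm = crypto_alloc_aead("rfc4106(gcm(aes))", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_aead_setkey(tfm, key, keylen);
	if (!err)
		err = crypto_aead_setauthsize(tfm, 16);	/* full GCM tag */

	req = aead_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		crypto_free_aead(tfm);
		return -ENOMEM;
	}

	/* In-place encrypt; assoclen of 8 satisfies the AAD check above. */
	aead_request_set_crypt(req, sg_src, sg_src, cryptlen, explicit_iv);
	aead_request_set_assoc(req, sg_assoc, 8);
	if (!err)
		err = crypto_aead_encrypt(req);

	aead_request_free(req);
	crypto_free_aead(tfm);
	return err;
}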
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 3b62ab56c7a0..5e1a2eef3e7c 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h | |||
@@ -32,11 +32,7 @@ | |||
32 | #define BOOT_HEAP_SIZE 0x400000 | 32 | #define BOOT_HEAP_SIZE 0x400000 |
33 | #else /* !CONFIG_KERNEL_BZIP2 */ | 33 | #else /* !CONFIG_KERNEL_BZIP2 */ |
34 | 34 | ||
35 | #ifdef CONFIG_X86_64 | 35 | #define BOOT_HEAP_SIZE 0x8000 |
36 | #define BOOT_HEAP_SIZE 0x7000 | ||
37 | #else | ||
38 | #define BOOT_HEAP_SIZE 0x4000 | ||
39 | #endif | ||
40 | 36 | ||
41 | #endif /* !CONFIG_KERNEL_BZIP2 */ | 37 | #endif /* !CONFIG_KERNEL_BZIP2 */ |
42 | 38 | ||
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index b81002f23614..078ad0caefc6 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h | |||
@@ -94,7 +94,7 @@ static inline void hw_breakpoint_disable(void) | |||
94 | 94 | ||
95 | static inline int hw_breakpoint_active(void) | 95 | static inline int hw_breakpoint_active(void) |
96 | { | 96 | { |
97 | return __get_cpu_var(cpu_dr7) & DR_GLOBAL_ENABLE_MASK; | 97 | return __this_cpu_read(cpu_dr7) & DR_GLOBAL_ENABLE_MASK; |
98 | } | 98 | } |
99 | 99 | ||
100 | extern void aout_dump_debugregs(struct user *dump); | 100 | extern void aout_dump_debugregs(struct user *dump); |
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index ff2546ce7178..7a15153c675d 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h | |||
@@ -20,6 +20,9 @@ | |||
20 | #ifndef _ASM_X86_HYPERVISOR_H | 20 | #ifndef _ASM_X86_HYPERVISOR_H |
21 | #define _ASM_X86_HYPERVISOR_H | 21 | #define _ASM_X86_HYPERVISOR_H |
22 | 22 | ||
23 | #include <asm/kvm_para.h> | ||
24 | #include <asm/xen/hypervisor.h> | ||
25 | |||
23 | extern void init_hypervisor(struct cpuinfo_x86 *c); | 26 | extern void init_hypervisor(struct cpuinfo_x86 *c); |
24 | extern void init_hypervisor_platform(void); | 27 | extern void init_hypervisor_platform(void); |
25 | 28 | ||
@@ -47,4 +50,13 @@ extern const struct hypervisor_x86 x86_hyper_vmware; | |||
47 | extern const struct hypervisor_x86 x86_hyper_ms_hyperv; | 50 | extern const struct hypervisor_x86 x86_hyper_ms_hyperv; |
48 | extern const struct hypervisor_x86 x86_hyper_xen_hvm; | 51 | extern const struct hypervisor_x86 x86_hyper_xen_hvm; |
49 | 52 | ||
53 | static inline bool hypervisor_x2apic_available(void) | ||
54 | { | ||
55 | if (kvm_para_available()) | ||
56 | return true; | ||
57 | if (xen_x2apic_para_available()) | ||
58 | return true; | ||
59 | return false; | ||
60 | } | ||
61 | |||
50 | #endif | 62 | #endif |
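A short, hedged sketch of the intended caller: early APIC setup can now ask one helper whether the hypervisor's interrupt emulation makes x2APIC safe to use without interrupt remapping (the function and message below are illustrative):

#include <asm/hypervisor.h>
#include <asm/cpufeature.h>
#include <linux/kernel.h>

/* Sketch: consult the paravirt capability before committing to x2APIC. */
static void example_try_x2apic(void)
{
	if (!cpu_has_x2apic)
		return;
	if (hypervisor_x2apic_available())
		pr_info("x2apic: usable, hypervisor emulation is trusted\n");
}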
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index ba870bb6dd8e..c704b38c57a2 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h | |||
@@ -10,6 +10,9 @@ | |||
10 | #include <asm/apicdef.h> | 10 | #include <asm/apicdef.h> |
11 | #include <asm/irq_vectors.h> | 11 | #include <asm/irq_vectors.h> |
12 | 12 | ||
13 | /* Even though we don't support this, supply it to appease OF */ | ||
14 | static inline void irq_dispose_mapping(unsigned int virq) { } | ||
15 | |||
13 | static inline int irq_canonicalize(int irq) | 16 | static inline int irq_canonicalize(int irq) |
14 | { | 17 | { |
15 | return ((irq == 2) ? 9 : irq); | 18 | return ((irq == 2) ? 9 : irq); |
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index f23eb2528464..ca242d35e873 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h | |||
@@ -18,7 +18,6 @@ enum die_val { | |||
18 | DIE_TRAP, | 18 | DIE_TRAP, |
19 | DIE_GPF, | 19 | DIE_GPF, |
20 | DIE_CALL, | 20 | DIE_CALL, |
21 | DIE_NMI_IPI, | ||
22 | DIE_PAGE_FAULT, | 21 | DIE_PAGE_FAULT, |
23 | DIE_NMIUNKNOWN, | 22 | DIE_NMIUNKNOWN, |
24 | }; | 23 | }; |
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index b36c6b3fe144..8e37deb1eb38 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h | |||
@@ -15,6 +15,14 @@ | |||
15 | 15 | ||
16 | struct x86_emulate_ctxt; | 16 | struct x86_emulate_ctxt; |
17 | 17 | ||
18 | struct x86_exception { | ||
19 | u8 vector; | ||
20 | bool error_code_valid; | ||
21 | u16 error_code; | ||
22 | bool nested_page_fault; | ||
23 | u64 address; /* cr2 or nested page fault gpa */ | ||
24 | }; | ||
25 | |||
18 | /* | 26 | /* |
19 | * x86_emulate_ops: | 27 | * x86_emulate_ops: |
20 | * | 28 | * |
@@ -64,7 +72,8 @@ struct x86_emulate_ops { | |||
64 | * @bytes: [IN ] Number of bytes to read from memory. | 72 | * @bytes: [IN ] Number of bytes to read from memory. |
65 | */ | 73 | */ |
66 | int (*read_std)(unsigned long addr, void *val, | 74 | int (*read_std)(unsigned long addr, void *val, |
67 | unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error); | 75 | unsigned int bytes, struct kvm_vcpu *vcpu, |
76 | struct x86_exception *fault); | ||
68 | 77 | ||
69 | /* | 78 | /* |
70 | * write_std: Write bytes of standard (non-emulated/special) memory. | 79 | * write_std: Write bytes of standard (non-emulated/special) memory. |
@@ -74,7 +83,8 @@ struct x86_emulate_ops { | |||
74 | * @bytes: [IN ] Number of bytes to write to memory. | 83 | * @bytes: [IN ] Number of bytes to write to memory. |
75 | */ | 84 | */ |
76 | int (*write_std)(unsigned long addr, void *val, | 85 | int (*write_std)(unsigned long addr, void *val, |
77 | unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error); | 86 | unsigned int bytes, struct kvm_vcpu *vcpu, |
87 | struct x86_exception *fault); | ||
78 | /* | 88 | /* |
79 | * fetch: Read bytes of standard (non-emulated/special) memory. | 89 | * fetch: Read bytes of standard (non-emulated/special) memory. |
80 | * Used for instruction fetch. | 90 | * Used for instruction fetch. |
@@ -83,7 +93,8 @@ struct x86_emulate_ops { | |||
83 | * @bytes: [IN ] Number of bytes to read from memory. | 93 | * @bytes: [IN ] Number of bytes to read from memory. |
84 | */ | 94 | */ |
85 | int (*fetch)(unsigned long addr, void *val, | 95 | int (*fetch)(unsigned long addr, void *val, |
86 | unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error); | 96 | unsigned int bytes, struct kvm_vcpu *vcpu, |
97 | struct x86_exception *fault); | ||
87 | 98 | ||
88 | /* | 99 | /* |
89 | * read_emulated: Read bytes from emulated/special memory area. | 100 | * read_emulated: Read bytes from emulated/special memory area. |
@@ -94,7 +105,7 @@ struct x86_emulate_ops { | |||
94 | int (*read_emulated)(unsigned long addr, | 105 | int (*read_emulated)(unsigned long addr, |
95 | void *val, | 106 | void *val, |
96 | unsigned int bytes, | 107 | unsigned int bytes, |
97 | unsigned int *error, | 108 | struct x86_exception *fault, |
98 | struct kvm_vcpu *vcpu); | 109 | struct kvm_vcpu *vcpu); |
99 | 110 | ||
100 | /* | 111 | /* |
@@ -107,7 +118,7 @@ struct x86_emulate_ops { | |||
107 | int (*write_emulated)(unsigned long addr, | 118 | int (*write_emulated)(unsigned long addr, |
108 | const void *val, | 119 | const void *val, |
109 | unsigned int bytes, | 120 | unsigned int bytes, |
110 | unsigned int *error, | 121 | struct x86_exception *fault, |
111 | struct kvm_vcpu *vcpu); | 122 | struct kvm_vcpu *vcpu); |
112 | 123 | ||
113 | /* | 124 | /* |
@@ -122,7 +133,7 @@ struct x86_emulate_ops { | |||
122 | const void *old, | 133 | const void *old, |
123 | const void *new, | 134 | const void *new, |
124 | unsigned int bytes, | 135 | unsigned int bytes, |
125 | unsigned int *error, | 136 | struct x86_exception *fault, |
126 | struct kvm_vcpu *vcpu); | 137 | struct kvm_vcpu *vcpu); |
127 | 138 | ||
128 | int (*pio_in_emulated)(int size, unsigned short port, void *val, | 139 | int (*pio_in_emulated)(int size, unsigned short port, void *val, |
@@ -159,7 +170,10 @@ struct operand { | |||
159 | }; | 170 | }; |
160 | union { | 171 | union { |
161 | unsigned long *reg; | 172 | unsigned long *reg; |
162 | unsigned long mem; | 173 | struct segmented_address { |
174 | ulong ea; | ||
175 | unsigned seg; | ||
176 | } mem; | ||
163 | } addr; | 177 | } addr; |
164 | union { | 178 | union { |
165 | unsigned long val; | 179 | unsigned long val; |
@@ -226,9 +240,8 @@ struct x86_emulate_ctxt { | |||
226 | 240 | ||
227 | bool perm_ok; /* do not check permissions if true */ | 241 | bool perm_ok; /* do not check permissions if true */ |
228 | 242 | ||
229 | int exception; /* exception that happens during emulation or -1 */ | 243 | bool have_exception; |
230 | u32 error_code; /* error code for exception */ | 244 | struct x86_exception exception; |
231 | bool error_code_valid; | ||
232 | 245 | ||
233 | /* decode cache */ | 246 | /* decode cache */ |
234 | struct decode_cache decode; | 247 | struct decode_cache decode; |
@@ -252,7 +265,7 @@ struct x86_emulate_ctxt { | |||
252 | #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64 | 265 | #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64 |
253 | #endif | 266 | #endif |
254 | 267 | ||
255 | int x86_decode_insn(struct x86_emulate_ctxt *ctxt); | 268 | int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len); |
256 | #define EMULATION_FAILED -1 | 269 | #define EMULATION_FAILED -1 |
257 | #define EMULATION_OK 0 | 270 | #define EMULATION_OK 0 |
258 | #define EMULATION_RESTART 1 | 271 | #define EMULATION_RESTART 1 |
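With fault details folded into struct x86_exception, emulator callbacks report a fault by filling one structure instead of passing back a bare error code. A sketch of the pattern a ->read_std()-style implementation might follow (helper name and values illustrative):

#include <asm/kvm_emulate.h>
#include <asm/kvm_host.h>

/* Sketch: report an ordinary #PF through the new exception struct. */
static int example_report_page_fault(struct x86_exception *fault,
				     u64 gva, u16 error_code)
{
	fault->vector = PF_VECTOR;
	fault->error_code_valid = true;
	fault->error_code = error_code;
	fault->nested_page_fault = false;
	fault->address = gva;		/* becomes cr2 on injection */
	return X86EMUL_PROPAGATE_FAULT;
}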
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f702f82aa1eb..ffd7f8d29187 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -83,11 +83,14 @@ | |||
83 | #define KVM_NR_FIXED_MTRR_REGION 88 | 83 | #define KVM_NR_FIXED_MTRR_REGION 88 |
84 | #define KVM_NR_VAR_MTRR 8 | 84 | #define KVM_NR_VAR_MTRR 8 |
85 | 85 | ||
86 | #define ASYNC_PF_PER_VCPU 64 | ||
87 | |||
86 | extern spinlock_t kvm_lock; | 88 | extern spinlock_t kvm_lock; |
87 | extern struct list_head vm_list; | 89 | extern struct list_head vm_list; |
88 | 90 | ||
89 | struct kvm_vcpu; | 91 | struct kvm_vcpu; |
90 | struct kvm; | 92 | struct kvm; |
93 | struct kvm_async_pf; | ||
91 | 94 | ||
92 | enum kvm_reg { | 95 | enum kvm_reg { |
93 | VCPU_REGS_RAX = 0, | 96 | VCPU_REGS_RAX = 0, |
@@ -114,6 +117,7 @@ enum kvm_reg { | |||
114 | 117 | ||
115 | enum kvm_reg_ex { | 118 | enum kvm_reg_ex { |
116 | VCPU_EXREG_PDPTR = NR_VCPU_REGS, | 119 | VCPU_EXREG_PDPTR = NR_VCPU_REGS, |
120 | VCPU_EXREG_CR3, | ||
117 | }; | 121 | }; |
118 | 122 | ||
119 | enum { | 123 | enum { |
@@ -238,16 +242,18 @@ struct kvm_mmu { | |||
238 | void (*new_cr3)(struct kvm_vcpu *vcpu); | 242 | void (*new_cr3)(struct kvm_vcpu *vcpu); |
239 | void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); | 243 | void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); |
240 | unsigned long (*get_cr3)(struct kvm_vcpu *vcpu); | 244 | unsigned long (*get_cr3)(struct kvm_vcpu *vcpu); |
241 | int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err); | 245 | int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err, |
242 | void (*inject_page_fault)(struct kvm_vcpu *vcpu); | 246 | bool prefault); |
247 | void (*inject_page_fault)(struct kvm_vcpu *vcpu, | ||
248 | struct x86_exception *fault); | ||
243 | void (*free)(struct kvm_vcpu *vcpu); | 249 | void (*free)(struct kvm_vcpu *vcpu); |
244 | gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, | 250 | gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, |
245 | u32 *error); | 251 | struct x86_exception *exception); |
246 | gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access); | 252 | gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access); |
247 | void (*prefetch_page)(struct kvm_vcpu *vcpu, | 253 | void (*prefetch_page)(struct kvm_vcpu *vcpu, |
248 | struct kvm_mmu_page *page); | 254 | struct kvm_mmu_page *page); |
249 | int (*sync_page)(struct kvm_vcpu *vcpu, | 255 | int (*sync_page)(struct kvm_vcpu *vcpu, |
250 | struct kvm_mmu_page *sp, bool clear_unsync); | 256 | struct kvm_mmu_page *sp); |
251 | void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); | 257 | void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); |
252 | hpa_t root_hpa; | 258 | hpa_t root_hpa; |
253 | int root_level; | 259 | int root_level; |
@@ -315,16 +321,6 @@ struct kvm_vcpu_arch { | |||
315 | */ | 321 | */ |
316 | struct kvm_mmu *walk_mmu; | 322 | struct kvm_mmu *walk_mmu; |
317 | 323 | ||
318 | /* | ||
319 | * This struct is filled with the necessary information to propagate a | ||
320 | * page fault into the guest | ||
321 | */ | ||
322 | struct { | ||
323 | u64 address; | ||
324 | unsigned error_code; | ||
325 | bool nested; | ||
326 | } fault; | ||
327 | |||
328 | /* only needed in kvm_pv_mmu_op() path, but it's hot so | 324 | /* only needed in kvm_pv_mmu_op() path, but it's hot so |
329 | * put it here to avoid allocation */ | 325 | * put it here to avoid allocation */ |
330 | struct kvm_pv_mmu_op_buffer mmu_op_buffer; | 326 | struct kvm_pv_mmu_op_buffer mmu_op_buffer; |
@@ -412,6 +408,15 @@ struct kvm_vcpu_arch { | |||
412 | u64 hv_vapic; | 408 | u64 hv_vapic; |
413 | 409 | ||
414 | cpumask_var_t wbinvd_dirty_mask; | 410 | cpumask_var_t wbinvd_dirty_mask; |
411 | |||
412 | struct { | ||
413 | bool halted; | ||
414 | gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)]; | ||
415 | struct gfn_to_hva_cache data; | ||
416 | u64 msr_val; | ||
417 | u32 id; | ||
418 | bool send_user_only; | ||
419 | } apf; | ||
415 | }; | 420 | }; |
416 | 421 | ||
417 | struct kvm_arch { | 422 | struct kvm_arch { |
@@ -456,6 +461,10 @@ struct kvm_arch { | |||
456 | /* fields used by HYPER-V emulation */ | 461 | /* fields used by HYPER-V emulation */ |
457 | u64 hv_guest_os_id; | 462 | u64 hv_guest_os_id; |
458 | u64 hv_hypercall; | 463 | u64 hv_hypercall; |
464 | |||
465 | #ifdef CONFIG_KVM_MMU_AUDIT | ||
466 | int audit_point; | ||
467 | #endif | ||
459 | }; | 468 | }; |
460 | 469 | ||
461 | struct kvm_vm_stat { | 470 | struct kvm_vm_stat { |
@@ -529,6 +538,7 @@ struct kvm_x86_ops { | |||
529 | struct kvm_segment *var, int seg); | 538 | struct kvm_segment *var, int seg); |
530 | void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); | 539 | void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); |
531 | void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu); | 540 | void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu); |
541 | void (*decache_cr3)(struct kvm_vcpu *vcpu); | ||
532 | void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu); | 542 | void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu); |
533 | void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); | 543 | void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); |
534 | void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); | 544 | void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); |
@@ -582,9 +592,17 @@ struct kvm_x86_ops { | |||
582 | 592 | ||
583 | void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); | 593 | void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); |
584 | 594 | ||
595 | void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); | ||
585 | const struct trace_print_flags *exit_reasons_str; | 596 | const struct trace_print_flags *exit_reasons_str; |
586 | }; | 597 | }; |
587 | 598 | ||
599 | struct kvm_arch_async_pf { | ||
600 | u32 token; | ||
601 | gfn_t gfn; | ||
602 | unsigned long cr3; | ||
603 | bool direct_map; | ||
604 | }; | ||
605 | |||
588 | extern struct kvm_x86_ops *kvm_x86_ops; | 606 | extern struct kvm_x86_ops *kvm_x86_ops; |
589 | 607 | ||
590 | int kvm_mmu_module_init(void); | 608 | int kvm_mmu_module_init(void); |
@@ -594,7 +612,6 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu); | |||
594 | int kvm_mmu_create(struct kvm_vcpu *vcpu); | 612 | int kvm_mmu_create(struct kvm_vcpu *vcpu); |
595 | int kvm_mmu_setup(struct kvm_vcpu *vcpu); | 613 | int kvm_mmu_setup(struct kvm_vcpu *vcpu); |
596 | void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); | 614 | void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); |
597 | void kvm_mmu_set_base_ptes(u64 base_pte); | ||
598 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, | 615 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, |
599 | u64 dirty_mask, u64 nx_mask, u64 x_mask); | 616 | u64 dirty_mask, u64 nx_mask, u64 x_mask); |
600 | 617 | ||
@@ -623,8 +640,15 @@ enum emulation_result { | |||
623 | #define EMULTYPE_NO_DECODE (1 << 0) | 640 | #define EMULTYPE_NO_DECODE (1 << 0) |
624 | #define EMULTYPE_TRAP_UD (1 << 1) | 641 | #define EMULTYPE_TRAP_UD (1 << 1) |
625 | #define EMULTYPE_SKIP (1 << 2) | 642 | #define EMULTYPE_SKIP (1 << 2) |
626 | int emulate_instruction(struct kvm_vcpu *vcpu, | 643 | int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, |
627 | unsigned long cr2, u16 error_code, int emulation_type); | 644 | int emulation_type, void *insn, int insn_len); |
645 | |||
646 | static inline int emulate_instruction(struct kvm_vcpu *vcpu, | ||
647 | int emulation_type) | ||
648 | { | ||
649 | return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); | ||
650 | } | ||
651 | |||
628 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); | 652 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); |
629 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); | 653 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); |
630 | 654 | ||
@@ -650,7 +674,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, | |||
650 | int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); | 674 | int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); |
651 | int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); | 675 | int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); |
652 | int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); | 676 | int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); |
653 | void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); | 677 | int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); |
654 | int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val); | 678 | int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val); |
655 | int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val); | 679 | int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val); |
656 | unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); | 680 | unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); |
@@ -668,11 +692,11 @@ void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); | |||
668 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); | 692 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); |
669 | void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr); | 693 | void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr); |
670 | void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); | 694 | void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); |
671 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu); | 695 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); |
672 | int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, | 696 | int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, |
673 | gfn_t gfn, void *data, int offset, int len, | 697 | gfn_t gfn, void *data, int offset, int len, |
674 | u32 access); | 698 | u32 access); |
675 | void kvm_propagate_fault(struct kvm_vcpu *vcpu); | 699 | void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); |
676 | bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); | 700 | bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); |
677 | 701 | ||
678 | int kvm_pic_set_irq(void *opaque, int irq, int level); | 702 | int kvm_pic_set_irq(void *opaque, int irq, int level); |
@@ -690,16 +714,21 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); | |||
690 | int kvm_mmu_load(struct kvm_vcpu *vcpu); | 714 | int kvm_mmu_load(struct kvm_vcpu *vcpu); |
691 | void kvm_mmu_unload(struct kvm_vcpu *vcpu); | 715 | void kvm_mmu_unload(struct kvm_vcpu *vcpu); |
692 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); | 716 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); |
693 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error); | 717 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, |
694 | gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error); | 718 | struct x86_exception *exception); |
695 | gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error); | 719 | gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, |
696 | gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error); | 720 | struct x86_exception *exception); |
721 | gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, | ||
722 | struct x86_exception *exception); | ||
723 | gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, | ||
724 | struct x86_exception *exception); | ||
697 | 725 | ||
698 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); | 726 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); |
699 | 727 | ||
700 | int kvm_fix_hypercall(struct kvm_vcpu *vcpu); | 728 | int kvm_fix_hypercall(struct kvm_vcpu *vcpu); |
701 | 729 | ||
702 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code); | 730 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, |
731 | void *insn, int insn_len); | ||
703 | void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); | 732 | void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); |
704 | 733 | ||
705 | void kvm_enable_tdp(void); | 734 | void kvm_enable_tdp(void); |
@@ -766,20 +795,25 @@ enum { | |||
766 | #define HF_VINTR_MASK (1 << 2) | 795 | #define HF_VINTR_MASK (1 << 2) |
767 | #define HF_NMI_MASK (1 << 3) | 796 | #define HF_NMI_MASK (1 << 3) |
768 | #define HF_IRET_MASK (1 << 4) | 797 | #define HF_IRET_MASK (1 << 4) |
798 | #define HF_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */ | ||
769 | 799 | ||
770 | /* | 800 | /* |
771 | * Hardware virtualization extension instructions may fault if a | 801 | * Hardware virtualization extension instructions may fault if a |
772 | * reboot turns off virtualization while processes are running. | 802 | * reboot turns off virtualization while processes are running. |
773 | * Trap the fault and ignore the instruction if that happens. | 803 | * Trap the fault and ignore the instruction if that happens. |
774 | */ | 804 | */ |
775 | asmlinkage void kvm_handle_fault_on_reboot(void); | 805 | asmlinkage void kvm_spurious_fault(void); |
806 | extern bool kvm_rebooting; | ||
776 | 807 | ||
777 | #define __kvm_handle_fault_on_reboot(insn) \ | 808 | #define __kvm_handle_fault_on_reboot(insn) \ |
778 | "666: " insn "\n\t" \ | 809 | "666: " insn "\n\t" \ |
810 | "668: \n\t" \ | ||
779 | ".pushsection .fixup, \"ax\" \n" \ | 811 | ".pushsection .fixup, \"ax\" \n" \ |
780 | "667: \n\t" \ | 812 | "667: \n\t" \ |
813 | "cmpb $0, kvm_rebooting \n\t" \ | ||
814 | "jne 668b \n\t" \ | ||
781 | __ASM_SIZE(push) " $666b \n\t" \ | 815 | __ASM_SIZE(push) " $666b \n\t" \ |
782 | "jmp kvm_handle_fault_on_reboot \n\t" \ | 816 | "call kvm_spurious_fault \n\t" \ |
783 | ".popsection \n\t" \ | 817 | ".popsection \n\t" \ |
784 | ".pushsection __ex_table, \"a\" \n\t" \ | 818 | ".pushsection __ex_table, \"a\" \n\t" \ |
785 | _ASM_PTR " 666b, 667b \n\t" \ | 819 | _ASM_PTR " 666b, 667b \n\t" \ |
@@ -788,6 +822,7 @@ asmlinkage void kvm_handle_fault_on_reboot(void); | |||
788 | #define KVM_ARCH_WANT_MMU_NOTIFIER | 822 | #define KVM_ARCH_WANT_MMU_NOTIFIER |
789 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); | 823 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); |
790 | int kvm_age_hva(struct kvm *kvm, unsigned long hva); | 824 | int kvm_age_hva(struct kvm *kvm, unsigned long hva); |
825 | int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); | ||
791 | void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); | 826 | void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); |
792 | int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); | 827 | int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); |
793 | int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); | 828 | int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); |
@@ -799,4 +834,15 @@ void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); | |||
799 | 834 | ||
800 | bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); | 835 | bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); |
801 | 836 | ||
837 | void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, | ||
838 | struct kvm_async_pf *work); | ||
839 | void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, | ||
840 | struct kvm_async_pf *work); | ||
841 | void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, | ||
842 | struct kvm_async_pf *work); | ||
843 | bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu); | ||
844 | extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); | ||
845 | |||
846 | void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err); | ||
847 | |||
802 | #endif /* _ASM_X86_KVM_HOST_H */ | 848 | #endif /* _ASM_X86_KVM_HOST_H */ |
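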
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 7b562b6184bc..a427bf77a93d 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h | |||
@@ -20,6 +20,7 @@ | |||
20 | * are available. The use of 0x11 and 0x12 is deprecated | 20 | * are available. The use of 0x11 and 0x12 is deprecated |
21 | */ | 21 | */ |
22 | #define KVM_FEATURE_CLOCKSOURCE2 3 | 22 | #define KVM_FEATURE_CLOCKSOURCE2 3 |
23 | #define KVM_FEATURE_ASYNC_PF 4 | ||
23 | 24 | ||
24 | /* The last 8 bits are used to indicate how to interpret the flags field | 25 | /* The last 8 bits are used to indicate how to interpret the flags field |
25 | * in pvclock structure. If no bits are set, all flags are ignored. | 26 | * in pvclock structure. If no bits are set, all flags are ignored. |
@@ -32,9 +33,13 @@ | |||
32 | /* Custom MSRs falls in the range 0x4b564d00-0x4b564dff */ | 33 | /* Custom MSRs falls in the range 0x4b564d00-0x4b564dff */ |
33 | #define MSR_KVM_WALL_CLOCK_NEW 0x4b564d00 | 34 | #define MSR_KVM_WALL_CLOCK_NEW 0x4b564d00 |
34 | #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 | 35 | #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 |
36 | #define MSR_KVM_ASYNC_PF_EN 0x4b564d02 | ||
35 | 37 | ||
36 | #define KVM_MAX_MMU_OP_BATCH 32 | 38 | #define KVM_MAX_MMU_OP_BATCH 32 |
37 | 39 | ||
40 | #define KVM_ASYNC_PF_ENABLED (1 << 0) | ||
41 | #define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1) | ||
42 | |||
38 | /* Operations for KVM_HC_MMU_OP */ | 43 | /* Operations for KVM_HC_MMU_OP */ |
39 | #define KVM_MMU_OP_WRITE_PTE 1 | 44 | #define KVM_MMU_OP_WRITE_PTE 1 |
40 | #define KVM_MMU_OP_FLUSH_TLB 2 | 45 | #define KVM_MMU_OP_FLUSH_TLB 2 |
@@ -61,10 +66,20 @@ struct kvm_mmu_op_release_pt { | |||
61 | __u64 pt_phys; | 66 | __u64 pt_phys; |
62 | }; | 67 | }; |
63 | 68 | ||
69 | #define KVM_PV_REASON_PAGE_NOT_PRESENT 1 | ||
70 | #define KVM_PV_REASON_PAGE_READY 2 | ||
71 | |||
72 | struct kvm_vcpu_pv_apf_data { | ||
73 | __u32 reason; | ||
74 | __u8 pad[60]; | ||
75 | __u32 enabled; | ||
76 | }; | ||
77 | |||
64 | #ifdef __KERNEL__ | 78 | #ifdef __KERNEL__ |
65 | #include <asm/processor.h> | 79 | #include <asm/processor.h> |
66 | 80 | ||
67 | extern void kvmclock_init(void); | 81 | extern void kvmclock_init(void); |
82 | extern int kvm_register_clock(char *txt); | ||
68 | 83 | ||
69 | 84 | ||
70 | /* This instruction is vmcall. On non-VT architectures, it will generate a | 85 | /* This instruction is vmcall. On non-VT architectures, it will generate a |
@@ -160,8 +175,17 @@ static inline unsigned int kvm_arch_para_features(void) | |||
160 | 175 | ||
161 | #ifdef CONFIG_KVM_GUEST | 176 | #ifdef CONFIG_KVM_GUEST |
162 | void __init kvm_guest_init(void); | 177 | void __init kvm_guest_init(void); |
178 | void kvm_async_pf_task_wait(u32 token); | ||
179 | void kvm_async_pf_task_wake(u32 token); | ||
180 | u32 kvm_read_and_reset_pf_reason(void); | ||
163 | #else | 181 | #else |
164 | #define kvm_guest_init() do { } while (0) | 182 | #define kvm_guest_init() do { } while (0) |
183 | #define kvm_async_pf_task_wait(T) do {} while(0) | ||
184 | #define kvm_async_pf_task_wake(T) do {} while(0) | ||
185 | static inline u32 kvm_read_and_reset_pf_reason(void) | ||
186 | { | ||
187 | return 0; | ||
188 | } | ||
165 | #endif | 189 | #endif |
166 | 190 | ||
167 | #endif /* __KERNEL__ */ | 191 | #endif /* __KERNEL__ */ |
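Taken together, the new MSR, flag bits and kvm_vcpu_pv_apf_data give the guest everything needed to opt in to async page faults. A hedged sketch of the registration step on one CPU (the per-cpu variable and function names are illustrative; the real wiring lives in the guest support code):

#include <linux/kvm_para.h>
#include <linux/percpu.h>
#include <asm/msr.h>

/* Sketch: hand the host a 64-byte-aligned reason area, then enable. */
static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_data) __aligned(64);

static void example_enable_async_pf(void)
{
	u64 pa = __pa(&__get_cpu_var(apf_data));

	if (!kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
		return;

	/* Host posts PAGE_NOT_PRESENT/PAGE_READY reasons into apf_data. */
	wrmsrl(MSR_KVM_ASYNC_PF_EN, pa | KVM_ASYNC_PF_ENABLED);
	__get_cpu_var(apf_data).enabled = 1;
}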
diff --git a/arch/x86/include/asm/mach_traps.h b/arch/x86/include/asm/mach_traps.h index f7920601e472..72a8b52e7dfd 100644 --- a/arch/x86/include/asm/mach_traps.h +++ b/arch/x86/include/asm/mach_traps.h | |||
@@ -7,9 +7,19 @@ | |||
7 | 7 | ||
8 | #include <asm/mc146818rtc.h> | 8 | #include <asm/mc146818rtc.h> |
9 | 9 | ||
10 | #define NMI_REASON_PORT 0x61 | ||
11 | |||
12 | #define NMI_REASON_SERR 0x80 | ||
13 | #define NMI_REASON_IOCHK 0x40 | ||
14 | #define NMI_REASON_MASK (NMI_REASON_SERR | NMI_REASON_IOCHK) | ||
15 | |||
16 | #define NMI_REASON_CLEAR_SERR 0x04 | ||
17 | #define NMI_REASON_CLEAR_IOCHK 0x08 | ||
18 | #define NMI_REASON_CLEAR_MASK 0x0f | ||
19 | |||
10 | static inline unsigned char get_nmi_reason(void) | 20 | static inline unsigned char get_nmi_reason(void) |
11 | { | 21 | { |
12 | return inb(0x61); | 22 | return inb(NMI_REASON_PORT); |
13 | } | 23 | } |
14 | 24 | ||
15 | static inline void reassert_nmi(void) | 25 | static inline void reassert_nmi(void) |
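The named constants above replace raw 0x61 reads and magic masks in the platform NMI path. A sketch of the decode-and-clear sequence they are meant for (handler name illustrative):

#include <asm/mach_traps.h>
#include <linux/io.h>
#include <linux/kernel.h>

/* Sketch: classify a platform NMI, then toggle the matching clear bit
 * to re-arm detection. */
static void example_platform_nmi(void)
{
	unsigned char reason = get_nmi_reason();

	if (reason & NMI_REASON_SERR) {
		pr_emerg("NMI: PCI system error (SERR)\n");
		outb((reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR,
		     NMI_REASON_PORT);
	} else if (reason & NMI_REASON_IOCHK) {
		pr_emerg("NMI: I/O check error\n");
		outb((reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK,
		     NMI_REASON_PORT);
	}
}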
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index c4021b953510..c76f5b92b840 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h | |||
@@ -23,6 +23,26 @@ void arch_trigger_all_cpu_backtrace(void); | |||
23 | #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace | 23 | #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace |
24 | #endif | 24 | #endif |
25 | 25 | ||
26 | /* | ||
27 | * Define some priorities for the nmi notifier call chain. | ||
28 | * | ||
29 | * Create a local nmi bit that has a higher priority than | ||
30 | * external nmis, because the local ones are more frequent. | ||
31 | * | ||
32 | * Also set up some default high/normal/low settings for | ||
33 | * subsystems to register with. Using 4 bits to separate | ||
34 | * the priorities. This can go a lot higher if need be. | ||
35 | */ | ||
36 | |||
37 | #define NMI_LOCAL_SHIFT 16 /* randomly picked */ | ||
38 | #define NMI_LOCAL_BIT (1ULL << NMI_LOCAL_SHIFT) | ||
39 | #define NMI_HIGH_PRIOR (1ULL << 8) | ||
40 | #define NMI_NORMAL_PRIOR (1ULL << 4) | ||
41 | #define NMI_LOW_PRIOR (1ULL << 0) | ||
42 | #define NMI_LOCAL_HIGH_PRIOR (NMI_LOCAL_BIT | NMI_HIGH_PRIOR) | ||
43 | #define NMI_LOCAL_NORMAL_PRIOR (NMI_LOCAL_BIT | NMI_NORMAL_PRIOR) | ||
44 | #define NMI_LOCAL_LOW_PRIOR (NMI_LOCAL_BIT | NMI_LOW_PRIOR) | ||
45 | |||
26 | void stop_nmi(void); | 46 | void stop_nmi(void); |
27 | void restart_nmi(void); | 47 | void restart_nmi(void); |
28 | 48 | ||
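These values are meant for the .priority field of die-chain notifier blocks, so frequent local-APIC NMI users (perf, watchdog) run before external-NMI handling. A hedged sketch of a registration using one of the new levels (handler logic illustrative):

#include <linux/init.h>
#include <linux/kdebug.h>
#include <linux/notifier.h>
#include <asm/nmi.h>

static int example_nmi_notify(struct notifier_block *self,
			      unsigned long cmd, void *data)
{
	if (cmd != DIE_NMI)
		return NOTIFY_DONE;
	/* ... check per-cpu state; claim the NMI if it is ours ... */
	return NOTIFY_STOP;
}

static struct notifier_block example_nmi_nb = {
	.notifier_call	= example_nmi_notify,
	.priority	= NMI_LOCAL_LOW_PRIOR,	/* local, but after perf */
};

static int __init example_nmi_init(void)
{
	return register_die_notifier(&example_nmi_nb);
}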
diff --git a/arch/x86/include/asm/olpc.h b/arch/x86/include/asm/olpc.h index 42a978c0c1b3..f482010350fb 100644 --- a/arch/x86/include/asm/olpc.h +++ b/arch/x86/include/asm/olpc.h | |||
@@ -107,10 +107,14 @@ extern int olpc_ec_mask_unset(uint8_t bits); | |||
107 | /* GPIO assignments */ | 107 | /* GPIO assignments */ |
108 | 108 | ||
109 | #define OLPC_GPIO_MIC_AC 1 | 109 | #define OLPC_GPIO_MIC_AC 1 |
110 | #define OLPC_GPIO_DCON_IRQ geode_gpio(7) | 110 | #define OLPC_GPIO_DCON_STAT0 5 |
111 | #define OLPC_GPIO_DCON_STAT1 6 | ||
112 | #define OLPC_GPIO_DCON_IRQ 7 | ||
111 | #define OLPC_GPIO_THRM_ALRM geode_gpio(10) | 113 | #define OLPC_GPIO_THRM_ALRM geode_gpio(10) |
112 | #define OLPC_GPIO_SMB_CLK geode_gpio(14) | 114 | #define OLPC_GPIO_DCON_LOAD 11 |
113 | #define OLPC_GPIO_SMB_DATA geode_gpio(15) | 115 | #define OLPC_GPIO_DCON_BLANK 12 |
116 | #define OLPC_GPIO_SMB_CLK 14 | ||
117 | #define OLPC_GPIO_SMB_DATA 15 | ||
114 | #define OLPC_GPIO_WORKAUX geode_gpio(24) | 118 | #define OLPC_GPIO_WORKAUX geode_gpio(24) |
115 | #define OLPC_GPIO_LID geode_gpio(26) | 119 | #define OLPC_GPIO_LID geode_gpio(26) |
116 | #define OLPC_GPIO_ECSCI geode_gpio(27) | 120 | #define OLPC_GPIO_ECSCI geode_gpio(27) |
diff --git a/arch/x86/include/asm/olpc_ofw.h b/arch/x86/include/asm/olpc_ofw.h index 2a8478140bb3..641988efe063 100644 --- a/arch/x86/include/asm/olpc_ofw.h +++ b/arch/x86/include/asm/olpc_ofw.h | |||
@@ -8,6 +8,8 @@ | |||
8 | 8 | ||
9 | #ifdef CONFIG_OLPC_OPENFIRMWARE | 9 | #ifdef CONFIG_OLPC_OPENFIRMWARE |
10 | 10 | ||
11 | extern bool olpc_ofw_is_installed(void); | ||
12 | |||
11 | /* run an OFW command by calling into the firmware */ | 13 | /* run an OFW command by calling into the firmware */ |
12 | #define olpc_ofw(name, args, res) \ | 14 | #define olpc_ofw(name, args, res) \ |
13 | __olpc_ofw((name), ARRAY_SIZE(args), args, ARRAY_SIZE(res), res) | 15 | __olpc_ofw((name), ARRAY_SIZE(args), args, ARRAY_SIZE(res), res) |
@@ -26,10 +28,17 @@ extern bool olpc_ofw_present(void); | |||
26 | 28 | ||
27 | #else /* !CONFIG_OLPC_OPENFIRMWARE */ | 29 | #else /* !CONFIG_OLPC_OPENFIRMWARE */ |
28 | 30 | ||
31 | static inline bool olpc_ofw_is_installed(void) { return false; } | ||
29 | static inline void olpc_ofw_detect(void) { } | 32 | static inline void olpc_ofw_detect(void) { } |
30 | static inline void setup_olpc_ofw_pgd(void) { } | 33 | static inline void setup_olpc_ofw_pgd(void) { } |
31 | static inline bool olpc_ofw_present(void) { return false; } | 34 | static inline bool olpc_ofw_present(void) { return false; } |
32 | 35 | ||
33 | #endif /* !CONFIG_OLPC_OPENFIRMWARE */ | 36 | #endif /* !CONFIG_OLPC_OPENFIRMWARE */ |
34 | 37 | ||
38 | #ifdef CONFIG_OLPC_OPENFIRMWARE_DT | ||
39 | extern void olpc_dt_build_devicetree(void); | ||
40 | #else | ||
41 | static inline void olpc_dt_build_devicetree(void) { } | ||
42 | #endif /* CONFIG_OLPC_OPENFIRMWARE_DT */ | ||
43 | |||
35 | #endif /* _ASM_X86_OLPC_OFW_H */ | 44 | #endif /* _ASM_X86_OLPC_OFW_H */ |
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 7709c12431b8..2071a8b2b32f 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h | |||
@@ -435,6 +435,11 @@ static inline void pte_update(struct mm_struct *mm, unsigned long addr, | |||
435 | { | 435 | { |
436 | PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep); | 436 | PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep); |
437 | } | 437 | } |
438 | static inline void pmd_update(struct mm_struct *mm, unsigned long addr, | ||
439 | pmd_t *pmdp) | ||
440 | { | ||
441 | PVOP_VCALL3(pv_mmu_ops.pmd_update, mm, addr, pmdp); | ||
442 | } | ||
438 | 443 | ||
439 | static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr, | 444 | static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr, |
440 | pte_t *ptep) | 445 | pte_t *ptep) |
@@ -442,6 +447,12 @@ static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr, | |||
442 | PVOP_VCALL3(pv_mmu_ops.pte_update_defer, mm, addr, ptep); | 447 | PVOP_VCALL3(pv_mmu_ops.pte_update_defer, mm, addr, ptep); |
443 | } | 448 | } |
444 | 449 | ||
450 | static inline void pmd_update_defer(struct mm_struct *mm, unsigned long addr, | ||
451 | pmd_t *pmdp) | ||
452 | { | ||
453 | PVOP_VCALL3(pv_mmu_ops.pmd_update_defer, mm, addr, pmdp); | ||
454 | } | ||
455 | |||
445 | static inline pte_t __pte(pteval_t val) | 456 | static inline pte_t __pte(pteval_t val) |
446 | { | 457 | { |
447 | pteval_t ret; | 458 | pteval_t ret; |
@@ -543,6 +554,20 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, | |||
543 | PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte); | 554 | PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte); |
544 | } | 555 | } |
545 | 556 | ||
557 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
558 | static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, | ||
559 | pmd_t *pmdp, pmd_t pmd) | ||
560 | { | ||
561 | #if PAGETABLE_LEVELS >= 3 | ||
562 | if (sizeof(pmdval_t) > sizeof(long)) | ||
563 | /* 5 arg words */ | ||
564 | pv_mmu_ops.set_pmd_at(mm, addr, pmdp, pmd); | ||
565 | else | ||
566 | PVOP_VCALL4(pv_mmu_ops.set_pmd_at, mm, addr, pmdp, pmd.pmd); | ||
567 | #endif | ||
568 | } | ||
569 | #endif | ||
570 | |||
546 | static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) | 571 | static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) |
547 | { | 572 | { |
548 | pmdval_t val = native_pmd_val(pmd); | 573 | pmdval_t val = native_pmd_val(pmd); |
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index b82bac975250..82885099c869 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h | |||
@@ -265,10 +265,16 @@ struct pv_mmu_ops { | |||
265 | void (*set_pte_at)(struct mm_struct *mm, unsigned long addr, | 265 | void (*set_pte_at)(struct mm_struct *mm, unsigned long addr, |
266 | pte_t *ptep, pte_t pteval); | 266 | pte_t *ptep, pte_t pteval); |
267 | void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval); | 267 | void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval); |
268 | void (*set_pmd_at)(struct mm_struct *mm, unsigned long addr, | ||
269 | pmd_t *pmdp, pmd_t pmdval); | ||
268 | void (*pte_update)(struct mm_struct *mm, unsigned long addr, | 270 | void (*pte_update)(struct mm_struct *mm, unsigned long addr, |
269 | pte_t *ptep); | 271 | pte_t *ptep); |
270 | void (*pte_update_defer)(struct mm_struct *mm, | 272 | void (*pte_update_defer)(struct mm_struct *mm, |
271 | unsigned long addr, pte_t *ptep); | 273 | unsigned long addr, pte_t *ptep); |
274 | void (*pmd_update)(struct mm_struct *mm, unsigned long addr, | ||
275 | pmd_t *pmdp); | ||
276 | void (*pmd_update_defer)(struct mm_struct *mm, | ||
277 | unsigned long addr, pmd_t *pmdp); | ||
272 | 278 | ||
273 | pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr, | 279 | pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr, |
274 | pte_t *ptep); | 280 | pte_t *ptep); |
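The two new pv_mmu_ops hooks mirror pte_update/pte_update_defer at the PMD level for transparent hugepages; native kernels leave them as no-ops, while a shadowing hypervisor backend would hook them. A sketch under that assumption (all names hypothetical):

#include <linux/init.h>
#include <asm/paravirt.h>

/* Sketch: a shadow-paging backend reacting to PMD updates. */
static void example_pmd_update(struct mm_struct *mm, unsigned long addr,
			       pmd_t *pmdp)
{
	/* hypothetical: queue a shadow resync for the huge mapping */
}

static void __init example_pv_mmu_setup(void)
{
	pv_mmu_ops.pmd_update = example_pmd_update;
	pv_mmu_ops.pmd_update_defer = example_pmd_update;
}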
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index f899e01a8ac9..8ee45167e817 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h | |||
@@ -230,6 +230,125 @@ do { \ | |||
230 | }) | 230 | }) |
231 | 231 | ||
232 | /* | 232 | /* |
233 | * Add return operation | ||
234 | */ | ||
235 | #define percpu_add_return_op(var, val) \ | ||
236 | ({ \ | ||
237 | typeof(var) paro_ret__ = val; \ | ||
238 | switch (sizeof(var)) { \ | ||
239 | case 1: \ | ||
240 | asm("xaddb %0, "__percpu_arg(1) \ | ||
241 | : "+q" (paro_ret__), "+m" (var) \ | ||
242 | : : "memory"); \ | ||
243 | break; \ | ||
244 | case 2: \ | ||
245 | asm("xaddw %0, "__percpu_arg(1) \ | ||
246 | : "+r" (paro_ret__), "+m" (var) \ | ||
247 | : : "memory"); \ | ||
248 | break; \ | ||
249 | case 4: \ | ||
250 | asm("xaddl %0, "__percpu_arg(1) \ | ||
251 | : "+r" (paro_ret__), "+m" (var) \ | ||
252 | : : "memory"); \ | ||
253 | break; \ | ||
254 | case 8: \ | ||
255 | asm("xaddq %0, "__percpu_arg(1) \ | ||
256 | : "+re" (paro_ret__), "+m" (var) \ | ||
257 | : : "memory"); \ | ||
258 | break; \ | ||
259 | default: __bad_percpu_size(); \ | ||
260 | } \ | ||
261 | paro_ret__ += val; \ | ||
262 | paro_ret__; \ | ||
263 | }) | ||
264 | |||
265 | /* | ||
266 | * xchg is implemented here using cmpxchg without a lock prefix. A | ||
267 | * native xchg is expensive due to its implied lock prefix: the | ||
268 | * processor cannot prefetch cachelines if xchg is used. | ||
269 | */ | ||
270 | #define percpu_xchg_op(var, nval) \ | ||
271 | ({ \ | ||
272 | typeof(var) pxo_ret__; \ | ||
273 | typeof(var) pxo_new__ = (nval); \ | ||
274 | switch (sizeof(var)) { \ | ||
275 | case 1: \ | ||
276 | asm("\n1:mov "__percpu_arg(1)",%%al" \ | ||
277 | "\n\tcmpxchgb %2, "__percpu_arg(1) \ | ||
278 | "\n\tjnz 1b" \ | ||
279 | : "=a" (pxo_ret__), "+m" (var) \ | ||
280 | : "q" (pxo_new__) \ | ||
281 | : "memory"); \ | ||
282 | break; \ | ||
283 | case 2: \ | ||
284 | asm("\n1:mov "__percpu_arg(1)",%%ax" \ | ||
285 | "\n\tcmpxchgw %2, "__percpu_arg(1) \ | ||
286 | "\n\tjnz 1b" \ | ||
287 | : "=a" (pxo_ret__), "+m" (var) \ | ||
288 | : "r" (pxo_new__) \ | ||
289 | : "memory"); \ | ||
290 | break; \ | ||
291 | case 4: \ | ||
292 | asm("\n1:mov "__percpu_arg(1)",%%eax" \ | ||
293 | "\n\tcmpxchgl %2, "__percpu_arg(1) \ | ||
294 | "\n\tjnz 1b" \ | ||
295 | : "=a" (pxo_ret__), "+m" (var) \ | ||
296 | : "r" (pxo_new__) \ | ||
297 | : "memory"); \ | ||
298 | break; \ | ||
299 | case 8: \ | ||
300 | asm("\n1:mov "__percpu_arg(1)",%%rax" \ | ||
301 | "\n\tcmpxchgq %2, "__percpu_arg(1) \ | ||
302 | "\n\tjnz 1b" \ | ||
303 | : "=a" (pxo_ret__), "+m" (var) \ | ||
304 | : "r" (pxo_new__) \ | ||
305 | : "memory"); \ | ||
306 | break; \ | ||
307 | default: __bad_percpu_size(); \ | ||
308 | } \ | ||
309 | pxo_ret__; \ | ||
310 | }) | ||
311 | |||
312 | /* | ||
313 | * cmpxchg has no such implied lock semantics; as a result it is much | ||
314 | * more efficient for CPU-local operations. | ||
315 | */ | ||
316 | #define percpu_cmpxchg_op(var, oval, nval) \ | ||
317 | ({ \ | ||
318 | typeof(var) pco_ret__; \ | ||
319 | typeof(var) pco_old__ = (oval); \ | ||
320 | typeof(var) pco_new__ = (nval); \ | ||
321 | switch (sizeof(var)) { \ | ||
322 | case 1: \ | ||
323 | asm("cmpxchgb %2, "__percpu_arg(1) \ | ||
324 | : "=a" (pco_ret__), "+m" (var) \ | ||
325 | : "q" (pco_new__), "0" (pco_old__) \ | ||
326 | : "memory"); \ | ||
327 | break; \ | ||
328 | case 2: \ | ||
329 | asm("cmpxchgw %2, "__percpu_arg(1) \ | ||
330 | : "=a" (pco_ret__), "+m" (var) \ | ||
331 | : "r" (pco_new__), "0" (pco_old__) \ | ||
332 | : "memory"); \ | ||
333 | break; \ | ||
334 | case 4: \ | ||
335 | asm("cmpxchgl %2, "__percpu_arg(1) \ | ||
336 | : "=a" (pco_ret__), "+m" (var) \ | ||
337 | : "r" (pco_new__), "0" (pco_old__) \ | ||
338 | : "memory"); \ | ||
339 | break; \ | ||
340 | case 8: \ | ||
341 | asm("cmpxchgq %2, "__percpu_arg(1) \ | ||
342 | : "=a" (pco_ret__), "+m" (var) \ | ||
343 | : "r" (pco_new__), "0" (pco_old__) \ | ||
344 | : "memory"); \ | ||
345 | break; \ | ||
346 | default: __bad_percpu_size(); \ | ||
347 | } \ | ||
348 | pco_ret__; \ | ||
349 | }) | ||
350 | |||
351 | /* | ||
233 | * percpu_read() makes gcc load the percpu variable every time it is | 352 | * percpu_read() makes gcc load the percpu variable every time it is |
234 | * accessed while percpu_read_stable() allows the value to be cached. | 353 | * accessed while percpu_read_stable() allows the value to be cached. |
235 | * percpu_read_stable() is more efficient and can be used if its value | 354 | * percpu_read_stable() is more efficient and can be used if its value |
@@ -267,6 +386,12 @@ do { \ | |||
267 | #define __this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val) | 386 | #define __this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val) |
268 | #define __this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) | 387 | #define __this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) |
269 | #define __this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) | 388 | #define __this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) |
389 | /* | ||
390 | * Generic fallback operations for __this_cpu_xchg_[1-4] are okay and much | ||
391 | * faster than an xchg with forced lock semantics. | ||
392 | */ | ||
393 | #define __this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) | ||
394 | #define __this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
270 | 395 | ||
271 | #define this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) | 396 | #define this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) |
272 | #define this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) | 397 | #define this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) |
@@ -286,6 +411,11 @@ do { \ | |||
286 | #define this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val) | 411 | #define this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val) |
287 | #define this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) | 412 | #define this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) |
288 | #define this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) | 413 | #define this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) |
414 | #define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval) | ||
415 | #define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval) | ||
416 | #define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval) | ||
417 | #define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) | ||
418 | #define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
289 | 419 | ||
290 | #define irqsafe_cpu_add_1(pcp, val) percpu_add_op((pcp), val) | 420 | #define irqsafe_cpu_add_1(pcp, val) percpu_add_op((pcp), val) |
291 | #define irqsafe_cpu_add_2(pcp, val) percpu_add_op((pcp), val) | 421 | #define irqsafe_cpu_add_2(pcp, val) percpu_add_op((pcp), val) |
@@ -299,6 +429,31 @@ do { \ | |||
299 | #define irqsafe_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val) | 429 | #define irqsafe_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val) |
300 | #define irqsafe_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) | 430 | #define irqsafe_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) |
301 | #define irqsafe_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) | 431 | #define irqsafe_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) |
432 | #define irqsafe_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval) | ||
433 | #define irqsafe_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval) | ||
434 | #define irqsafe_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval) | ||
435 | #define irqsafe_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) | ||
436 | #define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
437 | |||
438 | #ifndef CONFIG_M386 | ||
439 | #define __this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) | ||
440 | #define __this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) | ||
441 | #define __this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val) | ||
442 | #define __this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
443 | #define __this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
444 | #define __this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
445 | |||
446 | #define this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) | ||
447 | #define this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) | ||
448 | #define this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val) | ||
449 | #define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
450 | #define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
451 | #define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
452 | |||
453 | #define irqsafe_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
454 | #define irqsafe_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
455 | #define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
456 | #endif /* !CONFIG_M386 */ | ||
302 | 457 | ||
303 | /* | 458 | /* |
304 | * Per cpu atomic 64 bit operations are only available under 64 bit. | 459 | * Per cpu atomic 64 bit operations are only available under 64 bit. |
@@ -311,6 +466,7 @@ do { \ | |||
311 | #define __this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) | 466 | #define __this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) |
312 | #define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) | 467 | #define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) |
313 | #define __this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) | 468 | #define __this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) |
469 | #define __this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) | ||
314 | 470 | ||
315 | #define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) | 471 | #define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) |
316 | #define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) | 472 | #define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) |
@@ -318,12 +474,12 @@ do { \ | |||
318 | #define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) | 474 | #define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) |
319 | #define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) | 475 | #define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) |
320 | #define this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) | 476 | #define this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) |
477 | #define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) | ||
321 | 478 | ||
322 | #define irqsafe_cpu_add_8(pcp, val) percpu_add_op((pcp), val) | 479 | #define irqsafe_cpu_add_8(pcp, val) percpu_add_op((pcp), val) |
323 | #define irqsafe_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) | 480 | #define irqsafe_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) |
324 | #define irqsafe_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) | 481 | #define irqsafe_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) |
325 | #define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) | 482 | #define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) |
326 | |||
327 | #endif | 483 | #endif |
328 | 484 | ||
329 | /* This is not atomic against other CPUs -- CPU preemption needs to be off */ | 485 | /* This is not atomic against other CPUs -- CPU preemption needs to be off */ |
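
The point of wiring percpu_cmpxchg_op()/percpu_xchg_op() into the this_cpu_* family is that callers can update a per-CPU variable atomically with respect to interrupts on the same CPU without disabling them. A minimal caller sketch, not part of this patch (the variable and helper names are illustrative):

	#include <linux/percpu.h>

	static DEFINE_PER_CPU(unsigned long, demo_count);

	/* Drain this CPU's counter to zero and return the drained value,
	 * retrying if an interrupt on this CPU bumped it in between. */
	static unsigned long demo_count_drain(void)
	{
		unsigned long old;

		do {
			old = this_cpu_read(demo_count);
		} while (this_cpu_cmpxchg(demo_count, old, 0) != old);

		return old;
	}
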
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index 295e2ff18a6a..e2f6a99f14ab 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h | |||
@@ -20,6 +20,9 @@ | |||
20 | #define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR) | 20 | #define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR) |
21 | #define ARCH_P4_MAX_CCCR (18) | 21 | #define ARCH_P4_MAX_CCCR (18) |
22 | 22 | ||
23 | #define ARCH_P4_CNTRVAL_BITS (40) | ||
24 | #define ARCH_P4_CNTRVAL_MASK ((1ULL << ARCH_P4_CNTRVAL_BITS) - 1) | ||
25 | |||
23 | #define P4_ESCR_EVENT_MASK 0x7e000000U | 26 | #define P4_ESCR_EVENT_MASK 0x7e000000U |
24 | #define P4_ESCR_EVENT_SHIFT 25 | 27 | #define P4_ESCR_EVENT_SHIFT 25 |
25 | #define P4_ESCR_EVENTMASK_MASK 0x01fffe00U | 28 | #define P4_ESCR_EVENTMASK_MASK 0x01fffe00U |
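
ARCH_P4_CNTRVAL_MASK exists because the Pentium 4 performance counters are only 40 bits wide, so raw MSR reads must be truncated before computing deltas. A worked sketch of the arithmetic (illustrative helper, not from this patch):

	/* Wraparound-safe delta between two raw reads of a 40-bit counter:
	 * subtract first, then truncate; modular arithmetic keeps the
	 * result correct across a single counter overflow. */
	static inline u64 demo_p4_delta(u64 prev_raw, u64 now_raw)
	{
		return (now_raw - prev_raw) & ARCH_P4_CNTRVAL_MASK;
	}
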
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index 271de94c3810..b4389a468fb6 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h | |||
@@ -92,7 +92,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) | |||
92 | extern void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd); | 92 | extern void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd); |
93 | 93 | ||
94 | static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, | 94 | static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, |
95 | unsigned long adddress) | 95 | unsigned long address) |
96 | { | 96 | { |
97 | ___pmd_free_tlb(tlb, pmd); | 97 | ___pmd_free_tlb(tlb, pmd); |
98 | } | 98 | } |
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h index 2334982b339e..98391db840c6 100644 --- a/arch/x86/include/asm/pgtable-2level.h +++ b/arch/x86/include/asm/pgtable-2level.h | |||
@@ -46,6 +46,15 @@ static inline pte_t native_ptep_get_and_clear(pte_t *xp) | |||
46 | #define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp) | 46 | #define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp) |
47 | #endif | 47 | #endif |
48 | 48 | ||
49 | #ifdef CONFIG_SMP | ||
50 | static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) | ||
51 | { | ||
52 | return __pmd(xchg((pmdval_t *)xp, 0)); | ||
53 | } | ||
54 | #else | ||
55 | #define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp) | ||
56 | #endif | ||
57 | |||
49 | /* | 58 | /* |
50 | * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE and _PAGE_BIT_PROTNONE are taken, | 59 | * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE and _PAGE_BIT_PROTNONE are taken, |
51 | * split up the 29 bits of offset into this range: | 60 | * split up the 29 bits of offset into this range: |
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 177b0165ea01..94b979d1b58d 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h | |||
@@ -104,6 +104,29 @@ static inline pte_t native_ptep_get_and_clear(pte_t *ptep) | |||
104 | #define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp) | 104 | #define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp) |
105 | #endif | 105 | #endif |
106 | 106 | ||
107 | #ifdef CONFIG_SMP | ||
108 | union split_pmd { | ||
109 | struct { | ||
110 | u32 pmd_low; | ||
111 | u32 pmd_high; | ||
112 | }; | ||
113 | pmd_t pmd; | ||
114 | }; | ||
115 | static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp) | ||
116 | { | ||
117 | union split_pmd res, *orig = (union split_pmd *)pmdp; | ||
118 | |||
119 | /* xchg acts as a barrier before setting of the high bits */ | ||
120 | res.pmd_low = xchg(&orig->pmd_low, 0); | ||
121 | res.pmd_high = orig->pmd_high; | ||
122 | orig->pmd_high = 0; | ||
123 | |||
124 | return res.pmd; | ||
125 | } | ||
126 | #else | ||
127 | #define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp) | ||
128 | #endif | ||
129 | |||
107 | /* | 130 | /* |
108 | * Bits 0, 6 and 7 are taken in the low part of the pte, | 131 | * Bits 0, 6 and 7 are taken in the low part of the pte, |
109 | * put the 32 bits of offset into the high part. | 132 | * put the 32 bits of offset into the high part. |
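
The ordering in native_pmdp_get_and_clear() is the whole trick on PAE, where a pmd is two 32-bit words: the present bit lives in the low word, so the locked xchg makes the entry not-present in one shot, after which the high word is dead and can be read and cleared non-atomically. The same sequence with the invariant spelled out (a restatement for illustration, not an alternative implementation):

	static inline pmd_t sketch_pmdp_get_and_clear(pmd_t *pmdp)
	{
		union split_pmd res, *orig = (union split_pmd *)pmdp;

		/* Locked xchg: the entry is !present from here on, so no
		 * hardware walk can start consuming it half-cleared. */
		res.pmd_low = xchg(&orig->pmd_low, 0);
		/* Clearing pmd_high first instead would briefly expose a
		 * present entry whose upper pfn bits are already zero. */
		res.pmd_high = orig->pmd_high;
		orig->pmd_high = 0;

		return res.pmd;
	}
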
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index ada823a13c7c..18601c86fab1 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h | |||
@@ -35,6 +35,7 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page); | |||
35 | #else /* !CONFIG_PARAVIRT */ | 35 | #else /* !CONFIG_PARAVIRT */ |
36 | #define set_pte(ptep, pte) native_set_pte(ptep, pte) | 36 | #define set_pte(ptep, pte) native_set_pte(ptep, pte) |
37 | #define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte) | 37 | #define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte) |
38 | #define set_pmd_at(mm, addr, pmdp, pmd) native_set_pmd_at(mm, addr, pmdp, pmd) | ||
38 | 39 | ||
39 | #define set_pte_atomic(ptep, pte) \ | 40 | #define set_pte_atomic(ptep, pte) \ |
40 | native_set_pte_atomic(ptep, pte) | 41 | native_set_pte_atomic(ptep, pte) |
@@ -59,6 +60,8 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page); | |||
59 | 60 | ||
60 | #define pte_update(mm, addr, ptep) do { } while (0) | 61 | #define pte_update(mm, addr, ptep) do { } while (0) |
61 | #define pte_update_defer(mm, addr, ptep) do { } while (0) | 62 | #define pte_update_defer(mm, addr, ptep) do { } while (0) |
63 | #define pmd_update(mm, addr, ptep) do { } while (0) | ||
64 | #define pmd_update_defer(mm, addr, ptep) do { } while (0) | ||
62 | 65 | ||
63 | #define pgd_val(x) native_pgd_val(x) | 66 | #define pgd_val(x) native_pgd_val(x) |
64 | #define __pgd(x) native_make_pgd(x) | 67 | #define __pgd(x) native_make_pgd(x) |
@@ -94,6 +97,11 @@ static inline int pte_young(pte_t pte) | |||
94 | return pte_flags(pte) & _PAGE_ACCESSED; | 97 | return pte_flags(pte) & _PAGE_ACCESSED; |
95 | } | 98 | } |
96 | 99 | ||
100 | static inline int pmd_young(pmd_t pmd) | ||
101 | { | ||
102 | return pmd_flags(pmd) & _PAGE_ACCESSED; | ||
103 | } | ||
104 | |||
97 | static inline int pte_write(pte_t pte) | 105 | static inline int pte_write(pte_t pte) |
98 | { | 106 | { |
99 | return pte_flags(pte) & _PAGE_RW; | 107 | return pte_flags(pte) & _PAGE_RW; |
@@ -142,6 +150,23 @@ static inline int pmd_large(pmd_t pte) | |||
142 | (_PAGE_PSE | _PAGE_PRESENT); | 150 | (_PAGE_PSE | _PAGE_PRESENT); |
143 | } | 151 | } |
144 | 152 | ||
153 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
154 | static inline int pmd_trans_splitting(pmd_t pmd) | ||
155 | { | ||
156 | return pmd_val(pmd) & _PAGE_SPLITTING; | ||
157 | } | ||
158 | |||
159 | static inline int pmd_trans_huge(pmd_t pmd) | ||
160 | { | ||
161 | return pmd_val(pmd) & _PAGE_PSE; | ||
162 | } | ||
163 | |||
164 | static inline int has_transparent_hugepage(void) | ||
165 | { | ||
166 | return cpu_has_pse; | ||
167 | } | ||
168 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ | ||
169 | |||
145 | static inline pte_t pte_set_flags(pte_t pte, pteval_t set) | 170 | static inline pte_t pte_set_flags(pte_t pte, pteval_t set) |
146 | { | 171 | { |
147 | pteval_t v = native_pte_val(pte); | 172 | pteval_t v = native_pte_val(pte); |
@@ -216,6 +241,55 @@ static inline pte_t pte_mkspecial(pte_t pte) | |||
216 | return pte_set_flags(pte, _PAGE_SPECIAL); | 241 | return pte_set_flags(pte, _PAGE_SPECIAL); |
217 | } | 242 | } |
218 | 243 | ||
244 | static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set) | ||
245 | { | ||
246 | pmdval_t v = native_pmd_val(pmd); | ||
247 | |||
248 | return __pmd(v | set); | ||
249 | } | ||
250 | |||
251 | static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear) | ||
252 | { | ||
253 | pmdval_t v = native_pmd_val(pmd); | ||
254 | |||
255 | return __pmd(v & ~clear); | ||
256 | } | ||
257 | |||
258 | static inline pmd_t pmd_mkold(pmd_t pmd) | ||
259 | { | ||
260 | return pmd_clear_flags(pmd, _PAGE_ACCESSED); | ||
261 | } | ||
262 | |||
263 | static inline pmd_t pmd_wrprotect(pmd_t pmd) | ||
264 | { | ||
265 | return pmd_clear_flags(pmd, _PAGE_RW); | ||
266 | } | ||
267 | |||
268 | static inline pmd_t pmd_mkdirty(pmd_t pmd) | ||
269 | { | ||
270 | return pmd_set_flags(pmd, _PAGE_DIRTY); | ||
271 | } | ||
272 | |||
273 | static inline pmd_t pmd_mkhuge(pmd_t pmd) | ||
274 | { | ||
275 | return pmd_set_flags(pmd, _PAGE_PSE); | ||
276 | } | ||
277 | |||
278 | static inline pmd_t pmd_mkyoung(pmd_t pmd) | ||
279 | { | ||
280 | return pmd_set_flags(pmd, _PAGE_ACCESSED); | ||
281 | } | ||
282 | |||
283 | static inline pmd_t pmd_mkwrite(pmd_t pmd) | ||
284 | { | ||
285 | return pmd_set_flags(pmd, _PAGE_RW); | ||
286 | } | ||
287 | |||
288 | static inline pmd_t pmd_mknotpresent(pmd_t pmd) | ||
289 | { | ||
290 | return pmd_clear_flags(pmd, _PAGE_PRESENT); | ||
291 | } | ||
292 | |||
219 | /* | 293 | /* |
220 | * Mask out unsupported bits in a present pgprot. Non-present pgprots | 294 | * Mask out unsupported bits in a present pgprot. Non-present pgprots |
221 | * can use those bits for other purposes, so leave them be. | 295 | * can use those bits for other purposes, so leave them be. |
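
These pmd_set_flags()/pmd_clear_flags() wrappers and the pmd_mk* helpers deliberately mirror the pte_mk* family so huge-pmd code reads like pte code. A hypothetical composition, the way a THP anonymous fault might build a fresh entry (the caller and the exact flag recipe are assumptions, not taken from this patch):

	static pmd_t demo_mk_huge_pmd(struct page *page, pgprot_t prot)
	{
		pmd_t pmd = mk_pmd(page, prot);

		/* Freshly faulted anonymous huge page: writable, dirty,
		 * referenced, and PSE so hardware maps one large page. */
		return pmd_mkhuge(pmd_mkdirty(pmd_mkwrite(pmd_mkyoung(pmd))));
	}
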
@@ -256,6 +330,16 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) | |||
256 | return __pte(val); | 330 | return __pte(val); |
257 | } | 331 | } |
258 | 332 | ||
333 | static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) | ||
334 | { | ||
335 | pmdval_t val = pmd_val(pmd); | ||
336 | |||
337 | val &= _HPAGE_CHG_MASK; | ||
338 | val |= massage_pgprot(newprot) & ~_HPAGE_CHG_MASK; | ||
339 | |||
340 | return __pmd(val); | ||
341 | } | ||
342 | |||
259 | /* mprotect needs to preserve PAT bits when updating vm_page_prot */ | 343 | /* mprotect needs to preserve PAT bits when updating vm_page_prot */ |
260 | #define pgprot_modify pgprot_modify | 344 | #define pgprot_modify pgprot_modify |
261 | static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) | 345 | static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) |
@@ -350,7 +434,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) | |||
350 | * Currently stuck as a macro due to indirect forward reference to | 434 | * Currently stuck as a macro due to indirect forward reference to |
351 | * linux/mmzone.h's __section_mem_map_addr() definition: | 435 | * linux/mmzone.h's __section_mem_map_addr() definition: |
352 | */ | 436 | */ |
353 | #define pmd_page(pmd) pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT) | 437 | #define pmd_page(pmd) pfn_to_page((pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT) |
354 | 438 | ||
355 | /* | 439 | /* |
356 | * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD] | 440 | * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD] |
@@ -524,12 +608,26 @@ static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) | |||
524 | return res; | 608 | return res; |
525 | } | 609 | } |
526 | 610 | ||
611 | static inline pmd_t native_local_pmdp_get_and_clear(pmd_t *pmdp) | ||
612 | { | ||
613 | pmd_t res = *pmdp; | ||
614 | |||
615 | native_pmd_clear(pmdp); | ||
616 | return res; | ||
617 | } | ||
618 | |||
527 | static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr, | 619 | static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr, |
528 | pte_t *ptep , pte_t pte) | 620 | pte_t *ptep , pte_t pte) |
529 | { | 621 | { |
530 | native_set_pte(ptep, pte); | 622 | native_set_pte(ptep, pte); |
531 | } | 623 | } |
532 | 624 | ||
625 | static inline void native_set_pmd_at(struct mm_struct *mm, unsigned long addr, | ||
626 | pmd_t *pmdp , pmd_t pmd) | ||
627 | { | ||
628 | native_set_pmd(pmdp, pmd); | ||
629 | } | ||
630 | |||
533 | #ifndef CONFIG_PARAVIRT | 631 | #ifndef CONFIG_PARAVIRT |
534 | /* | 632 | /* |
535 | * Rules for using pte_update - it must be called after any PTE update which | 633 | * Rules for using pte_update - it must be called after any PTE update which |
@@ -607,6 +705,49 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, | |||
607 | 705 | ||
608 | #define flush_tlb_fix_spurious_fault(vma, address) | 706 | #define flush_tlb_fix_spurious_fault(vma, address) |
609 | 707 | ||
708 | #define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) | ||
709 | |||
710 | #define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS | ||
711 | extern int pmdp_set_access_flags(struct vm_area_struct *vma, | ||
712 | unsigned long address, pmd_t *pmdp, | ||
713 | pmd_t entry, int dirty); | ||
714 | |||
715 | #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG | ||
716 | extern int pmdp_test_and_clear_young(struct vm_area_struct *vma, | ||
717 | unsigned long addr, pmd_t *pmdp); | ||
718 | |||
719 | #define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH | ||
720 | extern int pmdp_clear_flush_young(struct vm_area_struct *vma, | ||
721 | unsigned long address, pmd_t *pmdp); | ||
722 | |||
723 | |||
724 | #define __HAVE_ARCH_PMDP_SPLITTING_FLUSH | ||
725 | extern void pmdp_splitting_flush(struct vm_area_struct *vma, | ||
726 | unsigned long addr, pmd_t *pmdp); | ||
727 | |||
728 | #define __HAVE_ARCH_PMD_WRITE | ||
729 | static inline int pmd_write(pmd_t pmd) | ||
730 | { | ||
731 | return pmd_flags(pmd) & _PAGE_RW; | ||
732 | } | ||
733 | |||
734 | #define __HAVE_ARCH_PMDP_GET_AND_CLEAR | ||
735 | static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, unsigned long addr, | ||
736 | pmd_t *pmdp) | ||
737 | { | ||
738 | pmd_t pmd = native_pmdp_get_and_clear(pmdp); | ||
739 | pmd_update(mm, addr, pmdp); | ||
740 | return pmd; | ||
741 | } | ||
742 | |||
743 | #define __HAVE_ARCH_PMDP_SET_WRPROTECT | ||
744 | static inline void pmdp_set_wrprotect(struct mm_struct *mm, | ||
745 | unsigned long addr, pmd_t *pmdp) | ||
746 | { | ||
747 | clear_bit(_PAGE_BIT_RW, (unsigned long *)pmdp); | ||
748 | pmd_update(mm, addr, pmdp); | ||
749 | } | ||
750 | |||
610 | /* | 751 | /* |
611 | * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); | 752 | * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); |
612 | * | 753 | * |
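
Taken together, these primitives give generic THP code an atomic unhook-then-flush idiom at pmd granularity. A hypothetical caller sketch (the function and HPAGE_PMD_SIZE come from the wider THP series, not from this header):

	static void demo_zap_huge_pmd(struct vm_area_struct *vma,
				      unsigned long addr, pmd_t *pmdp)
	{
		/* Atomically unhook the entry... */
		pmd_t entry = pmdp_get_and_clear(vma->vm_mm, addr, pmdp);

		/* ...then shoot down stale TLB entries before the page
		 * the pmd mapped can be reused. */
		flush_tlb_range(vma, addr, addr + HPAGE_PMD_SIZE);
		page_remove_rmap(pmd_page(entry));
	}
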
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index f86da20347f2..975f709e09ae 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h | |||
@@ -59,6 +59,16 @@ static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) | |||
59 | native_set_pte(ptep, pte); | 59 | native_set_pte(ptep, pte); |
60 | } | 60 | } |
61 | 61 | ||
62 | static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) | ||
63 | { | ||
64 | *pmdp = pmd; | ||
65 | } | ||
66 | |||
67 | static inline void native_pmd_clear(pmd_t *pmd) | ||
68 | { | ||
69 | native_set_pmd(pmd, native_make_pmd(0)); | ||
70 | } | ||
71 | |||
62 | static inline pte_t native_ptep_get_and_clear(pte_t *xp) | 72 | static inline pte_t native_ptep_get_and_clear(pte_t *xp) |
63 | { | 73 | { |
64 | #ifdef CONFIG_SMP | 74 | #ifdef CONFIG_SMP |
@@ -72,14 +82,17 @@ static inline pte_t native_ptep_get_and_clear(pte_t *xp) | |||
72 | #endif | 82 | #endif |
73 | } | 83 | } |
74 | 84 | ||
75 | static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) | 85 | static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) |
76 | { | 86 | { |
77 | *pmdp = pmd; | 87 | #ifdef CONFIG_SMP |
78 | } | 88 | return native_make_pmd(xchg(&xp->pmd, 0)); |
79 | 89 | #else | |
80 | static inline void native_pmd_clear(pmd_t *pmd) | 90 | /* native_local_pmdp_get_and_clear, |
81 | { | 91 | but duplicated because of cyclic dependency */ |
82 | native_set_pmd(pmd, native_make_pmd(0)); | 92 | pmd_t ret = *xp; |
93 | native_pmd_clear(xp); | ||
94 | return ret; | ||
95 | #endif | ||
83 | } | 96 | } |
84 | 97 | ||
85 | static inline void native_set_pud(pud_t *pudp, pud_t pud) | 98 | static inline void native_set_pud(pud_t *pudp, pud_t pud) |
@@ -168,6 +181,7 @@ extern void cleanup_highmap(void); | |||
168 | #define kc_offset_to_vaddr(o) ((o) | ~__VIRTUAL_MASK) | 181 | #define kc_offset_to_vaddr(o) ((o) | ~__VIRTUAL_MASK) |
169 | 182 | ||
170 | #define __HAVE_ARCH_PTE_SAME | 183 | #define __HAVE_ARCH_PTE_SAME |
184 | |||
171 | #endif /* !__ASSEMBLY__ */ | 185 | #endif /* !__ASSEMBLY__ */ |
172 | 186 | ||
173 | #endif /* _ASM_X86_PGTABLE_64_H */ | 187 | #endif /* _ASM_X86_PGTABLE_64_H */ |
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index d1f4a760be23..7db7723d1f32 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h | |||
@@ -22,6 +22,7 @@ | |||
22 | #define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ | 22 | #define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ |
23 | #define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1 | 23 | #define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1 |
24 | #define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1 | 24 | #define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1 |
25 | #define _PAGE_BIT_SPLITTING _PAGE_BIT_UNUSED1 /* only valid on a PSE pmd */ | ||
25 | #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ | 26 | #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ |
26 | 27 | ||
27 | /* If _PAGE_BIT_PRESENT is clear, we use these: */ | 28 | /* If _PAGE_BIT_PRESENT is clear, we use these: */ |
@@ -45,6 +46,7 @@ | |||
45 | #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) | 46 | #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) |
46 | #define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) | 47 | #define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) |
47 | #define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST) | 48 | #define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST) |
49 | #define _PAGE_SPLITTING (_AT(pteval_t, 1) << _PAGE_BIT_SPLITTING) | ||
48 | #define __HAVE_ARCH_PTE_SPECIAL | 50 | #define __HAVE_ARCH_PTE_SPECIAL |
49 | 51 | ||
50 | #ifdef CONFIG_KMEMCHECK | 52 | #ifdef CONFIG_KMEMCHECK |
@@ -70,6 +72,7 @@ | |||
70 | /* Set of bits not changed in pte_modify */ | 72 | /* Set of bits not changed in pte_modify */ |
71 | #define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ | 73 | #define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ |
72 | _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY) | 74 | _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY) |
75 | #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) | ||
73 | 76 | ||
74 | #define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT) | 77 | #define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT) |
75 | #define _PAGE_CACHE_WB (0) | 78 | #define _PAGE_CACHE_WB (0) |
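
_HPAGE_CHG_MASK is _PAGE_CHG_MASK plus _PAGE_PSE, and that one extra bit is load-bearing: pmd_modify() (added to pgtable.h above) masks with it so that a protection change keeps the huge-page bit. The same logic spelled out as a sketch:

	static pmd_t demo_pmd_modify(pmd_t pmd, pgprot_t newprot)
	{
		pmdval_t val = pmd_val(pmd);

		val &= _HPAGE_CHG_MASK;  /* keep pfn, caching, A/D -- and PSE */
		val |= massage_pgprot(newprot) & ~_HPAGE_CHG_MASK;

		/* Without _PAGE_PSE in the mask, an mprotect() would clear
		 * it here and the huge pmd would be misread as a pointer
		 * to a (nonexistent) page table. */
		return __pmd(val);
	}
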
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index cae9c3cb95cf..45636cefa186 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -141,10 +141,9 @@ extern __u32 cpu_caps_set[NCAPINTS]; | |||
141 | #ifdef CONFIG_SMP | 141 | #ifdef CONFIG_SMP |
142 | DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); | 142 | DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); |
143 | #define cpu_data(cpu) per_cpu(cpu_info, cpu) | 143 | #define cpu_data(cpu) per_cpu(cpu_info, cpu) |
144 | #define current_cpu_data __get_cpu_var(cpu_info) | ||
145 | #else | 144 | #else |
145 | #define cpu_info boot_cpu_data | ||
146 | #define cpu_data(cpu) boot_cpu_data | 146 | #define cpu_data(cpu) boot_cpu_data |
147 | #define current_cpu_data boot_cpu_data | ||
148 | #endif | 147 | #endif |
149 | 148 | ||
150 | extern const struct seq_operations cpuinfo_op; | 149 | extern const struct seq_operations cpuinfo_op; |
@@ -762,10 +761,11 @@ extern void select_idle_routine(const struct cpuinfo_x86 *c); | |||
762 | extern void init_c1e_mask(void); | 761 | extern void init_c1e_mask(void); |
763 | 762 | ||
764 | extern unsigned long boot_option_idle_override; | 763 | extern unsigned long boot_option_idle_override; |
765 | extern unsigned long idle_halt; | ||
766 | extern unsigned long idle_nomwait; | ||
767 | extern bool c1e_detected; | 764 | extern bool c1e_detected; |
768 | 765 | ||
766 | enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT, | ||
767 | IDLE_POLL, IDLE_FORCE_MWAIT}; | ||
768 | |||
769 | extern void enable_sep_cpu(void); | 769 | extern void enable_sep_cpu(void); |
770 | extern int sysenter_setup(void); | 770 | extern int sysenter_setup(void); |
771 | 771 | ||
@@ -902,7 +902,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); | |||
902 | /* | 902 | /* |
903 | * The below -8 is to reserve 8 bytes on top of the ring0 stack. | 903 | * The below -8 is to reserve 8 bytes on top of the ring0 stack. |
904 | * This is necessary to guarantee that the entire "struct pt_regs" | 904 | * This is necessary to guarantee that the entire "struct pt_regs" |
905 | * is accessable even if the CPU haven't stored the SS/ESP registers | 905 | * is accessible even if the CPU hasn't stored the SS/ESP registers |
906 | * on the stack (interrupt gate does not save these registers | 906 | * on the stack (interrupt gate does not save these registers |
907 | * when switching to the same priv ring). | 907 | * when switching to the same priv ring). |
908 | * Therefore beware: accessing the ss/esp fields of the | 908 | * Therefore beware: accessing the ss/esp fields of the |
diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h new file mode 100644 index 000000000000..b4ec95f07518 --- /dev/null +++ b/arch/x86/include/asm/prom.h | |||
@@ -0,0 +1 @@ | |||
/* dummy prom.h; here to make linux/of.h's #includes happy */ | |||
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 0e831059ac5a..f2b83bc7d784 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h | |||
@@ -47,14 +47,13 @@ enum { | |||
47 | INTERCEPT_MONITOR, | 47 | INTERCEPT_MONITOR, |
48 | INTERCEPT_MWAIT, | 48 | INTERCEPT_MWAIT, |
49 | INTERCEPT_MWAIT_COND, | 49 | INTERCEPT_MWAIT_COND, |
50 | INTERCEPT_XSETBV, | ||
50 | }; | 51 | }; |
51 | 52 | ||
52 | 53 | ||
53 | struct __attribute__ ((__packed__)) vmcb_control_area { | 54 | struct __attribute__ ((__packed__)) vmcb_control_area { |
54 | u16 intercept_cr_read; | 55 | u32 intercept_cr; |
55 | u16 intercept_cr_write; | 56 | u32 intercept_dr; |
56 | u16 intercept_dr_read; | ||
57 | u16 intercept_dr_write; | ||
58 | u32 intercept_exceptions; | 57 | u32 intercept_exceptions; |
59 | u64 intercept; | 58 | u64 intercept; |
60 | u8 reserved_1[42]; | 59 | u8 reserved_1[42]; |
@@ -81,14 +80,19 @@ struct __attribute__ ((__packed__)) vmcb_control_area { | |||
81 | u32 event_inj_err; | 80 | u32 event_inj_err; |
82 | u64 nested_cr3; | 81 | u64 nested_cr3; |
83 | u64 lbr_ctl; | 82 | u64 lbr_ctl; |
84 | u64 reserved_5; | 83 | u32 clean; |
84 | u32 reserved_5; | ||
85 | u64 next_rip; | 85 | u64 next_rip; |
86 | u8 reserved_6[816]; | 86 | u8 insn_len; |
87 | u8 insn_bytes[15]; | ||
88 | u8 reserved_6[800]; | ||
87 | }; | 89 | }; |
88 | 90 | ||
89 | 91 | ||
90 | #define TLB_CONTROL_DO_NOTHING 0 | 92 | #define TLB_CONTROL_DO_NOTHING 0 |
91 | #define TLB_CONTROL_FLUSH_ALL_ASID 1 | 93 | #define TLB_CONTROL_FLUSH_ALL_ASID 1 |
94 | #define TLB_CONTROL_FLUSH_ASID 3 | ||
95 | #define TLB_CONTROL_FLUSH_ASID_LOCAL 7 | ||
92 | 96 | ||
93 | #define V_TPR_MASK 0x0f | 97 | #define V_TPR_MASK 0x0f |
94 | 98 | ||
@@ -204,19 +208,31 @@ struct __attribute__ ((__packed__)) vmcb { | |||
204 | #define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK | 208 | #define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK |
205 | #define SVM_SELECTOR_CODE_MASK (1 << 3) | 209 | #define SVM_SELECTOR_CODE_MASK (1 << 3) |
206 | 210 | ||
207 | #define INTERCEPT_CR0_MASK 1 | 211 | #define INTERCEPT_CR0_READ 0 |
208 | #define INTERCEPT_CR3_MASK (1 << 3) | 212 | #define INTERCEPT_CR3_READ 3 |
209 | #define INTERCEPT_CR4_MASK (1 << 4) | 213 | #define INTERCEPT_CR4_READ 4 |
210 | #define INTERCEPT_CR8_MASK (1 << 8) | 214 | #define INTERCEPT_CR8_READ 8 |
211 | 215 | #define INTERCEPT_CR0_WRITE (16 + 0) | |
212 | #define INTERCEPT_DR0_MASK 1 | 216 | #define INTERCEPT_CR3_WRITE (16 + 3) |
213 | #define INTERCEPT_DR1_MASK (1 << 1) | 217 | #define INTERCEPT_CR4_WRITE (16 + 4) |
214 | #define INTERCEPT_DR2_MASK (1 << 2) | 218 | #define INTERCEPT_CR8_WRITE (16 + 8) |
215 | #define INTERCEPT_DR3_MASK (1 << 3) | 219 | |
216 | #define INTERCEPT_DR4_MASK (1 << 4) | 220 | #define INTERCEPT_DR0_READ 0 |
217 | #define INTERCEPT_DR5_MASK (1 << 5) | 221 | #define INTERCEPT_DR1_READ 1 |
218 | #define INTERCEPT_DR6_MASK (1 << 6) | 222 | #define INTERCEPT_DR2_READ 2 |
219 | #define INTERCEPT_DR7_MASK (1 << 7) | 223 | #define INTERCEPT_DR3_READ 3 |
224 | #define INTERCEPT_DR4_READ 4 | ||
225 | #define INTERCEPT_DR5_READ 5 | ||
226 | #define INTERCEPT_DR6_READ 6 | ||
227 | #define INTERCEPT_DR7_READ 7 | ||
228 | #define INTERCEPT_DR0_WRITE (16 + 0) | ||
229 | #define INTERCEPT_DR1_WRITE (16 + 1) | ||
230 | #define INTERCEPT_DR2_WRITE (16 + 2) | ||
231 | #define INTERCEPT_DR3_WRITE (16 + 3) | ||
232 | #define INTERCEPT_DR4_WRITE (16 + 4) | ||
233 | #define INTERCEPT_DR5_WRITE (16 + 5) | ||
234 | #define INTERCEPT_DR6_WRITE (16 + 6) | ||
235 | #define INTERCEPT_DR7_WRITE (16 + 7) | ||
220 | 236 | ||
221 | #define SVM_EVTINJ_VEC_MASK 0xff | 237 | #define SVM_EVTINJ_VEC_MASK 0xff |
222 | 238 | ||
@@ -246,6 +262,8 @@ struct __attribute__ ((__packed__)) vmcb { | |||
246 | #define SVM_EXITINFOSHIFT_TS_REASON_JMP 38 | 262 | #define SVM_EXITINFOSHIFT_TS_REASON_JMP 38 |
247 | #define SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE 44 | 263 | #define SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE 44 |
248 | 264 | ||
265 | #define SVM_EXITINFO_REG_MASK 0x0F | ||
266 | |||
249 | #define SVM_EXIT_READ_CR0 0x000 | 267 | #define SVM_EXIT_READ_CR0 0x000 |
250 | #define SVM_EXIT_READ_CR3 0x003 | 268 | #define SVM_EXIT_READ_CR3 0x003 |
251 | #define SVM_EXIT_READ_CR4 0x004 | 269 | #define SVM_EXIT_READ_CR4 0x004 |
@@ -316,6 +334,7 @@ struct __attribute__ ((__packed__)) vmcb { | |||
316 | #define SVM_EXIT_MONITOR 0x08a | 334 | #define SVM_EXIT_MONITOR 0x08a |
317 | #define SVM_EXIT_MWAIT 0x08b | 335 | #define SVM_EXIT_MWAIT 0x08b |
318 | #define SVM_EXIT_MWAIT_COND 0x08c | 336 | #define SVM_EXIT_MWAIT_COND 0x08c |
337 | #define SVM_EXIT_XSETBV 0x08d | ||
319 | #define SVM_EXIT_NPF 0x400 | 338 | #define SVM_EXIT_NPF 0x400 |
320 | 339 | ||
321 | #define SVM_EXIT_ERR -1 | 340 | #define SVM_EXIT_ERR -1 |
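
The reorganisation folds the four 16-bit CR/DR intercept masks into two 32-bit words, with read intercepts in bits 0-15 and the matching write intercepts at bit+16, so the defines become bit numbers rather than masks. A hedged sketch of the intended usage (helper names are illustrative):

	static inline void demo_set_cr_intercept(struct vmcb_control_area *c,
						 int bit)
	{
		c->intercept_cr |= 1U << bit;
	}

	static inline bool demo_is_cr_intercepted(struct vmcb_control_area *c,
						  int bit)
	{
		return c->intercept_cr & (1U << bit);
	}

	/* e.g. demo_set_cr_intercept(c, INTERCEPT_CR3_WRITE) replaces the
	 * old c->intercept_cr_write |= INTERCEPT_CR3_MASK. */
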
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index f66cda56781d..0310da67307f 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h | |||
@@ -30,6 +30,7 @@ asmlinkage void segment_not_present(void); | |||
30 | asmlinkage void stack_segment(void); | 30 | asmlinkage void stack_segment(void); |
31 | asmlinkage void general_protection(void); | 31 | asmlinkage void general_protection(void); |
32 | asmlinkage void page_fault(void); | 32 | asmlinkage void page_fault(void); |
33 | asmlinkage void async_page_fault(void); | ||
33 | asmlinkage void spurious_interrupt_bug(void); | 34 | asmlinkage void spurious_interrupt_bug(void); |
34 | asmlinkage void coprocessor_error(void); | 35 | asmlinkage void coprocessor_error(void); |
35 | asmlinkage void alignment_check(void); | 36 | asmlinkage void alignment_check(void); |
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 9f0cbd987d50..84471b810460 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
@@ -66,15 +66,23 @@ | |||
66 | #define PIN_BASED_NMI_EXITING 0x00000008 | 66 | #define PIN_BASED_NMI_EXITING 0x00000008 |
67 | #define PIN_BASED_VIRTUAL_NMIS 0x00000020 | 67 | #define PIN_BASED_VIRTUAL_NMIS 0x00000020 |
68 | 68 | ||
69 | #define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000002 | ||
69 | #define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 | 70 | #define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 |
71 | #define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL 0x00001000 | ||
70 | #define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 | 72 | #define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 |
71 | #define VM_EXIT_SAVE_IA32_PAT 0x00040000 | 73 | #define VM_EXIT_SAVE_IA32_PAT 0x00040000 |
72 | #define VM_EXIT_LOAD_IA32_PAT 0x00080000 | 74 | #define VM_EXIT_LOAD_IA32_PAT 0x00080000 |
75 | #define VM_EXIT_SAVE_IA32_EFER 0x00100000 | ||
76 | #define VM_EXIT_LOAD_IA32_EFER 0x00200000 | ||
77 | #define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000 | ||
73 | 78 | ||
79 | #define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000002 | ||
74 | #define VM_ENTRY_IA32E_MODE 0x00000200 | 80 | #define VM_ENTRY_IA32E_MODE 0x00000200 |
75 | #define VM_ENTRY_SMM 0x00000400 | 81 | #define VM_ENTRY_SMM 0x00000400 |
76 | #define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 | 82 | #define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 |
83 | #define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL 0x00002000 | ||
77 | #define VM_ENTRY_LOAD_IA32_PAT 0x00004000 | 84 | #define VM_ENTRY_LOAD_IA32_PAT 0x00004000 |
85 | #define VM_ENTRY_LOAD_IA32_EFER 0x00008000 | ||
78 | 86 | ||
79 | /* VMCS Encodings */ | 87 | /* VMCS Encodings */ |
80 | enum vmcs_field { | 88 | enum vmcs_field { |
@@ -239,6 +247,7 @@ enum vmcs_field { | |||
239 | #define EXIT_REASON_TASK_SWITCH 9 | 247 | #define EXIT_REASON_TASK_SWITCH 9 |
240 | #define EXIT_REASON_CPUID 10 | 248 | #define EXIT_REASON_CPUID 10 |
241 | #define EXIT_REASON_HLT 12 | 249 | #define EXIT_REASON_HLT 12 |
250 | #define EXIT_REASON_INVD 13 | ||
242 | #define EXIT_REASON_INVLPG 14 | 251 | #define EXIT_REASON_INVLPG 14 |
243 | #define EXIT_REASON_RDPMC 15 | 252 | #define EXIT_REASON_RDPMC 15 |
244 | #define EXIT_REASON_RDTSC 16 | 253 | #define EXIT_REASON_RDTSC 16 |
@@ -296,6 +305,12 @@ enum vmcs_field { | |||
296 | #define GUEST_INTR_STATE_SMI 0x00000004 | 305 | #define GUEST_INTR_STATE_SMI 0x00000004 |
297 | #define GUEST_INTR_STATE_NMI 0x00000008 | 306 | #define GUEST_INTR_STATE_NMI 0x00000008 |
298 | 307 | ||
308 | /* GUEST_ACTIVITY_STATE flags */ | ||
309 | #define GUEST_ACTIVITY_ACTIVE 0 | ||
310 | #define GUEST_ACTIVITY_HLT 1 | ||
311 | #define GUEST_ACTIVITY_SHUTDOWN 2 | ||
312 | #define GUEST_ACTIVITY_WAIT_SIPI 3 | ||
313 | |||
299 | /* | 314 | /* |
300 | * Exit Qualifications for MOV for Control Register Access | 315 | * Exit Qualifications for MOV for Control Register Access |
301 | */ | 316 | */ |
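
New VM-entry/VM-exit control bits such as VM_ENTRY_LOAD_IA32_EFER are only usable if the CPU advertises them; per the Intel SDM, the high 32 bits of MSR_IA32_VMX_ENTRY_CTLS report the allowed-1 settings. A hedged sketch of such a capability probe (the helper is illustrative; the MSR layout is the documented one):

	static bool demo_vmentry_ctl_supported(u32 ctl)
	{
		u32 low, high;

		rdmsr(MSR_IA32_VMX_ENTRY_CTLS, low, high);
		return (high & ctl) == ctl;	/* may the bit be set to 1? */
	}

	/* e.g. only set VM_ENTRY_LOAD_IA32_EFER in the VMCS when
	 * demo_vmentry_ctl_supported(VM_ENTRY_LOAD_IA32_EFER) is true. */
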
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index 396ff4cc8ed4..66d0fff1ee84 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h | |||
@@ -37,4 +37,39 @@ | |||
37 | extern struct shared_info *HYPERVISOR_shared_info; | 37 | extern struct shared_info *HYPERVISOR_shared_info; |
38 | extern struct start_info *xen_start_info; | 38 | extern struct start_info *xen_start_info; |
39 | 39 | ||
40 | #include <asm/processor.h> | ||
41 | |||
42 | static inline uint32_t xen_cpuid_base(void) | ||
43 | { | ||
44 | uint32_t base, eax, ebx, ecx, edx; | ||
45 | char signature[13]; | ||
46 | |||
47 | for (base = 0x40000000; base < 0x40010000; base += 0x100) { | ||
48 | cpuid(base, &eax, &ebx, &ecx, &edx); | ||
49 | *(uint32_t *)(signature + 0) = ebx; | ||
50 | *(uint32_t *)(signature + 4) = ecx; | ||
51 | *(uint32_t *)(signature + 8) = edx; | ||
52 | signature[12] = 0; | ||
53 | |||
54 | if (!strcmp("XenVMMXenVMM", signature) && ((eax - base) >= 2)) | ||
55 | return base; | ||
56 | } | ||
57 | |||
58 | return 0; | ||
59 | } | ||
60 | |||
61 | #ifdef CONFIG_XEN | ||
62 | extern bool xen_hvm_need_lapic(void); | ||
63 | |||
64 | static inline bool xen_x2apic_para_available(void) | ||
65 | { | ||
66 | return xen_hvm_need_lapic(); | ||
67 | } | ||
68 | #else | ||
69 | static inline bool xen_x2apic_para_available(void) | ||
70 | { | ||
71 | return (xen_cpuid_base() != 0); | ||
72 | } | ||
73 | #endif | ||
74 | |||
40 | #endif /* _ASM_X86_XEN_HYPERVISOR_H */ | 75 | #endif /* _ASM_X86_XEN_HYPERVISOR_H */ |
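
xen_cpuid_base() walks the hypervisor CPUID leaf range (0x40000000 upward in 0x100 strides) looking for the "XenVMMXenVMM" signature with at least two extra leaves. A usage sketch of what a caller gets back (a hypothetical probe; the version encoding in leaf base+1 is Xen's documented one):

	static bool demo_running_on_xen(void)
	{
		uint32_t eax, ebx, ecx, edx;
		uint32_t base = xen_cpuid_base();

		if (!base)
			return false;	/* signature not found */

		/* Leaf base+1: Xen version, major in the high 16 bits. */
		cpuid(base + 1, &eax, &ebx, &ecx, &edx);
		pr_info("Xen version %u.%u\n", eax >> 16, eax & 0xffff);
		return true;
	}
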
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 8760cc60a21c..f25bdf238a33 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h | |||
@@ -42,6 +42,11 @@ extern unsigned int machine_to_phys_order; | |||
42 | extern unsigned long get_phys_to_machine(unsigned long pfn); | 42 | extern unsigned long get_phys_to_machine(unsigned long pfn); |
43 | extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); | 43 | extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); |
44 | 44 | ||
45 | extern int m2p_add_override(unsigned long mfn, struct page *page); | ||
46 | extern int m2p_remove_override(struct page *page); | ||
47 | extern struct page *m2p_find_override(unsigned long mfn); | ||
48 | extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); | ||
49 | |||
45 | static inline unsigned long pfn_to_mfn(unsigned long pfn) | 50 | static inline unsigned long pfn_to_mfn(unsigned long pfn) |
46 | { | 51 | { |
47 | unsigned long mfn; | 52 | unsigned long mfn; |
@@ -72,9 +77,6 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) | |||
72 | if (xen_feature(XENFEAT_auto_translated_physmap)) | 77 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
73 | return mfn; | 78 | return mfn; |
74 | 79 | ||
75 | if (unlikely((mfn >> machine_to_phys_order) != 0)) | ||
76 | return ~0; | ||
77 | |||
78 | pfn = 0; | 80 | pfn = 0; |
79 | /* | 81 | /* |
80 | * The array access can fail (e.g., device space beyond end of RAM). | 82 | * The array access can fail (e.g., device space beyond end of RAM). |
@@ -83,6 +85,14 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) | |||
83 | */ | 85 | */ |
84 | __get_user(pfn, &machine_to_phys_mapping[mfn]); | 86 | __get_user(pfn, &machine_to_phys_mapping[mfn]); |
85 | 87 | ||
88 | /* | ||
89 | * If this appears to be a foreign mfn (because the pfn | ||
90 | * doesn't map back to the mfn), then check the local override | ||
91 | * table to see if there's a better pfn to use. | ||
92 | */ | ||
93 | if (get_phys_to_machine(pfn) != mfn) | ||
94 | pfn = m2p_find_override_pfn(mfn, pfn); | ||
95 | |||
86 | return pfn; | 96 | return pfn; |
87 | } | 97 | } |
88 | 98 | ||
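
The m2p override hooks exist for foreign pages (for example grant mappings in backend drivers): the machine-to-phys table is owned by the page's source domain, so mfn_to_pfn() would otherwise return a pfn that does not map back. A hedged sketch of the intended flow (the caller is hypothetical; error handling is elided):

	static int demo_map_foreign(unsigned long foreign_mfn,
				    struct page *page)
	{
		int ret = m2p_add_override(foreign_mfn, page);
		if (ret)
			return ret;

		/* While the override is live, mfn_to_pfn(foreign_mfn)
		 * resolves to this local page instead of a bogus pfn, so
		 * backends can treat the grant mapping like normal memory. */

		return m2p_remove_override(page);
	}
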
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index ec881c6bfee0..b3a71137983a 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
@@ -509,6 +509,7 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) | |||
509 | 509 | ||
510 | return 0; | 510 | return 0; |
511 | } | 511 | } |
512 | EXPORT_SYMBOL_GPL(acpi_gsi_to_irq); | ||
512 | 513 | ||
513 | int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi) | 514 | int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi) |
514 | { | 515 | { |
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index d2fdb0826df2..57ca77787220 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c | |||
@@ -1086,7 +1086,7 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom, | |||
1086 | 1086 | ||
1087 | dma_dom->aperture_size += APERTURE_RANGE_SIZE; | 1087 | dma_dom->aperture_size += APERTURE_RANGE_SIZE; |
1088 | 1088 | ||
1089 | /* Intialize the exclusion range if necessary */ | 1089 | /* Initialize the exclusion range if necessary */ |
1090 | for_each_iommu(iommu) { | 1090 | for_each_iommu(iommu) { |
1091 | if (iommu->exclusion_start && | 1091 | if (iommu->exclusion_start && |
1092 | iommu->exclusion_start >= dma_dom->aperture[index]->offset | 1092 | iommu->exclusion_start >= dma_dom->aperture[index]->offset |
@@ -1353,7 +1353,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) | |||
1353 | 1353 | ||
1354 | /* | 1354 | /* |
1355 | * Allocates a new protection domain usable for the dma_ops functions. | 1355 | * Allocates a new protection domain usable for the dma_ops functions. |
1356 | * It also intializes the page table and the address allocator data | 1356 | * It also initializes the page table and the address allocator data |
1357 | * structures required for the dma_ops interface | 1357 | * structures required for the dma_ops interface |
1358 | */ | 1358 | */ |
1359 | static struct dma_ops_domain *dma_ops_domain_alloc(void) | 1359 | static struct dma_ops_domain *dma_ops_domain_alloc(void) |
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 1efd3789e3d4..06c196d7e59c 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -49,8 +49,8 @@ | |||
49 | #include <asm/mtrr.h> | 49 | #include <asm/mtrr.h> |
50 | #include <asm/smp.h> | 50 | #include <asm/smp.h> |
51 | #include <asm/mce.h> | 51 | #include <asm/mce.h> |
52 | #include <asm/kvm_para.h> | ||
53 | #include <asm/tsc.h> | 52 | #include <asm/tsc.h> |
53 | #include <asm/hypervisor.h> | ||
54 | 54 | ||
55 | unsigned int num_processors; | 55 | unsigned int num_processors; |
56 | 56 | ||
@@ -516,7 +516,7 @@ static void __cpuinit setup_APIC_timer(void) | |||
516 | { | 516 | { |
517 | struct clock_event_device *levt = &__get_cpu_var(lapic_events); | 517 | struct clock_event_device *levt = &__get_cpu_var(lapic_events); |
518 | 518 | ||
519 | if (cpu_has(¤t_cpu_data, X86_FEATURE_ARAT)) { | 519 | if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_ARAT)) { |
520 | lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP; | 520 | lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP; |
521 | /* Make LAPIC timer preferrable over percpu HPET */ | 521 | /* Make LAPIC timer preferrable over percpu HPET */ |
522 | lapic_clockevent.rating = 150; | 522 | lapic_clockevent.rating = 150; |
@@ -1476,7 +1476,8 @@ void __init enable_IR_x2apic(void) | |||
1476 | /* IR is required if there is APIC ID > 255 even when running | 1476 | /* IR is required if there is APIC ID > 255 even when running |
1477 | * under KVM | 1477 | * under KVM |
1478 | */ | 1478 | */ |
1479 | if (max_physical_apicid > 255 || !kvm_para_available()) | 1479 | if (max_physical_apicid > 255 || |
1480 | !hypervisor_x2apic_available()) | ||
1480 | goto nox2apic; | 1481 | goto nox2apic; |
1481 | /* | 1482 | /* |
1482 | * without IR all CPUs can be addressed by IOAPIC/MSI | 1483 | * without IR all CPUs can be addressed by IOAPIC/MSI |
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index 72ec29e1ae06..79fd43ca6f96 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c | |||
@@ -68,7 +68,6 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, | |||
68 | 68 | ||
69 | switch (cmd) { | 69 | switch (cmd) { |
70 | case DIE_NMI: | 70 | case DIE_NMI: |
71 | case DIE_NMI_IPI: | ||
72 | break; | 71 | break; |
73 | 72 | ||
74 | default: | 73 | default: |
@@ -96,7 +95,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, | |||
96 | static __read_mostly struct notifier_block backtrace_notifier = { | 95 | static __read_mostly struct notifier_block backtrace_notifier = { |
97 | .notifier_call = arch_trigger_all_cpu_backtrace_handler, | 96 | .notifier_call = arch_trigger_all_cpu_backtrace_handler, |
98 | .next = NULL, | 97 | .next = NULL, |
99 | .priority = 1 | 98 | .priority = NMI_LOCAL_LOW_PRIOR, |
100 | }; | 99 | }; |
101 | 100 | ||
102 | static int __init register_trigger_all_cpu_backtrace(void) | 101 | static int __init register_trigger_all_cpu_backtrace(void) |
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 52735a710c30..697dc34b7b87 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
@@ -2329,7 +2329,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) | |||
2329 | unsigned int irr; | 2329 | unsigned int irr; |
2330 | struct irq_desc *desc; | 2330 | struct irq_desc *desc; |
2331 | struct irq_cfg *cfg; | 2331 | struct irq_cfg *cfg; |
2332 | irq = __get_cpu_var(vector_irq)[vector]; | 2332 | irq = __this_cpu_read(vector_irq[vector]); |
2333 | 2333 | ||
2334 | if (irq == -1) | 2334 | if (irq == -1) |
2335 | continue; | 2335 | continue; |
@@ -2363,7 +2363,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) | |||
2363 | apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR); | 2363 | apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR); |
2364 | goto unlock; | 2364 | goto unlock; |
2365 | } | 2365 | } |
2366 | __get_cpu_var(vector_irq)[vector] = -1; | 2366 | __this_cpu_write(vector_irq[vector], -1); |
2367 | unlock: | 2367 | unlock: |
2368 | raw_spin_unlock(&desc->lock); | 2368 | raw_spin_unlock(&desc->lock); |
2369 | } | 2369 | } |
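
This conversion recurs through the rest of the commit: __get_cpu_var() forms this CPU's address and then dereferences it, while the __this_cpu_read()/__this_cpu_write() forms compile to a single %gs-segment-relative instruction. A minimal before/after sketch (the demo variable is illustrative):

	DEFINE_PER_CPU(int, demo_stat);

	static void demo_bump(void)
	{
		/* old: int v = __get_cpu_var(demo_stat);  (addr, then load) */
		int v = __this_cpu_read(demo_stat);	/* mov %gs:demo_stat,v */

		/* old: __get_cpu_var(demo_stat) = v + 1; */
		__this_cpu_write(demo_stat, v + 1);	/* single store, no
							   address formation */
	}
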
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index f4f9e95aa151..bd16b58b8850 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c | |||
@@ -120,8 +120,8 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | |||
120 | else if (!strcmp(oem_table_id, "UVX")) | 120 | else if (!strcmp(oem_table_id, "UVX")) |
121 | uv_system_type = UV_X2APIC; | 121 | uv_system_type = UV_X2APIC; |
122 | else if (!strcmp(oem_table_id, "UVH")) { | 122 | else if (!strcmp(oem_table_id, "UVH")) { |
123 | __get_cpu_var(x2apic_extra_bits) = | 123 | __this_cpu_write(x2apic_extra_bits, |
124 | pnodeid << uvh_apicid.s.pnode_shift; | 124 | pnodeid << uvh_apicid.s.pnode_shift); |
125 | uv_system_type = UV_NON_UNIQUE_APIC; | 125 | uv_system_type = UV_NON_UNIQUE_APIC; |
126 | uv_set_apicid_hibit(); | 126 | uv_set_apicid_hibit(); |
127 | return 1; | 127 | return 1; |
@@ -286,7 +286,7 @@ static unsigned int x2apic_get_apic_id(unsigned long x) | |||
286 | unsigned int id; | 286 | unsigned int id; |
287 | 287 | ||
288 | WARN_ON(preemptible() && num_online_cpus() > 1); | 288 | WARN_ON(preemptible() && num_online_cpus() > 1); |
289 | id = x | __get_cpu_var(x2apic_extra_bits); | 289 | id = x | __this_cpu_read(x2apic_extra_bits); |
290 | 290 | ||
291 | return id; | 291 | return id; |
292 | } | 292 | } |
@@ -378,7 +378,7 @@ struct apic __refdata apic_x2apic_uv_x = { | |||
378 | 378 | ||
379 | static __cpuinit void set_x2apic_extra_bits(int pnode) | 379 | static __cpuinit void set_x2apic_extra_bits(int pnode) |
380 | { | 380 | { |
381 | __get_cpu_var(x2apic_extra_bits) = (pnode << uvh_apicid.s.pnode_shift); | 381 | __this_cpu_write(x2apic_extra_bits, pnode << uvh_apicid.s.pnode_shift); |
382 | } | 382 | } |
383 | 383 | ||
384 | /* | 384 | /* |
@@ -641,7 +641,7 @@ void __cpuinit uv_cpu_init(void) | |||
641 | */ | 641 | */ |
642 | int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) | 642 | int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) |
643 | { | 643 | { |
644 | if (reason != DIE_NMI_IPI) | 644 | if (reason != DIE_NMIUNKNOWN) |
645 | return NOTIFY_OK; | 645 | return NOTIFY_OK; |
646 | 646 | ||
647 | if (in_crash_kexec) | 647 | if (in_crash_kexec) |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 9e093f8fe78c..7c7bedb83c5a 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -668,7 +668,7 @@ EXPORT_SYMBOL_GPL(amd_erratum_383); | |||
668 | 668 | ||
669 | bool cpu_has_amd_erratum(const int *erratum) | 669 | bool cpu_has_amd_erratum(const int *erratum) |
670 | { | 670 | { |
671 | struct cpuinfo_x86 *cpu = &current_cpu_data; | 671 | struct cpuinfo_x86 *cpu = __this_cpu_ptr(&cpu_info); |
672 | int osvw_id = *erratum++; | 672 | int osvw_id = *erratum++; |
673 | u32 range; | 673 | u32 range; |
674 | u32 ms; | 674 | u32 ms; |
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 491977baf6c0..35c7e65e59be 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c | |||
@@ -521,7 +521,7 @@ static void check_supported_cpu(void *_rc) | |||
521 | 521 | ||
522 | *rc = -ENODEV; | 522 | *rc = -ENODEV; |
523 | 523 | ||
524 | if (current_cpu_data.x86_vendor != X86_VENDOR_AMD) | 524 | if (__this_cpu_read(cpu_info.x86_vendor) != X86_VENDOR_AMD) |
525 | return; | 525 | return; |
526 | 526 | ||
527 | eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); | 527 | eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); |
@@ -1377,7 +1377,7 @@ static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol) | |||
1377 | static void query_values_on_cpu(void *_err) | 1377 | static void query_values_on_cpu(void *_err) |
1378 | { | 1378 | { |
1379 | int *err = _err; | 1379 | int *err = _err; |
1380 | struct powernow_k8_data *data = __get_cpu_var(powernow_data); | 1380 | struct powernow_k8_data *data = __this_cpu_read(powernow_data); |
1381 | 1381 | ||
1382 | *err = query_current_values_with_pending_wait(data); | 1382 | *err = query_current_values_with_pending_wait(data); |
1383 | } | 1383 | } |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 9ecf81f9b90f..7283e98deaae 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -265,7 +265,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, | |||
265 | line_size = l2.line_size; | 265 | line_size = l2.line_size; |
266 | lines_per_tag = l2.lines_per_tag; | 266 | lines_per_tag = l2.lines_per_tag; |
267 | /* cpu_data has errata corrections for K7 applied */ | 267 | /* cpu_data has errata corrections for K7 applied */ |
268 | size_in_kb = current_cpu_data.x86_cache_size; | 268 | size_in_kb = __this_cpu_read(cpu_info.x86_cache_size); |
269 | break; | 269 | break; |
270 | case 3: | 270 | case 3: |
271 | if (!l3.val) | 271 | if (!l3.val) |
@@ -287,7 +287,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, | |||
287 | eax->split.type = types[leaf]; | 287 | eax->split.type = types[leaf]; |
288 | eax->split.level = levels[leaf]; | 288 | eax->split.level = levels[leaf]; |
289 | eax->split.num_threads_sharing = 0; | 289 | eax->split.num_threads_sharing = 0; |
290 | eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1; | 290 | eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1; |
291 | 291 | ||
292 | 292 | ||
293 | if (assoc == 0xffff) | 293 | if (assoc == 0xffff) |
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index e7dbde7bfedb..a77971979564 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/gfp.h> | 25 | #include <linux/gfp.h> |
26 | #include <asm/mce.h> | 26 | #include <asm/mce.h> |
27 | #include <asm/apic.h> | 27 | #include <asm/apic.h> |
28 | #include <asm/nmi.h> | ||
28 | 29 | ||
29 | /* Update fake mce registers on current CPU. */ | 30 | /* Update fake mce registers on current CPU. */ |
30 | static void inject_mce(struct mce *m) | 31 | static void inject_mce(struct mce *m) |
@@ -83,7 +84,7 @@ static int mce_raise_notify(struct notifier_block *self, | |||
83 | struct die_args *args = (struct die_args *)data; | 84 | struct die_args *args = (struct die_args *)data; |
84 | int cpu = smp_processor_id(); | 85 | int cpu = smp_processor_id(); |
85 | struct mce *m = &__get_cpu_var(injectm); | 86 | struct mce *m = &__get_cpu_var(injectm); |
86 | if (val != DIE_NMI_IPI || !cpumask_test_cpu(cpu, mce_inject_cpumask)) | 87 | if (val != DIE_NMI || !cpumask_test_cpu(cpu, mce_inject_cpumask)) |
87 | return NOTIFY_DONE; | 88 | return NOTIFY_DONE; |
88 | cpumask_clear_cpu(cpu, mce_inject_cpumask); | 89 | cpumask_clear_cpu(cpu, mce_inject_cpumask); |
89 | if (m->inject_flags & MCJ_EXCEPTION) | 90 | if (m->inject_flags & MCJ_EXCEPTION) |
@@ -95,7 +96,7 @@ static int mce_raise_notify(struct notifier_block *self, | |||
95 | 96 | ||
96 | static struct notifier_block mce_raise_nb = { | 97 | static struct notifier_block mce_raise_nb = { |
97 | .notifier_call = mce_raise_notify, | 98 | .notifier_call = mce_raise_notify, |
98 | .priority = 1000, | 99 | .priority = NMI_LOCAL_NORMAL_PRIOR, |
99 | }; | 100 | }; |
100 | 101 | ||
101 | /* Inject mce on current CPU */ | 102 | /* Inject mce on current CPU */ |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 7a35b72d7c03..d916183b7f9c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -326,7 +326,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp) | |||
326 | 326 | ||
327 | static int msr_to_offset(u32 msr) | 327 | static int msr_to_offset(u32 msr) |
328 | { | 328 | { |
329 | unsigned bank = __get_cpu_var(injectm.bank); | 329 | unsigned bank = __this_cpu_read(injectm.bank); |
330 | 330 | ||
331 | if (msr == rip_msr) | 331 | if (msr == rip_msr) |
332 | return offsetof(struct mce, ip); | 332 | return offsetof(struct mce, ip); |
@@ -346,7 +346,7 @@ static u64 mce_rdmsrl(u32 msr) | |||
346 | { | 346 | { |
347 | u64 v; | 347 | u64 v; |
348 | 348 | ||
349 | if (__get_cpu_var(injectm).finished) { | 349 | if (__this_cpu_read(injectm.finished)) { |
350 | int offset = msr_to_offset(msr); | 350 | int offset = msr_to_offset(msr); |
351 | 351 | ||
352 | if (offset < 0) | 352 | if (offset < 0) |
@@ -369,7 +369,7 @@ static u64 mce_rdmsrl(u32 msr) | |||
369 | 369 | ||
370 | static void mce_wrmsrl(u32 msr, u64 v) | 370 | static void mce_wrmsrl(u32 msr, u64 v) |
371 | { | 371 | { |
372 | if (__get_cpu_var(injectm).finished) { | 372 | if (__this_cpu_read(injectm.finished)) { |
373 | int offset = msr_to_offset(msr); | 373 | int offset = msr_to_offset(msr); |
374 | 374 | ||
375 | if (offset >= 0) | 375 | if (offset >= 0) |
@@ -1159,7 +1159,7 @@ static void mce_start_timer(unsigned long data) | |||
1159 | 1159 | ||
1160 | WARN_ON(smp_processor_id() != data); | 1160 | WARN_ON(smp_processor_id() != data); |
1161 | 1161 | ||
1162 | if (mce_available(&current_cpu_data)) { | 1162 | if (mce_available(__this_cpu_ptr(&cpu_info))) { |
1163 | machine_check_poll(MCP_TIMESTAMP, | 1163 | machine_check_poll(MCP_TIMESTAMP, |
1164 | &__get_cpu_var(mce_poll_banks)); | 1164 | &__get_cpu_var(mce_poll_banks)); |
1165 | } | 1165 | } |
@@ -1767,7 +1767,7 @@ static int mce_shutdown(struct sys_device *dev) | |||
1767 | static int mce_resume(struct sys_device *dev) | 1767 | static int mce_resume(struct sys_device *dev) |
1768 | { | 1768 | { |
1769 | __mcheck_cpu_init_generic(); | 1769 | __mcheck_cpu_init_generic(); |
1770 | __mcheck_cpu_init_vendor(&current_cpu_data); | 1770 | __mcheck_cpu_init_vendor(__this_cpu_ptr(&cpu_info)); |
1771 | 1771 | ||
1772 | return 0; | 1772 | return 0; |
1773 | } | 1773 | } |
@@ -1775,7 +1775,7 @@ static int mce_resume(struct sys_device *dev) | |||
1775 | static void mce_cpu_restart(void *data) | 1775 | static void mce_cpu_restart(void *data) |
1776 | { | 1776 | { |
1777 | del_timer_sync(&__get_cpu_var(mce_timer)); | 1777 | del_timer_sync(&__get_cpu_var(mce_timer)); |
1778 | if (!mce_available(&current_cpu_data)) | 1778 | if (!mce_available(__this_cpu_ptr(&cpu_info))) |
1779 | return; | 1779 | return; |
1780 | __mcheck_cpu_init_generic(); | 1780 | __mcheck_cpu_init_generic(); |
1781 | __mcheck_cpu_init_timer(); | 1781 | __mcheck_cpu_init_timer(); |
@@ -1790,7 +1790,7 @@ static void mce_restart(void) | |||
1790 | /* Toggle features for corrected errors */ | 1790 | /* Toggle features for corrected errors */ |
1791 | static void mce_disable_ce(void *all) | 1791 | static void mce_disable_ce(void *all) |
1792 | { | 1792 | { |
1793 | if (!mce_available(&current_cpu_data)) | 1793 | if (!mce_available(__this_cpu_ptr(&cpu_info))) |
1794 | return; | 1794 | return; |
1795 | if (all) | 1795 | if (all) |
1796 | del_timer_sync(&__get_cpu_var(mce_timer)); | 1796 | del_timer_sync(&__get_cpu_var(mce_timer)); |
@@ -1799,7 +1799,7 @@ static void mce_disable_ce(void *all) | |||
1799 | 1799 | ||
1800 | static void mce_enable_ce(void *all) | 1800 | static void mce_enable_ce(void *all) |
1801 | { | 1801 | { |
1802 | if (!mce_available(&current_cpu_data)) | 1802 | if (!mce_available(__this_cpu_ptr(&cpu_info))) |
1803 | return; | 1803 | return; |
1804 | cmci_reenable(); | 1804 | cmci_reenable(); |
1805 | cmci_recheck(); | 1805 | cmci_recheck(); |
@@ -2022,7 +2022,7 @@ static void __cpuinit mce_disable_cpu(void *h) | |||
2022 | unsigned long action = *(unsigned long *)h; | 2022 | unsigned long action = *(unsigned long *)h; |
2023 | int i; | 2023 | int i; |
2024 | 2024 | ||
2025 | if (!mce_available(&current_cpu_data)) | 2025 | if (!mce_available(__this_cpu_ptr(&cpu_info))) |
2026 | return; | 2026 | return; |
2027 | 2027 | ||
2028 | if (!(action & CPU_TASKS_FROZEN)) | 2028 | if (!(action & CPU_TASKS_FROZEN)) |
@@ -2040,7 +2040,7 @@ static void __cpuinit mce_reenable_cpu(void *h) | |||
2040 | unsigned long action = *(unsigned long *)h; | 2040 | unsigned long action = *(unsigned long *)h; |
2041 | int i; | 2041 | int i; |
2042 | 2042 | ||
2043 | if (!mce_available(&current_cpu_data)) | 2043 | if (!mce_available(__this_cpu_ptr(&cpu_info))) |
2044 | return; | 2044 | return; |
2045 | 2045 | ||
2046 | if (!(action & CPU_TASKS_FROZEN)) | 2046 | if (!(action & CPU_TASKS_FROZEN)) |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 6fcd0936194f..8694ef56459d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c | |||
@@ -130,7 +130,7 @@ void cmci_recheck(void) | |||
130 | unsigned long flags; | 130 | unsigned long flags; |
131 | int banks; | 131 | int banks; |
132 | 132 | ||
133 | if (!mce_available(&current_cpu_data) || !cmci_supported(&banks)) | 133 | if (!mce_available(__this_cpu_ptr(&cpu_info)) || !cmci_supported(&banks)) |
134 | return; | 134 | return; |
135 | local_irq_save(flags); | 135 | local_irq_save(flags); |
136 | machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); | 136 | machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); |
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 0a360d146596..9d977a2ea693 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -997,8 +997,7 @@ x86_perf_event_set_period(struct perf_event *event) | |||
997 | 997 | ||
998 | static void x86_pmu_enable_event(struct perf_event *event) | 998 | static void x86_pmu_enable_event(struct perf_event *event) |
999 | { | 999 | { |
1000 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1000 | if (__this_cpu_read(cpu_hw_events.enabled)) |
1001 | if (cpuc->enabled) | ||
1002 | __x86_pmu_enable_event(&event->hw, | 1001 | __x86_pmu_enable_event(&event->hw, |
1003 | ARCH_PERFMON_EVENTSEL_ENABLE); | 1002 | ARCH_PERFMON_EVENTSEL_ENABLE); |
1004 | } | 1003 | } |
@@ -1268,11 +1267,10 @@ perf_event_nmi_handler(struct notifier_block *self, | |||
1268 | 1267 | ||
1269 | switch (cmd) { | 1268 | switch (cmd) { |
1270 | case DIE_NMI: | 1269 | case DIE_NMI: |
1271 | case DIE_NMI_IPI: | ||
1272 | break; | 1270 | break; |
1273 | case DIE_NMIUNKNOWN: | 1271 | case DIE_NMIUNKNOWN: |
1274 | this_nmi = percpu_read(irq_stat.__nmi_count); | 1272 | this_nmi = percpu_read(irq_stat.__nmi_count); |
1275 | if (this_nmi != __get_cpu_var(pmu_nmi).marked) | 1273 | if (this_nmi != __this_cpu_read(pmu_nmi.marked)) |
1276 | /* let the kernel handle the unknown nmi */ | 1274 | /* let the kernel handle the unknown nmi */ |
1277 | return NOTIFY_DONE; | 1275 | return NOTIFY_DONE; |
1278 | /* | 1276 | /* |
@@ -1296,8 +1294,8 @@ perf_event_nmi_handler(struct notifier_block *self, | |||
1296 | this_nmi = percpu_read(irq_stat.__nmi_count); | 1294 | this_nmi = percpu_read(irq_stat.__nmi_count); |
1297 | if ((handled > 1) || | 1295 | if ((handled > 1) || |
1298 | /* the next nmi could be a back-to-back nmi */ | 1296 | /* the next nmi could be a back-to-back nmi */ |
1299 | ((__get_cpu_var(pmu_nmi).marked == this_nmi) && | 1297 | ((__this_cpu_read(pmu_nmi.marked) == this_nmi) && |
1300 | (__get_cpu_var(pmu_nmi).handled > 1))) { | 1298 | (__this_cpu_read(pmu_nmi.handled) > 1))) { |
1301 | /* | 1299 | /* |
1302 | * We could have two subsequent back-to-back nmis: The | 1300 | * We could have two subsequent back-to-back nmis: The |
1303 | * first handles more than one counter, the 2nd | 1301 | * first handles more than one counter, the 2nd |
@@ -1308,8 +1306,8 @@ perf_event_nmi_handler(struct notifier_block *self, | |||
1308 | * handling more than one counter. We will mark the | 1306 | * handling more than one counter. We will mark the |
1309 | * next (3rd) and then drop it if unhandled. | 1307 | * next (3rd) and then drop it if unhandled. |
1310 | */ | 1308 | */ |
1311 | __get_cpu_var(pmu_nmi).marked = this_nmi + 1; | 1309 | __this_cpu_write(pmu_nmi.marked, this_nmi + 1); |
1312 | __get_cpu_var(pmu_nmi).handled = handled; | 1310 | __this_cpu_write(pmu_nmi.handled, handled); |
1313 | } | 1311 | } |
1314 | 1312 | ||
1315 | return NOTIFY_STOP; | 1313 | return NOTIFY_STOP; |
@@ -1318,7 +1316,7 @@ perf_event_nmi_handler(struct notifier_block *self, | |||
1318 | static __read_mostly struct notifier_block perf_event_nmi_notifier = { | 1316 | static __read_mostly struct notifier_block perf_event_nmi_notifier = { |
1319 | .notifier_call = perf_event_nmi_handler, | 1317 | .notifier_call = perf_event_nmi_handler, |
1320 | .next = NULL, | 1318 | .next = NULL, |
1321 | .priority = 1 | 1319 | .priority = NMI_LOCAL_LOW_PRIOR, |
1322 | }; | 1320 | }; |
1323 | 1321 | ||
1324 | static struct event_constraint unconstrained; | 1322 | static struct event_constraint unconstrained; |
@@ -1484,11 +1482,9 @@ static inline void x86_pmu_read(struct perf_event *event) | |||
1484 | */ | 1482 | */ |
1485 | static void x86_pmu_start_txn(struct pmu *pmu) | 1483 | static void x86_pmu_start_txn(struct pmu *pmu) |
1486 | { | 1484 | { |
1487 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1488 | |||
1489 | perf_pmu_disable(pmu); | 1485 | perf_pmu_disable(pmu); |
1490 | cpuc->group_flag |= PERF_EVENT_TXN; | 1486 | __this_cpu_or(cpu_hw_events.group_flag, PERF_EVENT_TXN); |
1491 | cpuc->n_txn = 0; | 1487 | __this_cpu_write(cpu_hw_events.n_txn, 0); |
1492 | } | 1488 | } |
1493 | 1489 | ||
1494 | /* | 1490 | /* |
@@ -1498,14 +1494,12 @@ static void x86_pmu_start_txn(struct pmu *pmu) | |||
1498 | */ | 1494 | */ |
1499 | static void x86_pmu_cancel_txn(struct pmu *pmu) | 1495 | static void x86_pmu_cancel_txn(struct pmu *pmu) |
1500 | { | 1496 | { |
1501 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1497 | __this_cpu_and(cpu_hw_events.group_flag, ~PERF_EVENT_TXN); |
1502 | |||
1503 | cpuc->group_flag &= ~PERF_EVENT_TXN; | ||
1504 | /* | 1498 | /* |
1505 | * Truncate the collected events. | 1499 | * Truncate the collected events. |
1506 | */ | 1500 | */ |
1507 | cpuc->n_added -= cpuc->n_txn; | 1501 | __this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn)); |
1508 | cpuc->n_events -= cpuc->n_txn; | 1502 | __this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn)); |
1509 | perf_pmu_enable(pmu); | 1503 | perf_pmu_enable(pmu); |
1510 | } | 1504 | } |
1511 | 1505 | ||
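The transaction hunks above go one step further and use the read-modify-write flavors (__this_cpu_or, __this_cpu_and, __this_cpu_sub), each of which can compile to one memory-operand instruction on x86. A hedged sketch of the same shape, with illustrative names:

	#include <linux/percpu.h>

	#define DEMO_TXN 0x1

	struct demo_events {
		unsigned int group_flag;
		int n_added;
		int n_txn;
	};
	static DEFINE_PER_CPU(struct demo_events, demo_events);

	static void demo_cancel_txn(void)
	{
		__this_cpu_and(demo_events.group_flag, ~DEMO_TXN);
		/* truncate whatever the aborted transaction collected */
		__this_cpu_sub(demo_events.n_added, __this_cpu_read(demo_events.n_txn));
	}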
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 24e390e40f2e..008835c1d79c 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -649,7 +649,7 @@ static void intel_pmu_enable_event(struct perf_event *event) | |||
649 | struct hw_perf_event *hwc = &event->hw; | 649 | struct hw_perf_event *hwc = &event->hw; |
650 | 650 | ||
651 | if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { | 651 | if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { |
652 | if (!__get_cpu_var(cpu_hw_events).enabled) | 652 | if (!__this_cpu_read(cpu_hw_events.enabled)) |
653 | return; | 653 | return; |
654 | 654 | ||
655 | intel_pmu_enable_bts(hwc->config); | 655 | intel_pmu_enable_bts(hwc->config); |
@@ -679,7 +679,7 @@ static int intel_pmu_save_and_restart(struct perf_event *event) | |||
679 | 679 | ||
680 | static void intel_pmu_reset(void) | 680 | static void intel_pmu_reset(void) |
681 | { | 681 | { |
682 | struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds; | 682 | struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds); |
683 | unsigned long flags; | 683 | unsigned long flags; |
684 | int idx; | 684 | int idx; |
685 | 685 | ||
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 81400b93e694..e56b9bfbabd1 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c | |||
@@ -753,19 +753,21 @@ out: | |||
753 | 753 | ||
754 | static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) | 754 | static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) |
755 | { | 755 | { |
756 | int overflow = 0; | 756 | u64 v; |
757 | u32 low, high; | ||
758 | 757 | ||
759 | rdmsr(hwc->config_base + hwc->idx, low, high); | 758 | /* an official way for overflow indication */ |
760 | 759 | rdmsrl(hwc->config_base + hwc->idx, v); | |
761 | /* we need to check high bit for unflagged overflows */ | 760 | if (v & P4_CCCR_OVF) { |
762 | if ((low & P4_CCCR_OVF) || !(high & (1 << 31))) { | 761 | wrmsrl(hwc->config_base + hwc->idx, v & ~P4_CCCR_OVF); |
763 | overflow = 1; | 762 | return 1; |
764 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | ||
765 | ((u64)low) & ~P4_CCCR_OVF); | ||
766 | } | 763 | } |
767 | 764 | ||
768 | return overflow; | 765 | /* it might be unflagged overflow */ |
766 | rdmsrl(hwc->event_base + hwc->idx, v); | ||
767 | if (!(v & ARCH_P4_CNTRVAL_MASK)) | ||
768 | return 1; | ||
769 | |||
770 | return 0; | ||
769 | } | 771 | } |
770 | 772 | ||
771 | static void p4_pmu_disable_pebs(void) | 773 | static void p4_pmu_disable_pebs(void) |
@@ -1152,9 +1154,9 @@ static __initconst const struct x86_pmu p4_pmu = { | |||
1152 | */ | 1154 | */ |
1153 | .num_counters = ARCH_P4_MAX_CCCR, | 1155 | .num_counters = ARCH_P4_MAX_CCCR, |
1154 | .apic = 1, | 1156 | .apic = 1, |
1155 | .cntval_bits = 40, | 1157 | .cntval_bits = ARCH_P4_CNTRVAL_BITS, |
1156 | .cntval_mask = (1ULL << 40) - 1, | 1158 | .cntval_mask = ARCH_P4_CNTRVAL_MASK, |
1157 | .max_period = (1ULL << 39) - 1, | 1159 | .max_period = (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1, |
1158 | .hw_config = p4_hw_config, | 1160 | .hw_config = p4_hw_config, |
1159 | .schedule_events = p4_pmu_schedule_events, | 1161 | .schedule_events = p4_pmu_schedule_events, |
1160 | /* | 1162 | /* |
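For orientation, the rewritten p4_pmu_clear_cccr_ovf() above detects overflow in two steps: first the architectural P4_CCCR_OVF flag (the official indication, cleared by writing the CCCR back), then, as a fallback for unflagged overflows, whether the counter reads as zero under the 40-bit value mask. A simplified model under assumed constants (the real ones live in the P4 perf headers):

	#include <linux/types.h>

	#define DEMO_CNTRVAL_BITS 40
	#define DEMO_CNTRVAL_MASK ((1ULL << DEMO_CNTRVAL_BITS) - 1)
	#define DEMO_CCCR_OVF (1ULL << 31)

	static int demo_cccr_overflowed(u64 cccr, u64 counter)
	{
		if (cccr & DEMO_CCCR_OVF)
			return 1;	/* flagged overflow */
		if (!(counter & DEMO_CNTRVAL_MASK))
			return 1;	/* unflagged: counter wrapped to zero */
		return 0;
	}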
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 8474c998cbd4..df20723a6a1b 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c | |||
@@ -197,14 +197,8 @@ void show_stack(struct task_struct *task, unsigned long *sp) | |||
197 | */ | 197 | */ |
198 | void dump_stack(void) | 198 | void dump_stack(void) |
199 | { | 199 | { |
200 | unsigned long bp = 0; | ||
201 | unsigned long stack; | 200 | unsigned long stack; |
202 | 201 | ||
203 | #ifdef CONFIG_FRAME_POINTER | ||
204 | if (!bp) | ||
205 | get_bp(bp); | ||
206 | #endif | ||
207 | |||
208 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", | 202 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", |
209 | current->pid, current->comm, print_tainted(), | 203 | current->pid, current->comm, print_tainted(), |
210 | init_utsname()->release, | 204 | init_utsname()->release, |
@@ -240,6 +234,7 @@ unsigned __kprobes long oops_begin(void) | |||
240 | bust_spinlocks(1); | 234 | bust_spinlocks(1); |
241 | return flags; | 235 | return flags; |
242 | } | 236 | } |
237 | EXPORT_SYMBOL_GPL(oops_begin); | ||
243 | 238 | ||
244 | void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) | 239 | void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) |
245 | { | 240 | { |
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 0c2b7ef7a34d..294f26da0c0c 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/bootmem.h> | 14 | #include <linux/bootmem.h> |
15 | #include <linux/pfn.h> | 15 | #include <linux/pfn.h> |
16 | #include <linux/suspend.h> | 16 | #include <linux/suspend.h> |
17 | #include <linux/acpi.h> | ||
17 | #include <linux/firmware-map.h> | 18 | #include <linux/firmware-map.h> |
18 | #include <linux/memblock.h> | 19 | #include <linux/memblock.h> |
19 | 20 | ||
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 591e60104278..c8b4efad7ebb 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -1406,6 +1406,16 @@ ENTRY(general_protection) | |||
1406 | CFI_ENDPROC | 1406 | CFI_ENDPROC |
1407 | END(general_protection) | 1407 | END(general_protection) |
1408 | 1408 | ||
1409 | #ifdef CONFIG_KVM_GUEST | ||
1410 | ENTRY(async_page_fault) | ||
1411 | RING0_EC_FRAME | ||
1412 | pushl $do_async_page_fault | ||
1413 | CFI_ADJUST_CFA_OFFSET 4 | ||
1414 | jmp error_code | ||
1415 | CFI_ENDPROC | ||
1416 | END(async_page_fault) | ||
1417 | #endif | ||
1418 | |||
1409 | /* | 1419 | /* |
1410 | * End of kprobes section | 1420 | * End of kprobes section |
1411 | */ | 1421 | */ |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index e3ba417e8697..aed1ffbeb0c9 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -299,17 +299,21 @@ ENDPROC(native_usergs_sysret64) | |||
299 | ENTRY(save_args) | 299 | ENTRY(save_args) |
300 | XCPT_FRAME | 300 | XCPT_FRAME |
301 | cld | 301 | cld |
302 | movq_cfi rdi, RDI+16-ARGOFFSET | 302 | /* |
303 | movq_cfi rsi, RSI+16-ARGOFFSET | 303 | * start from rbp in pt_regs and jump over |
304 | movq_cfi rdx, RDX+16-ARGOFFSET | 304 | * return address. |
305 | movq_cfi rcx, RCX+16-ARGOFFSET | 305 | */ |
306 | movq_cfi rax, RAX+16-ARGOFFSET | 306 | movq_cfi rdi, RDI+8-RBP |
307 | movq_cfi r8, R8+16-ARGOFFSET | 307 | movq_cfi rsi, RSI+8-RBP |
308 | movq_cfi r9, R9+16-ARGOFFSET | 308 | movq_cfi rdx, RDX+8-RBP |
309 | movq_cfi r10, R10+16-ARGOFFSET | 309 | movq_cfi rcx, RCX+8-RBP |
310 | movq_cfi r11, R11+16-ARGOFFSET | 310 | movq_cfi rax, RAX+8-RBP |
311 | 311 | movq_cfi r8, R8+8-RBP | |
312 | leaq -ARGOFFSET+16(%rsp),%rdi /* arg1 for handler */ | 312 | movq_cfi r9, R9+8-RBP |
313 | movq_cfi r10, R10+8-RBP | ||
314 | movq_cfi r11, R11+8-RBP | ||
315 | |||
316 | leaq -RBP+8(%rsp),%rdi /* arg1 for handler */ | ||
313 | movq_cfi rbp, 8 /* push %rbp */ | 317 | movq_cfi rbp, 8 /* push %rbp */ |
314 | leaq 8(%rsp), %rbp /* mov %rsp, %ebp */ | 318 | leaq 8(%rsp), %rbp /* mov %rsp, %ebp */ |
315 | testl $3, CS(%rdi) | 319 | testl $3, CS(%rdi) |
@@ -782,8 +786,9 @@ END(interrupt) | |||
782 | 786 | ||
783 | /* 0(%rsp): ~(interrupt number) */ | 787 | /* 0(%rsp): ~(interrupt number) */ |
784 | .macro interrupt func | 788 | .macro interrupt func |
785 | subq $ORIG_RAX-ARGOFFSET+8, %rsp | 789 | /* reserve pt_regs for scratch regs and rbp */ |
786 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-ARGOFFSET+8 | 790 | subq $ORIG_RAX-RBP, %rsp |
791 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP | ||
787 | call save_args | 792 | call save_args |
788 | PARTIAL_FRAME 0 | 793 | PARTIAL_FRAME 0 |
789 | call \func | 794 | call \func |
@@ -808,9 +813,14 @@ ret_from_intr: | |||
808 | TRACE_IRQS_OFF | 813 | TRACE_IRQS_OFF |
809 | decl PER_CPU_VAR(irq_count) | 814 | decl PER_CPU_VAR(irq_count) |
810 | leaveq | 815 | leaveq |
816 | |||
811 | CFI_RESTORE rbp | 817 | CFI_RESTORE rbp |
812 | CFI_DEF_CFA_REGISTER rsp | 818 | CFI_DEF_CFA_REGISTER rsp |
813 | CFI_ADJUST_CFA_OFFSET -8 | 819 | CFI_ADJUST_CFA_OFFSET -8 |
820 | |||
821 | /* we did not save rbx, restore only from ARGOFFSET */ | ||
822 | addq $8, %rsp | ||
823 | CFI_ADJUST_CFA_OFFSET -8 | ||
814 | exit_intr: | 824 | exit_intr: |
815 | GET_THREAD_INFO(%rcx) | 825 | GET_THREAD_INFO(%rcx) |
816 | testl $3,CS-ARGOFFSET(%rsp) | 826 | testl $3,CS-ARGOFFSET(%rsp) |
@@ -1319,6 +1329,9 @@ errorentry xen_stack_segment do_stack_segment | |||
1319 | #endif | 1329 | #endif |
1320 | errorentry general_protection do_general_protection | 1330 | errorentry general_protection do_general_protection |
1321 | errorentry page_fault do_page_fault | 1331 | errorentry page_fault do_page_fault |
1332 | #ifdef CONFIG_KVM_GUEST | ||
1333 | errorentry async_page_fault do_async_page_fault | ||
1334 | #endif | ||
1322 | #ifdef CONFIG_X86_MCE | 1335 | #ifdef CONFIG_X86_MCE |
1323 | paranoidzeroentry machine_check *machine_check_vector(%rip) | 1336 | paranoidzeroentry machine_check *machine_check_vector(%rip) |
1324 | #endif | 1337 | #endif |
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 298448656b60..382eb2936d4d 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
@@ -170,9 +170,9 @@ static void ftrace_mod_code(void) | |||
170 | 170 | ||
171 | void ftrace_nmi_enter(void) | 171 | void ftrace_nmi_enter(void) |
172 | { | 172 | { |
173 | __get_cpu_var(save_modifying_code) = modifying_code; | 173 | __this_cpu_write(save_modifying_code, modifying_code); |
174 | 174 | ||
175 | if (!__get_cpu_var(save_modifying_code)) | 175 | if (!__this_cpu_read(save_modifying_code)) |
176 | return; | 176 | return; |
177 | 177 | ||
178 | if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) { | 178 | if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) { |
@@ -186,7 +186,7 @@ void ftrace_nmi_enter(void) | |||
186 | 186 | ||
187 | void ftrace_nmi_exit(void) | 187 | void ftrace_nmi_exit(void) |
188 | { | 188 | { |
189 | if (!__get_cpu_var(save_modifying_code)) | 189 | if (!__this_cpu_read(save_modifying_code)) |
190 | return; | 190 | return; |
191 | 191 | ||
192 | /* Finish all executions before clearing nmi_running */ | 192 | /* Finish all executions before clearing nmi_running */ |
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 9f54b209c378..fc293dc8dc35 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -126,7 +126,7 @@ ENTRY(startup_32) | |||
126 | movsl | 126 | movsl |
127 | movl pa(boot_params) + NEW_CL_POINTER,%esi | 127 | movl pa(boot_params) + NEW_CL_POINTER,%esi |
128 | andl %esi,%esi | 128 | andl %esi,%esi |
129 | jz 1f # No comand line | 129 | jz 1f # No command line |
130 | movl $pa(boot_command_line),%edi | 130 | movl $pa(boot_command_line),%edi |
131 | movl $(COMMAND_LINE_SIZE/4),%ecx | 131 | movl $(COMMAND_LINE_SIZE/4),%ecx |
132 | rep | 132 | rep |
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 42c594254507..02f07634d265 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c | |||
@@ -122,7 +122,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) | |||
122 | return -EBUSY; | 122 | return -EBUSY; |
123 | 123 | ||
124 | set_debugreg(info->address, i); | 124 | set_debugreg(info->address, i); |
125 | __get_cpu_var(cpu_debugreg[i]) = info->address; | 125 | __this_cpu_write(cpu_debugreg[i], info->address); |
126 | 126 | ||
127 | dr7 = &__get_cpu_var(cpu_dr7); | 127 | dr7 = &__get_cpu_var(cpu_dr7); |
128 | *dr7 |= encode_dr7(i, info->len, info->type); | 128 | *dr7 |= encode_dr7(i, info->len, info->type); |
@@ -397,12 +397,12 @@ void flush_ptrace_hw_breakpoint(struct task_struct *tsk) | |||
397 | 397 | ||
398 | void hw_breakpoint_restore(void) | 398 | void hw_breakpoint_restore(void) |
399 | { | 399 | { |
400 | set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0); | 400 | set_debugreg(__this_cpu_read(cpu_debugreg[0]), 0); |
401 | set_debugreg(__get_cpu_var(cpu_debugreg[1]), 1); | 401 | set_debugreg(__this_cpu_read(cpu_debugreg[1]), 1); |
402 | set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2); | 402 | set_debugreg(__this_cpu_read(cpu_debugreg[2]), 2); |
403 | set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3); | 403 | set_debugreg(__this_cpu_read(cpu_debugreg[3]), 3); |
404 | set_debugreg(current->thread.debugreg6, 6); | 404 | set_debugreg(current->thread.debugreg6, 6); |
405 | set_debugreg(__get_cpu_var(cpu_dr7), 7); | 405 | set_debugreg(__this_cpu_read(cpu_dr7), 7); |
406 | } | 406 | } |
407 | EXPORT_SYMBOL_GPL(hw_breakpoint_restore); | 407 | EXPORT_SYMBOL_GPL(hw_breakpoint_restore); |
408 | 408 | ||
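hw_breakpoint keeps per-cpu shadows of DR0-DR3 and DR7 precisely so hw_breakpoint_restore() can reload the hardware after events like S3 resume; the hunk above only switches the shadow accesses to the cheaper __this_cpu_read()/__this_cpu_write() form. A sketch of the shadow-and-restore idea, with illustrative names:

	#include <linux/percpu.h>
	#include <asm/debugreg.h>

	static DEFINE_PER_CPU(unsigned long, demo_dr_shadow[4]);

	static void demo_install(int slot, unsigned long addr)
	{
		set_debugreg(addr, slot);			/* hardware */
		__this_cpu_write(demo_dr_shadow[slot], addr);	/* shadow */
	}

	static void demo_restore(void)
	{
		int slot;

		for (slot = 0; slot < 4; slot++)
			set_debugreg(__this_cpu_read(demo_dr_shadow[slot]), slot);
	}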
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 58bb239a2fd7..e60c38cc0eed 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
@@ -169,6 +169,7 @@ int init_fpu(struct task_struct *tsk) | |||
169 | set_stopped_child_used_math(tsk); | 169 | set_stopped_child_used_math(tsk); |
170 | return 0; | 170 | return 0; |
171 | } | 171 | } |
172 | EXPORT_SYMBOL_GPL(init_fpu); | ||
172 | 173 | ||
173 | /* | 174 | /* |
174 | * The xstateregs_active() routine is the same as the fpregs_active() routine, | 175 | * The xstateregs_active() routine is the same as the fpregs_active() routine, |
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 83ec0175f986..52945da52a94 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -4,6 +4,7 @@ | |||
4 | #include <linux/cpu.h> | 4 | #include <linux/cpu.h> |
5 | #include <linux/interrupt.h> | 5 | #include <linux/interrupt.h> |
6 | #include <linux/kernel_stat.h> | 6 | #include <linux/kernel_stat.h> |
7 | #include <linux/of.h> | ||
7 | #include <linux/seq_file.h> | 8 | #include <linux/seq_file.h> |
8 | #include <linux/smp.h> | 9 | #include <linux/smp.h> |
9 | #include <linux/ftrace.h> | 10 | #include <linux/ftrace.h> |
@@ -234,7 +235,7 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs) | |||
234 | exit_idle(); | 235 | exit_idle(); |
235 | irq_enter(); | 236 | irq_enter(); |
236 | 237 | ||
237 | irq = __get_cpu_var(vector_irq)[vector]; | 238 | irq = __this_cpu_read(vector_irq[vector]); |
238 | 239 | ||
239 | if (!handle_irq(irq, regs)) { | 240 | if (!handle_irq(irq, regs)) { |
240 | ack_APIC_irq(); | 241 | ack_APIC_irq(); |
@@ -275,6 +276,15 @@ void smp_x86_platform_ipi(struct pt_regs *regs) | |||
275 | 276 | ||
276 | EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); | 277 | EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); |
277 | 278 | ||
279 | #ifdef CONFIG_OF | ||
280 | unsigned int irq_create_of_mapping(struct device_node *controller, | ||
281 | const u32 *intspec, unsigned int intsize) | ||
282 | { | ||
283 | return intspec[0]; | ||
284 | } | ||
285 | EXPORT_SYMBOL_GPL(irq_create_of_mapping); | ||
286 | #endif | ||
287 | |||
278 | #ifdef CONFIG_HOTPLUG_CPU | 288 | #ifdef CONFIG_HOTPLUG_CPU |
279 | /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ | 289 | /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ |
280 | void fixup_irqs(void) | 290 | void fixup_irqs(void) |
@@ -350,12 +360,12 @@ void fixup_irqs(void) | |||
350 | for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { | 360 | for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { |
351 | unsigned int irr; | 361 | unsigned int irr; |
352 | 362 | ||
353 | if (__get_cpu_var(vector_irq)[vector] < 0) | 363 | if (__this_cpu_read(vector_irq[vector]) < 0) |
354 | continue; | 364 | continue; |
355 | 365 | ||
356 | irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); | 366 | irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); |
357 | if (irr & (1 << (vector % 32))) { | 367 | if (irr & (1 << (vector % 32))) { |
358 | irq = __get_cpu_var(vector_irq)[vector]; | 368 | irq = __this_cpu_read(vector_irq[vector]); |
359 | 369 | ||
360 | data = irq_get_irq_data(irq); | 370 | data = irq_get_irq_data(irq); |
361 | raw_spin_lock(&desc->lock); | 371 | raw_spin_lock(&desc->lock); |
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 96656f207751..48ff6dcffa02 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c | |||
@@ -79,7 +79,7 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) | |||
79 | u32 *isp, arg1, arg2; | 79 | u32 *isp, arg1, arg2; |
80 | 80 | ||
81 | curctx = (union irq_ctx *) current_thread_info(); | 81 | curctx = (union irq_ctx *) current_thread_info(); |
82 | irqctx = __get_cpu_var(hardirq_ctx); | 82 | irqctx = __this_cpu_read(hardirq_ctx); |
83 | 83 | ||
84 | /* | 84 | /* |
85 | * this is where we switch to the IRQ stack. However, if we are | 85 | * this is where we switch to the IRQ stack. However, if we are |
@@ -166,7 +166,7 @@ asmlinkage void do_softirq(void) | |||
166 | 166 | ||
167 | if (local_softirq_pending()) { | 167 | if (local_softirq_pending()) { |
168 | curctx = current_thread_info(); | 168 | curctx = current_thread_info(); |
169 | irqctx = __get_cpu_var(softirq_ctx); | 169 | irqctx = __this_cpu_read(softirq_ctx); |
170 | irqctx->tinfo.task = curctx->task; | 170 | irqctx->tinfo.task = curctx->task; |
171 | irqctx->tinfo.previous_esp = current_stack_pointer; | 171 | irqctx->tinfo.previous_esp = current_stack_pointer; |
172 | 172 | ||
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index cd21b654dec6..a4130005028a 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c | |||
@@ -48,6 +48,7 @@ | |||
48 | #include <asm/apicdef.h> | 48 | #include <asm/apicdef.h> |
49 | #include <asm/system.h> | 49 | #include <asm/system.h> |
50 | #include <asm/apic.h> | 50 | #include <asm/apic.h> |
51 | #include <asm/nmi.h> | ||
51 | 52 | ||
52 | struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = | 53 | struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = |
53 | { | 54 | { |
@@ -525,10 +526,6 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) | |||
525 | } | 526 | } |
526 | return NOTIFY_DONE; | 527 | return NOTIFY_DONE; |
527 | 528 | ||
528 | case DIE_NMI_IPI: | ||
529 | /* Just ignore, we will handle the roundup on DIE_NMI. */ | ||
530 | return NOTIFY_DONE; | ||
531 | |||
532 | case DIE_NMIUNKNOWN: | 529 | case DIE_NMIUNKNOWN: |
533 | if (was_in_debug_nmi[raw_smp_processor_id()]) { | 530 | if (was_in_debug_nmi[raw_smp_processor_id()]) { |
534 | was_in_debug_nmi[raw_smp_processor_id()] = 0; | 531 | was_in_debug_nmi[raw_smp_processor_id()] = 0; |
@@ -606,7 +603,7 @@ static struct notifier_block kgdb_notifier = { | |||
606 | /* | 603 | /* |
607 | * Lowest-prio notifier priority, we want to be notified last: | 604 | * Lowest-prio notifier priority, we want to be notified last: |
608 | */ | 605 | */ |
609 | .priority = -INT_MAX, | 606 | .priority = NMI_LOCAL_LOW_PRIOR, |
610 | }; | 607 | }; |
611 | 608 | ||
612 | /** | 609 | /** |
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 5940282bd2f9..d91c477b3f62 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
@@ -403,7 +403,7 @@ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) | |||
403 | 403 | ||
404 | static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) | 404 | static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) |
405 | { | 405 | { |
406 | __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; | 406 | __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); |
407 | kcb->kprobe_status = kcb->prev_kprobe.status; | 407 | kcb->kprobe_status = kcb->prev_kprobe.status; |
408 | kcb->kprobe_old_flags = kcb->prev_kprobe.old_flags; | 408 | kcb->kprobe_old_flags = kcb->prev_kprobe.old_flags; |
409 | kcb->kprobe_saved_flags = kcb->prev_kprobe.saved_flags; | 409 | kcb->kprobe_saved_flags = kcb->prev_kprobe.saved_flags; |
@@ -412,7 +412,7 @@ static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) | |||
412 | static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, | 412 | static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, |
413 | struct kprobe_ctlblk *kcb) | 413 | struct kprobe_ctlblk *kcb) |
414 | { | 414 | { |
415 | __get_cpu_var(current_kprobe) = p; | 415 | __this_cpu_write(current_kprobe, p); |
416 | kcb->kprobe_saved_flags = kcb->kprobe_old_flags | 416 | kcb->kprobe_saved_flags = kcb->kprobe_old_flags |
417 | = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF)); | 417 | = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF)); |
418 | if (is_IF_modifier(p->ainsn.insn)) | 418 | if (is_IF_modifier(p->ainsn.insn)) |
@@ -586,7 +586,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) | |||
586 | preempt_enable_no_resched(); | 586 | preempt_enable_no_resched(); |
587 | return 1; | 587 | return 1; |
588 | } else if (kprobe_running()) { | 588 | } else if (kprobe_running()) { |
589 | p = __get_cpu_var(current_kprobe); | 589 | p = __this_cpu_read(current_kprobe); |
590 | if (p->break_handler && p->break_handler(p, regs)) { | 590 | if (p->break_handler && p->break_handler(p, regs)) { |
591 | setup_singlestep(p, regs, kcb, 0); | 591 | setup_singlestep(p, regs, kcb, 0); |
592 | return 1; | 592 | return 1; |
@@ -759,11 +759,11 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) | |||
759 | 759 | ||
760 | orig_ret_address = (unsigned long)ri->ret_addr; | 760 | orig_ret_address = (unsigned long)ri->ret_addr; |
761 | if (ri->rp && ri->rp->handler) { | 761 | if (ri->rp && ri->rp->handler) { |
762 | __get_cpu_var(current_kprobe) = &ri->rp->kp; | 762 | __this_cpu_write(current_kprobe, &ri->rp->kp); |
763 | get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; | 763 | get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; |
764 | ri->ret_addr = correct_ret_addr; | 764 | ri->ret_addr = correct_ret_addr; |
765 | ri->rp->handler(ri, regs); | 765 | ri->rp->handler(ri, regs); |
766 | __get_cpu_var(current_kprobe) = NULL; | 766 | __this_cpu_write(current_kprobe, NULL); |
767 | } | 767 | } |
768 | 768 | ||
769 | recycle_rp_inst(ri, &empty_rp); | 769 | recycle_rp_inst(ri, &empty_rp); |
@@ -1202,10 +1202,10 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op, | |||
1202 | regs->ip = (unsigned long)op->kp.addr + INT3_SIZE; | 1202 | regs->ip = (unsigned long)op->kp.addr + INT3_SIZE; |
1203 | regs->orig_ax = ~0UL; | 1203 | regs->orig_ax = ~0UL; |
1204 | 1204 | ||
1205 | __get_cpu_var(current_kprobe) = &op->kp; | 1205 | __this_cpu_write(current_kprobe, &op->kp); |
1206 | kcb->kprobe_status = KPROBE_HIT_ACTIVE; | 1206 | kcb->kprobe_status = KPROBE_HIT_ACTIVE; |
1207 | opt_pre_handler(&op->kp, regs); | 1207 | opt_pre_handler(&op->kp, regs); |
1208 | __get_cpu_var(current_kprobe) = NULL; | 1208 | __this_cpu_write(current_kprobe, NULL); |
1209 | } | 1209 | } |
1210 | preempt_enable_no_resched(); | 1210 | preempt_enable_no_resched(); |
1211 | } | 1211 | } |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 63b0ec8d3d4a..8dc44662394b 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -27,16 +27,37 @@ | |||
27 | #include <linux/mm.h> | 27 | #include <linux/mm.h> |
28 | #include <linux/highmem.h> | 28 | #include <linux/highmem.h> |
29 | #include <linux/hardirq.h> | 29 | #include <linux/hardirq.h> |
30 | #include <linux/notifier.h> | ||
31 | #include <linux/reboot.h> | ||
32 | #include <linux/hash.h> | ||
33 | #include <linux/sched.h> | ||
34 | #include <linux/slab.h> | ||
35 | #include <linux/kprobes.h> | ||
30 | #include <asm/timer.h> | 36 | #include <asm/timer.h> |
37 | #include <asm/cpu.h> | ||
38 | #include <asm/traps.h> | ||
39 | #include <asm/desc.h> | ||
40 | #include <asm/tlbflush.h> | ||
31 | 41 | ||
32 | #define MMU_QUEUE_SIZE 1024 | 42 | #define MMU_QUEUE_SIZE 1024 |
33 | 43 | ||
44 | static int kvmapf = 1; | ||
45 | |||
46 | static int parse_no_kvmapf(char *arg) | ||
47 | { | ||
48 | kvmapf = 0; | ||
49 | return 0; | ||
50 | } | ||
51 | |||
52 | early_param("no-kvmapf", parse_no_kvmapf); | ||
53 | |||
34 | struct kvm_para_state { | 54 | struct kvm_para_state { |
35 | u8 mmu_queue[MMU_QUEUE_SIZE]; | 55 | u8 mmu_queue[MMU_QUEUE_SIZE]; |
36 | int mmu_queue_len; | 56 | int mmu_queue_len; |
37 | }; | 57 | }; |
38 | 58 | ||
39 | static DEFINE_PER_CPU(struct kvm_para_state, para_state); | 59 | static DEFINE_PER_CPU(struct kvm_para_state, para_state); |
60 | static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); | ||
40 | 61 | ||
41 | static struct kvm_para_state *kvm_para_state(void) | 62 | static struct kvm_para_state *kvm_para_state(void) |
42 | { | 63 | { |
@@ -50,6 +71,195 @@ static void kvm_io_delay(void) | |||
50 | { | 71 | { |
51 | } | 72 | } |
52 | 73 | ||
74 | #define KVM_TASK_SLEEP_HASHBITS 8 | ||
75 | #define KVM_TASK_SLEEP_HASHSIZE (1<<KVM_TASK_SLEEP_HASHBITS) | ||
76 | |||
77 | struct kvm_task_sleep_node { | ||
78 | struct hlist_node link; | ||
79 | wait_queue_head_t wq; | ||
80 | u32 token; | ||
81 | int cpu; | ||
82 | bool halted; | ||
83 | struct mm_struct *mm; | ||
84 | }; | ||
85 | |||
86 | static struct kvm_task_sleep_head { | ||
87 | spinlock_t lock; | ||
88 | struct hlist_head list; | ||
89 | } async_pf_sleepers[KVM_TASK_SLEEP_HASHSIZE]; | ||
90 | |||
91 | static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b, | ||
92 | u32 token) | ||
93 | { | ||
94 | struct hlist_node *p; | ||
95 | |||
96 | hlist_for_each(p, &b->list) { | ||
97 | struct kvm_task_sleep_node *n = | ||
98 | hlist_entry(p, typeof(*n), link); | ||
99 | if (n->token == token) | ||
100 | return n; | ||
101 | } | ||
102 | |||
103 | return NULL; | ||
104 | } | ||
105 | |||
106 | void kvm_async_pf_task_wait(u32 token) | ||
107 | { | ||
108 | u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS); | ||
109 | struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; | ||
110 | struct kvm_task_sleep_node n, *e; | ||
111 | DEFINE_WAIT(wait); | ||
112 | int cpu, idle; | ||
113 | |||
114 | cpu = get_cpu(); | ||
115 | idle = idle_cpu(cpu); | ||
116 | put_cpu(); | ||
117 | |||
118 | spin_lock(&b->lock); | ||
119 | e = _find_apf_task(b, token); | ||
120 | if (e) { | ||
121 | /* dummy entry exist -> wake up was delivered ahead of PF */ | ||
122 | hlist_del(&e->link); | ||
123 | kfree(e); | ||
124 | spin_unlock(&b->lock); | ||
125 | return; | ||
126 | } | ||
127 | |||
128 | n.token = token; | ||
129 | n.cpu = smp_processor_id(); | ||
130 | n.mm = current->active_mm; | ||
131 | n.halted = idle || preempt_count() > 1; | ||
132 | atomic_inc(&n.mm->mm_count); | ||
133 | init_waitqueue_head(&n.wq); | ||
134 | hlist_add_head(&n.link, &b->list); | ||
135 | spin_unlock(&b->lock); | ||
136 | |||
137 | for (;;) { | ||
138 | if (!n.halted) | ||
139 | prepare_to_wait(&n.wq, &wait, TASK_UNINTERRUPTIBLE); | ||
140 | if (hlist_unhashed(&n.link)) | ||
141 | break; | ||
142 | |||
143 | if (!n.halted) { | ||
144 | local_irq_enable(); | ||
145 | schedule(); | ||
146 | local_irq_disable(); | ||
147 | } else { | ||
148 | /* | ||
149 | * We cannot reschedule. So halt. | ||
150 | */ | ||
151 | native_safe_halt(); | ||
152 | local_irq_disable(); | ||
153 | } | ||
154 | } | ||
155 | if (!n.halted) | ||
156 | finish_wait(&n.wq, &wait); | ||
157 | |||
158 | return; | ||
159 | } | ||
160 | EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait); | ||
161 | |||
162 | static void apf_task_wake_one(struct kvm_task_sleep_node *n) | ||
163 | { | ||
164 | hlist_del_init(&n->link); | ||
165 | if (!n->mm) | ||
166 | return; | ||
167 | mmdrop(n->mm); | ||
168 | if (n->halted) | ||
169 | smp_send_reschedule(n->cpu); | ||
170 | else if (waitqueue_active(&n->wq)) | ||
171 | wake_up(&n->wq); | ||
172 | } | ||
173 | |||
174 | static void apf_task_wake_all(void) | ||
175 | { | ||
176 | int i; | ||
177 | |||
178 | for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) { | ||
179 | struct hlist_node *p, *next; | ||
180 | struct kvm_task_sleep_head *b = &async_pf_sleepers[i]; | ||
181 | spin_lock(&b->lock); | ||
182 | hlist_for_each_safe(p, next, &b->list) { | ||
183 | struct kvm_task_sleep_node *n = | ||
184 | hlist_entry(p, typeof(*n), link); | ||
185 | if (n->cpu == smp_processor_id()) | ||
186 | apf_task_wake_one(n); | ||
187 | } | ||
188 | spin_unlock(&b->lock); | ||
189 | } | ||
190 | } | ||
191 | |||
192 | void kvm_async_pf_task_wake(u32 token) | ||
193 | { | ||
194 | u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS); | ||
195 | struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; | ||
196 | struct kvm_task_sleep_node *n; | ||
197 | |||
198 | if (token == ~0) { | ||
199 | apf_task_wake_all(); | ||
200 | return; | ||
201 | } | ||
202 | |||
203 | again: | ||
204 | spin_lock(&b->lock); | ||
205 | n = _find_apf_task(b, token); | ||
206 | if (!n) { | ||
207 | /* | ||
208 | * async PF was not yet handled. | ||
209 | * Add dummy entry for the token. | ||
210 | */ | ||
211 | n = kmalloc(sizeof(*n), GFP_ATOMIC); | ||
212 | if (!n) { | ||
213 | /* | ||
214 | * Allocation failed! Busy wait while other cpu | ||
215 | * handles async PF. | ||
216 | */ | ||
217 | spin_unlock(&b->lock); | ||
218 | cpu_relax(); | ||
219 | goto again; | ||
220 | } | ||
221 | n->token = token; | ||
222 | n->cpu = smp_processor_id(); | ||
223 | n->mm = NULL; | ||
224 | init_waitqueue_head(&n->wq); | ||
225 | hlist_add_head(&n->link, &b->list); | ||
226 | } else | ||
227 | apf_task_wake_one(n); | ||
228 | spin_unlock(&b->lock); | ||
229 | return; | ||
230 | } | ||
231 | EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake); | ||
232 | |||
233 | u32 kvm_read_and_reset_pf_reason(void) | ||
234 | { | ||
235 | u32 reason = 0; | ||
236 | |||
237 | if (__get_cpu_var(apf_reason).enabled) { | ||
238 | reason = __get_cpu_var(apf_reason).reason; | ||
239 | __get_cpu_var(apf_reason).reason = 0; | ||
240 | } | ||
241 | |||
242 | return reason; | ||
243 | } | ||
244 | EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason); | ||
245 | |||
246 | dotraplinkage void __kprobes | ||
247 | do_async_page_fault(struct pt_regs *regs, unsigned long error_code) | ||
248 | { | ||
249 | switch (kvm_read_and_reset_pf_reason()) { | ||
250 | default: | ||
251 | do_page_fault(regs, error_code); | ||
252 | break; | ||
253 | case KVM_PV_REASON_PAGE_NOT_PRESENT: | ||
254 | /* page is swapped out by the host. */ | ||
255 | kvm_async_pf_task_wait((u32)read_cr2()); | ||
256 | break; | ||
257 | case KVM_PV_REASON_PAGE_READY: | ||
258 | kvm_async_pf_task_wake((u32)read_cr2()); | ||
259 | break; | ||
260 | } | ||
261 | } | ||
262 | |||
53 | static void kvm_mmu_op(void *buffer, unsigned len) | 263 | static void kvm_mmu_op(void *buffer, unsigned len) |
54 | { | 264 | { |
55 | int r; | 265 | int r; |
@@ -231,10 +441,117 @@ static void __init paravirt_ops_setup(void) | |||
231 | #endif | 441 | #endif |
232 | } | 442 | } |
233 | 443 | ||
444 | void __cpuinit kvm_guest_cpu_init(void) | ||
445 | { | ||
446 | if (!kvm_para_available()) | ||
447 | return; | ||
448 | |||
449 | if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) { | ||
450 | u64 pa = __pa(&__get_cpu_var(apf_reason)); | ||
451 | |||
452 | #ifdef CONFIG_PREEMPT | ||
453 | pa |= KVM_ASYNC_PF_SEND_ALWAYS; | ||
454 | #endif | ||
455 | wrmsrl(MSR_KVM_ASYNC_PF_EN, pa | KVM_ASYNC_PF_ENABLED); | ||
456 | __get_cpu_var(apf_reason).enabled = 1; | ||
457 | printk(KERN_INFO"KVM setup async PF for cpu %d\n", | ||
458 | smp_processor_id()); | ||
459 | } | ||
460 | } | ||
461 | |||
462 | static void kvm_pv_disable_apf(void *unused) | ||
463 | { | ||
464 | if (!__get_cpu_var(apf_reason).enabled) | ||
465 | return; | ||
466 | |||
467 | wrmsrl(MSR_KVM_ASYNC_PF_EN, 0); | ||
468 | __get_cpu_var(apf_reason).enabled = 0; | ||
469 | |||
470 | printk(KERN_INFO"Unregister pv shared memory for cpu %d\n", | ||
471 | smp_processor_id()); | ||
472 | } | ||
473 | |||
474 | static int kvm_pv_reboot_notify(struct notifier_block *nb, | ||
475 | unsigned long code, void *unused) | ||
476 | { | ||
477 | if (code == SYS_RESTART) | ||
478 | on_each_cpu(kvm_pv_disable_apf, NULL, 1); | ||
479 | return NOTIFY_DONE; | ||
480 | } | ||
481 | |||
482 | static struct notifier_block kvm_pv_reboot_nb = { | ||
483 | .notifier_call = kvm_pv_reboot_notify, | ||
484 | }; | ||
485 | |||
486 | #ifdef CONFIG_SMP | ||
487 | static void __init kvm_smp_prepare_boot_cpu(void) | ||
488 | { | ||
489 | #ifdef CONFIG_KVM_CLOCK | ||
490 | WARN_ON(kvm_register_clock("primary cpu clock")); | ||
491 | #endif | ||
492 | kvm_guest_cpu_init(); | ||
493 | native_smp_prepare_boot_cpu(); | ||
494 | } | ||
495 | |||
496 | static void kvm_guest_cpu_online(void *dummy) | ||
497 | { | ||
498 | kvm_guest_cpu_init(); | ||
499 | } | ||
500 | |||
501 | static void kvm_guest_cpu_offline(void *dummy) | ||
502 | { | ||
503 | kvm_pv_disable_apf(NULL); | ||
504 | apf_task_wake_all(); | ||
505 | } | ||
506 | |||
507 | static int __cpuinit kvm_cpu_notify(struct notifier_block *self, | ||
508 | unsigned long action, void *hcpu) | ||
509 | { | ||
510 | int cpu = (unsigned long)hcpu; | ||
511 | switch (action) { | ||
512 | case CPU_ONLINE: | ||
513 | case CPU_DOWN_FAILED: | ||
514 | case CPU_ONLINE_FROZEN: | ||
515 | smp_call_function_single(cpu, kvm_guest_cpu_online, NULL, 0); | ||
516 | break; | ||
517 | case CPU_DOWN_PREPARE: | ||
518 | case CPU_DOWN_PREPARE_FROZEN: | ||
519 | smp_call_function_single(cpu, kvm_guest_cpu_offline, NULL, 1); | ||
520 | break; | ||
521 | default: | ||
522 | break; | ||
523 | } | ||
524 | return NOTIFY_OK; | ||
525 | } | ||
526 | |||
527 | static struct notifier_block __cpuinitdata kvm_cpu_notifier = { | ||
528 | .notifier_call = kvm_cpu_notify, | ||
529 | }; | ||
530 | #endif | ||
531 | |||
532 | static void __init kvm_apf_trap_init(void) | ||
533 | { | ||
534 | set_intr_gate(14, &async_page_fault); | ||
535 | } | ||
536 | |||
234 | void __init kvm_guest_init(void) | 537 | void __init kvm_guest_init(void) |
235 | { | 538 | { |
539 | int i; | ||
540 | |||
236 | if (!kvm_para_available()) | 541 | if (!kvm_para_available()) |
237 | return; | 542 | return; |
238 | 543 | ||
239 | paravirt_ops_setup(); | 544 | paravirt_ops_setup(); |
545 | register_reboot_notifier(&kvm_pv_reboot_nb); | ||
546 | for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) | ||
547 | spin_lock_init(&async_pf_sleepers[i].lock); | ||
548 | if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF)) | ||
549 | x86_init.irqs.trap_init = kvm_apf_trap_init; | ||
550 | |||
551 | #ifdef CONFIG_SMP | ||
552 | smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; | ||
553 | register_cpu_notifier(&kvm_cpu_notifier); | ||
554 | #else | ||
555 | kvm_guest_cpu_init(); | ||
556 | #endif | ||
240 | } | 557 | } |
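Reading the kvm.c additions as a whole: on a KVM_PV_REASON_PAGE_NOT_PRESENT fault the guest parks the current task in a hash bucket keyed by the host-supplied token, and the later KVM_PV_REASON_PAGE_READY fault with the same token wakes it (or leaves a dummy node if the wake races ahead of the wait). A minimal sketch of the bucket selection both paths must agree on; DEMO_HASHBITS stands in for KVM_TASK_SLEEP_HASHBITS:

	#include <linux/types.h>
	#include <linux/hash.h>

	#define DEMO_HASHBITS 8

	/* wait and wake must map the same token to the same bucket */
	static unsigned int demo_bucket(u32 token)
	{
		return hash_32(token, DEMO_HASHBITS);
	}

	/*
	 * PAGE_NOT_PRESENT: kvm_async_pf_task_wait(token) sleeps, or halts
	 * when rescheduling is impossible (idle, preemption held).
	 * PAGE_READY: kvm_async_pf_task_wake(token) wakes the sleeper.
	 */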
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index ca43ce31a19c..f98d3eafe07a 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -125,7 +125,7 @@ static struct clocksource kvm_clock = { | |||
125 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | 125 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
126 | }; | 126 | }; |
127 | 127 | ||
128 | static int kvm_register_clock(char *txt) | 128 | int kvm_register_clock(char *txt) |
129 | { | 129 | { |
130 | int cpu = smp_processor_id(); | 130 | int cpu = smp_processor_id(); |
131 | int low, high, ret; | 131 | int low, high, ret; |
@@ -152,14 +152,6 @@ static void __cpuinit kvm_setup_secondary_clock(void) | |||
152 | } | 152 | } |
153 | #endif | 153 | #endif |
154 | 154 | ||
155 | #ifdef CONFIG_SMP | ||
156 | static void __init kvm_smp_prepare_boot_cpu(void) | ||
157 | { | ||
158 | WARN_ON(kvm_register_clock("primary cpu clock")); | ||
159 | native_smp_prepare_boot_cpu(); | ||
160 | } | ||
161 | #endif | ||
162 | |||
163 | /* | 155 | /* |
164 | * After the clock is registered, the host will keep writing to the | 156 | * After the clock is registered, the host will keep writing to the |
165 | * registered memory location. If the guest happens to shutdown, this memory | 157 | * registered memory location. If the guest happens to shutdown, this memory |
@@ -206,9 +198,6 @@ void __init kvmclock_init(void) | |||
206 | x86_cpuinit.setup_percpu_clockev = | 198 | x86_cpuinit.setup_percpu_clockev = |
207 | kvm_setup_secondary_clock; | 199 | kvm_setup_secondary_clock; |
208 | #endif | 200 | #endif |
209 | #ifdef CONFIG_SMP | ||
210 | smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; | ||
211 | #endif | ||
212 | machine_ops.shutdown = kvm_shutdown; | 201 | machine_ops.shutdown = kvm_shutdown; |
213 | #ifdef CONFIG_KEXEC | 202 | #ifdef CONFIG_KEXEC |
214 | machine_ops.crash_shutdown = kvm_crash_shutdown; | 203 | machine_ops.crash_shutdown = kvm_crash_shutdown; |
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 8f2956091735..ab23f1ad4bf1 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c | |||
@@ -37,20 +37,11 @@ | |||
37 | 37 | ||
38 | void *module_alloc(unsigned long size) | 38 | void *module_alloc(unsigned long size) |
39 | { | 39 | { |
40 | struct vm_struct *area; | 40 | if (PAGE_ALIGN(size) > MODULES_LEN) |
41 | |||
42 | if (!size) | ||
43 | return NULL; | ||
44 | size = PAGE_ALIGN(size); | ||
45 | if (size > MODULES_LEN) | ||
46 | return NULL; | 41 | return NULL; |
47 | 42 | return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, | |
48 | area = __get_vm_area(size, VM_ALLOC, MODULES_VADDR, MODULES_END); | 43 | GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC, |
49 | if (!area) | 44 | -1, __builtin_return_address(0)); |
50 | return NULL; | ||
51 | |||
52 | return __vmalloc_area(area, GFP_KERNEL | __GFP_HIGHMEM, | ||
53 | PAGE_KERNEL_EXEC); | ||
54 | } | 45 | } |
55 | 46 | ||
56 | /* Free memory returned from module_alloc */ | 47 | /* Free memory returned from module_alloc */ |
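The module_alloc() rewrite above folds the open-coded __get_vm_area()/__vmalloc_area() pair into one __vmalloc_node_range() call that allocates executable pages inside [MODULES_VADDR, MODULES_END), keeping modules within branch range of the kernel text. A hedged sketch of an equivalent caller:

	#include <linux/vmalloc.h>
	#include <linux/gfp.h>

	static void *demo_alloc_exec(unsigned long size)
	{
		if (PAGE_ALIGN(size) > MODULES_LEN)
			return NULL;
		/* align 1, any NUMA node (-1), kernel-executable protection */
		return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
					    GFP_KERNEL | __GFP_HIGHMEM,
					    PAGE_KERNEL_EXEC, -1,
					    __builtin_return_address(0));
	}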
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index c5b250011fd4..869e1aeeb71b 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -421,8 +421,11 @@ struct pv_mmu_ops pv_mmu_ops = { | |||
421 | .set_pte = native_set_pte, | 421 | .set_pte = native_set_pte, |
422 | .set_pte_at = native_set_pte_at, | 422 | .set_pte_at = native_set_pte_at, |
423 | .set_pmd = native_set_pmd, | 423 | .set_pmd = native_set_pmd, |
424 | .set_pmd_at = native_set_pmd_at, | ||
424 | .pte_update = paravirt_nop, | 425 | .pte_update = paravirt_nop, |
425 | .pte_update_defer = paravirt_nop, | 426 | .pte_update_defer = paravirt_nop, |
427 | .pmd_update = paravirt_nop, | ||
428 | .pmd_update_defer = paravirt_nop, | ||
426 | 429 | ||
427 | .ptep_modify_prot_start = __ptep_modify_prot_start, | 430 | .ptep_modify_prot_start = __ptep_modify_prot_start, |
428 | .ptep_modify_prot_commit = __ptep_modify_prot_commit, | 431 | .ptep_modify_prot_commit = __ptep_modify_prot_commit, |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c852041bfc3d..d8286ed54ffa 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -22,11 +22,6 @@ | |||
22 | #include <asm/i387.h> | 22 | #include <asm/i387.h> |
23 | #include <asm/debugreg.h> | 23 | #include <asm/debugreg.h> |
24 | 24 | ||
25 | unsigned long idle_halt; | ||
26 | EXPORT_SYMBOL(idle_halt); | ||
27 | unsigned long idle_nomwait; | ||
28 | EXPORT_SYMBOL(idle_nomwait); | ||
29 | |||
30 | struct kmem_cache *task_xstate_cachep; | 25 | struct kmem_cache *task_xstate_cachep; |
31 | EXPORT_SYMBOL_GPL(task_xstate_cachep); | 26 | EXPORT_SYMBOL_GPL(task_xstate_cachep); |
32 | 27 | ||
@@ -327,7 +322,7 @@ long sys_execve(const char __user *name, | |||
327 | /* | 322 | /* |
328 | * Idle related variables and functions | 323 | * Idle related variables and functions |
329 | */ | 324 | */ |
330 | unsigned long boot_option_idle_override = 0; | 325 | unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE; |
331 | EXPORT_SYMBOL(boot_option_idle_override); | 326 | EXPORT_SYMBOL(boot_option_idle_override); |
332 | 327 | ||
333 | /* | 328 | /* |
@@ -386,6 +381,8 @@ void default_idle(void) | |||
386 | else | 381 | else |
387 | local_irq_enable(); | 382 | local_irq_enable(); |
388 | current_thread_info()->status |= TS_POLLING; | 383 | current_thread_info()->status |= TS_POLLING; |
384 | trace_power_end(smp_processor_id()); | ||
385 | trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); | ||
389 | } else { | 386 | } else { |
390 | local_irq_enable(); | 387 | local_irq_enable(); |
391 | /* loop is done by the caller */ | 388 | /* loop is done by the caller */ |
@@ -443,10 +440,8 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); | |||
443 | */ | 440 | */ |
444 | void mwait_idle_with_hints(unsigned long ax, unsigned long cx) | 441 | void mwait_idle_with_hints(unsigned long ax, unsigned long cx) |
445 | { | 442 | { |
446 | trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id()); | ||
447 | trace_cpu_idle((ax>>4)+1, smp_processor_id()); | ||
448 | if (!need_resched()) { | 443 | if (!need_resched()) { |
449 | if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) | 444 | if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR))
450 | clflush((void *)&current_thread_info()->flags); | 445 | clflush((void *)&current_thread_info()->flags); |
451 | 446 | ||
452 | __monitor((void *)&current_thread_info()->flags, 0, 0); | 447 | __monitor((void *)&current_thread_info()->flags, 0, 0); |
@@ -462,7 +457,7 @@ static void mwait_idle(void) | |||
462 | if (!need_resched()) { | 457 | if (!need_resched()) { |
463 | trace_power_start(POWER_CSTATE, 1, smp_processor_id()); | 458 | trace_power_start(POWER_CSTATE, 1, smp_processor_id()); |
464 | trace_cpu_idle(1, smp_processor_id()); | 459 | trace_cpu_idle(1, smp_processor_id()); |
465 | if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) | 460 | if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR)) |
466 | clflush((void *)&current_thread_info()->flags); | 461 | clflush((void *)&current_thread_info()->flags); |
467 | 462 | ||
468 | __monitor((void *)&current_thread_info()->flags, 0, 0); | 463 | __monitor((void *)&current_thread_info()->flags, 0, 0); |
@@ -471,6 +466,8 @@ static void mwait_idle(void) | |||
471 | __sti_mwait(0, 0); | 466 | __sti_mwait(0, 0); |
472 | else | 467 | else |
473 | local_irq_enable(); | 468 | local_irq_enable(); |
469 | trace_power_end(smp_processor_id()); | ||
470 | trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); | ||
474 | } else | 471 | } else |
475 | local_irq_enable(); | 472 | local_irq_enable(); |
476 | } | 473 | } |
@@ -503,7 +500,6 @@ static void poll_idle(void) | |||
503 | * | 500 | * |
504 | * idle=mwait overrides this decision and forces the usage of mwait. | 501 | * idle=mwait overrides this decision and forces the usage of mwait. |
505 | */ | 502 | */ |
506 | static int __cpuinitdata force_mwait; | ||
507 | 503 | ||
508 | #define MWAIT_INFO 0x05 | 504 | #define MWAIT_INFO 0x05 |
509 | #define MWAIT_ECX_EXTENDED_INFO 0x01 | 505 | #define MWAIT_ECX_EXTENDED_INFO 0x01 |
@@ -513,7 +509,7 @@ static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c) | |||
513 | { | 509 | { |
514 | u32 eax, ebx, ecx, edx; | 510 | u32 eax, ebx, ecx, edx; |
515 | 511 | ||
516 | if (force_mwait) | 512 | if (boot_option_idle_override == IDLE_FORCE_MWAIT) |
517 | return 1; | 513 | return 1; |
518 | 514 | ||
519 | if (c->cpuid_level < MWAIT_INFO) | 515 | if (c->cpuid_level < MWAIT_INFO) |
@@ -633,9 +629,10 @@ static int __init idle_setup(char *str) | |||
633 | if (!strcmp(str, "poll")) { | 629 | if (!strcmp(str, "poll")) { |
634 | printk("using polling idle threads.\n"); | 630 | printk("using polling idle threads.\n"); |
635 | pm_idle = poll_idle; | 631 | pm_idle = poll_idle; |
636 | } else if (!strcmp(str, "mwait")) | 632 | boot_option_idle_override = IDLE_POLL; |
637 | force_mwait = 1; | 633 | } else if (!strcmp(str, "mwait")) { |
638 | else if (!strcmp(str, "halt")) { | 634 | boot_option_idle_override = IDLE_FORCE_MWAIT; |
635 | } else if (!strcmp(str, "halt")) { | ||
639 | /* | 636 | /* |
640 | * When the boot option of idle=halt is added, halt is | 637 | * When the boot option of idle=halt is added, halt is |
641 | * forced to be used for CPU idle. In such case CPU C2/C3 | 638 | * forced to be used for CPU idle. In such case CPU C2/C3 |
@@ -644,8 +641,7 @@ static int __init idle_setup(char *str) | |||
644 | * the boot_option_idle_override. | 641 | * the boot_option_idle_override. |
645 | */ | 642 | */ |
646 | pm_idle = default_idle; | 643 | pm_idle = default_idle; |
647 | idle_halt = 1; | 644 | boot_option_idle_override = IDLE_HALT; |
648 | return 0; | ||
649 | } else if (!strcmp(str, "nomwait")) { | 645 | } else if (!strcmp(str, "nomwait")) { |
650 | /* | 646 | /* |
651 | * If the boot option of "idle=nomwait" is added, | 647 | * If the boot option of "idle=nomwait" is added, |
@@ -653,12 +649,10 @@ static int __init idle_setup(char *str) | |||
653 | * states. In such case it won't touch the variable | 649 | * states. In such case it won't touch the variable |
654 | * of boot_option_idle_override. | 650 | * of boot_option_idle_override. |
655 | */ | 651 | */ |
656 | idle_nomwait = 1; | 652 | boot_option_idle_override = IDLE_NOMWAIT; |
657 | return 0; | ||
658 | } else | 653 | } else |
659 | return -1; | 654 | return -1; |
660 | 655 | ||
661 | boot_option_idle_override = 1; | ||
662 | return 0; | 656 | return 0; |
663 | } | 657 | } |
664 | early_param("idle", idle_setup); | 658 | early_param("idle", idle_setup); |
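The process.c hunks above retire the scattered idle_halt/idle_nomwait/force_mwait flags in favor of a single boot_option_idle_override state, so every idle= choice is visible through one variable. A compact model of the resulting parser; the enum values are assumed to mirror asm/processor.h:

	#include <linux/string.h>

	enum demo_idle_override {
		DEMO_IDLE_NO_OVERRIDE = 0,
		DEMO_IDLE_HALT,
		DEMO_IDLE_NOMWAIT,
		DEMO_IDLE_POLL,
		DEMO_IDLE_FORCE_MWAIT,
	};

	static enum demo_idle_override demo_override = DEMO_IDLE_NO_OVERRIDE;

	static int demo_idle_setup(char *str)
	{
		if (!strcmp(str, "poll"))
			demo_override = DEMO_IDLE_POLL;
		else if (!strcmp(str, "mwait"))
			demo_override = DEMO_IDLE_FORCE_MWAIT;
		else if (!strcmp(str, "halt"))
			demo_override = DEMO_IDLE_HALT;
		else if (!strcmp(str, "nomwait"))
			demo_override = DEMO_IDLE_NOMWAIT;
		else
			return -1;
		return 0;
	}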
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 4b9befa0e347..8d128783af47 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -57,8 +57,6 @@ | |||
57 | #include <asm/syscalls.h> | 57 | #include <asm/syscalls.h> |
58 | #include <asm/debugreg.h> | 58 | #include <asm/debugreg.h> |
59 | 59 | ||
60 | #include <trace/events/power.h> | ||
61 | |||
62 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | 60 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); |
63 | 61 | ||
64 | /* | 62 | /* |
@@ -113,8 +111,6 @@ void cpu_idle(void) | |||
113 | stop_critical_timings(); | 111 | stop_critical_timings(); |
114 | pm_idle(); | 112 | pm_idle(); |
115 | start_critical_timings(); | 113 | start_critical_timings(); |
116 | trace_power_end(smp_processor_id()); | ||
117 | trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); | ||
118 | } | 114 | } |
119 | tick_nohz_restart_sched_tick(); | 115 | tick_nohz_restart_sched_tick(); |
120 | preempt_enable_no_resched(); | 116 | preempt_enable_no_resched(); |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 4c818a738396..bd387e8f73b4 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -51,8 +51,6 @@ | |||
51 | #include <asm/syscalls.h> | 51 | #include <asm/syscalls.h> |
52 | #include <asm/debugreg.h> | 52 | #include <asm/debugreg.h> |
53 | 53 | ||
54 | #include <trace/events/power.h> | ||
55 | |||
56 | asmlinkage extern void ret_from_fork(void); | 54 | asmlinkage extern void ret_from_fork(void); |
57 | 55 | ||
58 | DEFINE_PER_CPU(unsigned long, old_rsp); | 56 | DEFINE_PER_CPU(unsigned long, old_rsp); |
@@ -141,10 +139,6 @@ void cpu_idle(void) | |||
141 | pm_idle(); | 139 | pm_idle(); |
142 | start_critical_timings(); | 140 | start_critical_timings(); |
143 | 141 | ||
144 | trace_power_end(smp_processor_id()); | ||
145 | trace_cpu_idle(PWR_EVENT_EXIT, | ||
146 | smp_processor_id()); | ||
147 | |||
148 | /* In many cases the interrupt that ended idle | 142 | /* In many cases the interrupt that ended idle |
149 | has already called exit_idle. But some idle | 143 | has already called exit_idle. But some idle |
150 | loops can be woken up without interrupt. */ | 144 | loops can be woken up without interrupt. */ |
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index c495aa8d4815..fc7aae1e2bc7 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <asm/pci_x86.h> | 18 | #include <asm/pci_x86.h> |
19 | #include <asm/virtext.h> | 19 | #include <asm/virtext.h> |
20 | #include <asm/cpu.h> | 20 | #include <asm/cpu.h> |
21 | #include <asm/nmi.h> | ||
21 | 22 | ||
22 | #ifdef CONFIG_X86_32 | 23 | #ifdef CONFIG_X86_32 |
23 | # include <linux/ctype.h> | 24 | # include <linux/ctype.h> |
@@ -747,7 +748,7 @@ static int crash_nmi_callback(struct notifier_block *self, | |||
747 | { | 748 | { |
748 | int cpu; | 749 | int cpu; |
749 | 750 | ||
750 | if (val != DIE_NMI_IPI) | 751 | if (val != DIE_NMI) |
751 | return NOTIFY_OK; | 752 | return NOTIFY_OK; |
752 | 753 | ||
753 | cpu = raw_smp_processor_id(); | 754 | cpu = raw_smp_processor_id(); |
@@ -778,6 +779,8 @@ static void smp_send_nmi_allbutself(void) | |||
778 | 779 | ||
779 | static struct notifier_block crash_nmi_nb = { | 780 | static struct notifier_block crash_nmi_nb = { |
780 | .notifier_call = crash_nmi_callback, | 781 | .notifier_call = crash_nmi_callback, |
782 | /* we want to be the first one called */ | ||
783 | .priority = NMI_LOCAL_HIGH_PRIOR+1, | ||
781 | }; | 784 | }; |
782 | 785 | ||
783 | /* Halt all other CPUs, calling the specified function on each of them | 786 | /* Halt all other CPUs, calling the specified function on each of them |
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 1cfbbfc3ae26..6f39cab052d5 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c | |||
@@ -76,7 +76,7 @@ int mach_set_rtc_mmss(unsigned long nowtime) | |||
76 | CMOS_WRITE(real_seconds, RTC_SECONDS); | 76 | CMOS_WRITE(real_seconds, RTC_SECONDS); |
77 | CMOS_WRITE(real_minutes, RTC_MINUTES); | 77 | CMOS_WRITE(real_minutes, RTC_MINUTES); |
78 | } else { | 78 | } else { |
79 | printk(KERN_WARNING | 79 | printk_once(KERN_NOTICE |
80 | "set_rtc_mmss: can't update from %d to %d\n", | 80 | "set_rtc_mmss: can't update from %d to %d\n", |
81 | cmos_minutes, real_minutes); | 81 | cmos_minutes, real_minutes); |
82 | retval = -1; | 82 | retval = -1; |
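The rtc.c change above does two things: it demotes the message from KERN_WARNING to KERN_NOTICE, and it switches to printk_once() so a persistently unsettable CMOS clock prints once per boot instead of flooding the log. Sketch:

	#include <linux/kernel.h>

	static void demo_report_rtc(int from, int to)
	{
		/* emitted at most once per boot, however often it is hit */
		printk_once(KERN_NOTICE "set_rtc_mmss: can't update from %d to %d\n",
			    from, to);
	}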
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 5fdc0950da1d..763df77343dd 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -427,7 +427,7 @@ void __cpuinit set_cpu_sibling_map(int cpu) | |||
427 | 427 | ||
428 | cpumask_set_cpu(cpu, c->llc_shared_map); | 428 | cpumask_set_cpu(cpu, c->llc_shared_map); |
429 | 429 | ||
430 | if (current_cpu_data.x86_max_cores == 1) { | 430 | if (__this_cpu_read(cpu_info.x86_max_cores) == 1) { |
431 | cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu)); | 431 | cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu)); |
432 | c->booted_cores = 1; | 432 | c->booted_cores = 1; |
433 | return; | 433 | return; |
@@ -1089,7 +1089,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
1089 | 1089 | ||
1090 | preempt_disable(); | 1090 | preempt_disable(); |
1091 | smp_cpu_index_default(); | 1091 | smp_cpu_index_default(); |
1092 | current_cpu_data = boot_cpu_data; | 1092 | memcpy(__this_cpu_ptr(&cpu_info), &boot_cpu_data, sizeof(cpu_info)); |
1093 | cpumask_copy(cpu_callin_mask, cpumask_of(0)); | 1093 | cpumask_copy(cpu_callin_mask, cpumask_of(0)); |
1094 | mb(); | 1094 | mb(); |
1095 | /* | 1095 | /* |
@@ -1383,7 +1383,7 @@ void play_dead_common(void) | |||
1383 | 1383 | ||
1384 | mb(); | 1384 | mb(); |
1385 | /* Ack it */ | 1385 | /* Ack it */ |
1386 | __get_cpu_var(cpu_state) = CPU_DEAD; | 1386 | __this_cpu_write(cpu_state, CPU_DEAD); |
1387 | 1387 | ||
1388 | /* | 1388 | /* |
1389 | * With physical CPU hotplug, we should halt the cpu | 1389 | * With physical CPU hotplug, we should halt the cpu |
@@ -1403,11 +1403,11 @@ static inline void mwait_play_dead(void) | |||
1403 | int i; | 1403 | int i; |
1404 | void *mwait_ptr; | 1404 | void *mwait_ptr; |
1405 | 1405 | ||
1406 | if (!cpu_has(&current_cpu_data, X86_FEATURE_MWAIT)) | 1406 | if (!cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_MWAIT)) |
1407 | return; | 1407 | return; |
1408 | if (!cpu_has(&current_cpu_data, X86_FEATURE_CLFLSH)) | 1408 | if (!cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLSH)) |
1409 | return; | 1409 | return; |
1410 | if (current_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) | 1410 | if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF) |
1411 | return; | 1411 | return; |
1412 | 1412 | ||
1413 | eax = CPUID_MWAIT_LEAF; | 1413 | eax = CPUID_MWAIT_LEAF; |
@@ -1458,7 +1458,7 @@ static inline void mwait_play_dead(void) | |||
1458 | 1458 | ||
1459 | static inline void hlt_play_dead(void) | 1459 | static inline void hlt_play_dead(void) |
1460 | { | 1460 | { |
1461 | if (current_cpu_data.x86 >= 4) | 1461 | if (__this_cpu_read(cpu_info.x86) >= 4) |
1462 | wbinvd(); | 1462 | wbinvd(); |
1463 | 1463 | ||
1464 | while (1) { | 1464 | while (1) { |
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index c2f1b26141e2..998e972f3b1a 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c | |||
@@ -133,7 +133,7 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn, | |||
133 | pmd = pmd_alloc(&tboot_mm, pud, vaddr); | 133 | pmd = pmd_alloc(&tboot_mm, pud, vaddr); |
134 | if (!pmd) | 134 | if (!pmd) |
135 | return -1; | 135 | return -1; |
136 | pte = pte_alloc_map(&tboot_mm, pmd, vaddr); | 136 | pte = pte_alloc_map(&tboot_mm, NULL, pmd, vaddr); |
137 | if (!pte) | 137 | if (!pte) |
138 | return -1; | 138 | return -1; |
139 | set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot)); | 139 | set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot)); |
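The extra NULL in the tboot.c call reflects a signature change that came in with the transparent-hugepage series: pte_alloc_map() now also takes the vma, presumably so the pte allocator can cooperate with huge-pmd splitting, and kernel-internal page tables such as tboot's 1:1 map have no vma to pass. A sketch of the call under that assumed signature:

	/* Assumed THP-era signature (inferred from the hunk above):
	 *	pte_alloc_map(struct mm_struct *mm, struct vm_area_struct *vma,
	 *		      pmd_t *pmd, unsigned long address)
	 * tboot_mm is a bare kernel mm with no vmas, hence NULL. */
	pte = pte_alloc_map(&tboot_mm, NULL, pmd, vaddr);
	if (!pte)
		return -1;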
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index c76aaca5694d..b9b67166f9de 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -84,6 +84,11 @@ EXPORT_SYMBOL_GPL(used_vectors); | |||
84 | static int ignore_nmis; | 84 | static int ignore_nmis; |
85 | 85 | ||
86 | int unknown_nmi_panic; | 86 | int unknown_nmi_panic; |
87 | /* | ||
88 | * Prevent NMI reason port (0x61) being accessed simultaneously, can | ||
89 | * only be used in NMI handler. | ||
90 | */ | ||
91 | static DEFINE_RAW_SPINLOCK(nmi_reason_lock); | ||
87 | 92 | ||
88 | static inline void conditional_sti(struct pt_regs *regs) | 93 | static inline void conditional_sti(struct pt_regs *regs) |
89 | { | 94 | { |
@@ -310,15 +315,15 @@ static int __init setup_unknown_nmi_panic(char *str) | |||
310 | __setup("unknown_nmi_panic", setup_unknown_nmi_panic); | 315 | __setup("unknown_nmi_panic", setup_unknown_nmi_panic); |
311 | 316 | ||
312 | static notrace __kprobes void | 317 | static notrace __kprobes void |
313 | mem_parity_error(unsigned char reason, struct pt_regs *regs) | 318 | pci_serr_error(unsigned char reason, struct pt_regs *regs) |
314 | { | 319 | { |
315 | printk(KERN_EMERG | 320 | pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n", |
316 | "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", | 321 | reason, smp_processor_id()); |
317 | reason, smp_processor_id()); | ||
318 | |||
319 | printk(KERN_EMERG | ||
320 | "You have some hardware problem, likely on the PCI bus.\n"); | ||
321 | 322 | ||
323 | /* | ||
324 | * On some machines, PCI SERR line is used to report memory | ||
325 | * errors. EDAC makes use of it. | ||
326 | */ | ||
322 | #if defined(CONFIG_EDAC) | 327 | #if defined(CONFIG_EDAC) |
323 | if (edac_handler_set()) { | 328 | if (edac_handler_set()) { |
324 | edac_atomic_assert_error(); | 329 | edac_atomic_assert_error(); |
@@ -329,11 +334,11 @@ mem_parity_error(unsigned char reason, struct pt_regs *regs) | |||
329 | if (panic_on_unrecovered_nmi) | 334 | if (panic_on_unrecovered_nmi) |
330 | panic("NMI: Not continuing"); | 335 | panic("NMI: Not continuing"); |
331 | 336 | ||
332 | printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); | 337 | pr_emerg("Dazed and confused, but trying to continue\n"); |
333 | 338 | ||
334 | /* Clear and disable the memory parity error line. */ | 339 | /* Clear and disable the PCI SERR error line. */ |
335 | reason = (reason & 0xf) | 4; | 340 | reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR; |
336 | outb(reason, 0x61); | 341 | outb(reason, NMI_REASON_PORT); |
337 | } | 342 | } |
338 | 343 | ||
339 | static notrace __kprobes void | 344 | static notrace __kprobes void |
@@ -341,15 +346,17 @@ io_check_error(unsigned char reason, struct pt_regs *regs) | |||
341 | { | 346 | { |
342 | unsigned long i; | 347 | unsigned long i; |
343 | 348 | ||
344 | printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); | 349 | pr_emerg( |
350 | "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n", | ||
351 | reason, smp_processor_id()); | ||
345 | show_registers(regs); | 352 | show_registers(regs); |
346 | 353 | ||
347 | if (panic_on_io_nmi) | 354 | if (panic_on_io_nmi) |
348 | panic("NMI IOCK error: Not continuing"); | 355 | panic("NMI IOCK error: Not continuing"); |
349 | 356 | ||
350 | /* Re-enable the IOCK line, wait for a few seconds */ | 357 | /* Re-enable the IOCK line, wait for a few seconds */ |
351 | reason = (reason & 0xf) | 8; | 358 | reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK; |
352 | outb(reason, 0x61); | 359 | outb(reason, NMI_REASON_PORT); |
353 | 360 | ||
354 | i = 20000; | 361 | i = 20000; |
355 | while (--i) { | 362 | while (--i) { |
@@ -357,8 +364,8 @@ io_check_error(unsigned char reason, struct pt_regs *regs) | |||
357 | udelay(100); | 364 | udelay(100); |
358 | } | 365 | } |
359 | 366 | ||
360 | reason &= ~8; | 367 | reason &= ~NMI_REASON_CLEAR_IOCHK; |
361 | outb(reason, 0x61); | 368 | outb(reason, NMI_REASON_PORT); |
362 | } | 369 | } |
363 | 370 | ||
364 | static notrace __kprobes void | 371 | static notrace __kprobes void |
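Both traps.c hunks above retire magic numbers for the legacy NMI status/control port in favour of named constants. Their values can be read straight off the literals they replace; a sketch of the definitions this series depends on (the real ones live in arch/x86/include/asm/mach_traps.h):

	#define NMI_REASON_PORT		0x61

	#define NMI_REASON_SERR		0x80	/* old test: reason & 0x80 */
	#define NMI_REASON_IOCHK	0x40	/* old test: reason & 0x40 */
	#define NMI_REASON_MASK		(NMI_REASON_SERR | NMI_REASON_IOCHK)

	#define NMI_REASON_CLEAR_SERR	0x04	/* old code: | 4   */
	#define NMI_REASON_CLEAR_IOCHK	0x08	/* old code: | 8   */
	#define NMI_REASON_CLEAR_MASK	0x0f	/* old code: & 0xf */

NMI_REASON_MASK matches the old reason & 0xc0 test in default_do_nmi() below.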
@@ -377,57 +384,50 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) | |||
377 | return; | 384 | return; |
378 | } | 385 | } |
379 | #endif | 386 | #endif |
380 | printk(KERN_EMERG | 387 | pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", |
381 | "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", | 388 | reason, smp_processor_id()); |
382 | reason, smp_processor_id()); | ||
383 | 389 | ||
384 | printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); | 390 | pr_emerg("Do you have a strange power saving mode enabled?\n"); |
385 | if (unknown_nmi_panic || panic_on_unrecovered_nmi) | 391 | if (unknown_nmi_panic || panic_on_unrecovered_nmi) |
386 | panic("NMI: Not continuing"); | 392 | panic("NMI: Not continuing"); |
387 | 393 | ||
388 | printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); | 394 | pr_emerg("Dazed and confused, but trying to continue\n"); |
389 | } | 395 | } |
390 | 396 | ||
391 | static notrace __kprobes void default_do_nmi(struct pt_regs *regs) | 397 | static notrace __kprobes void default_do_nmi(struct pt_regs *regs) |
392 | { | 398 | { |
393 | unsigned char reason = 0; | 399 | unsigned char reason = 0; |
394 | int cpu; | ||
395 | 400 | ||
396 | cpu = smp_processor_id(); | 401 | /* |
397 | 402 | * CPU-specific NMI must be processed before non-CPU-specific | |
398 | /* Only the BSP gets external NMIs from the system. */ | 403 | * NMI, otherwise we may lose it, because the CPU-specific |
399 | if (!cpu) | 404 | * NMI can not be detected/processed on other CPUs. |
400 | reason = get_nmi_reason(); | 405 | */ |
406 | if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP) | ||
407 | return; | ||
401 | 408 | ||
402 | if (!(reason & 0xc0)) { | 409 | /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */ |
403 | if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) | 410 | raw_spin_lock(&nmi_reason_lock); |
404 | == NOTIFY_STOP) | 411 | reason = get_nmi_reason(); |
405 | return; | ||
406 | 412 | ||
407 | #ifdef CONFIG_X86_LOCAL_APIC | 413 | if (reason & NMI_REASON_MASK) { |
408 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) | 414 | if (reason & NMI_REASON_SERR) |
409 | == NOTIFY_STOP) | 415 | pci_serr_error(reason, regs); |
410 | return; | 416 | else if (reason & NMI_REASON_IOCHK) |
417 | io_check_error(reason, regs); | ||
418 | #ifdef CONFIG_X86_32 | ||
419 | /* | ||
420 | * Reassert NMI in case it became active | ||
421 | * meanwhile as it's edge-triggered: | ||
422 | */ | ||
423 | reassert_nmi(); | ||
411 | #endif | 424 | #endif |
412 | unknown_nmi_error(reason, regs); | 425 | raw_spin_unlock(&nmi_reason_lock); |
413 | |||
414 | return; | 426 | return; |
415 | } | 427 | } |
416 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) | 428 | raw_spin_unlock(&nmi_reason_lock); |
417 | return; | ||
418 | 429 | ||
419 | /* AK: following checks seem to be broken on modern chipsets. FIXME */ | 430 | unknown_nmi_error(reason, regs); |
420 | if (reason & 0x80) | ||
421 | mem_parity_error(reason, regs); | ||
422 | if (reason & 0x40) | ||
423 | io_check_error(reason, regs); | ||
424 | #ifdef CONFIG_X86_32 | ||
425 | /* | ||
426 | * Reassert NMI in case it became active meanwhile | ||
427 | * as it's edge-triggered: | ||
428 | */ | ||
429 | reassert_nmi(); | ||
430 | #endif | ||
431 | } | 431 | } |
432 | 432 | ||
433 | dotraplinkage notrace __kprobes void | 433 | dotraplinkage notrace __kprobes void |
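Taken together, the default_do_nmi() rework drops the old BSP-only assumption: CPU-local NMI sources, which other CPUs cannot observe, are dispatched first through notify_die(), and only then is the shared reason port sampled under nmi_reason_lock, so any CPU may service chipset NMIs. Condensed (x86-32 reassert elided), the new flow is:

	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
		return;				/* CPU-specific source handled */

	raw_spin_lock(&nmi_reason_lock);	/* port 0x61 is system-wide */
	reason = get_nmi_reason();
	if (reason & NMI_REASON_MASK) {
		if (reason & NMI_REASON_SERR)
			pci_serr_error(reason, regs);
		else if (reason & NMI_REASON_IOCHK)
			io_check_error(reason, regs);
		raw_spin_unlock(&nmi_reason_lock);
		return;
	}
	raw_spin_unlock(&nmi_reason_lock);

	unknown_nmi_error(reason, regs);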
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index ae09f970e626..ffe5755caa8b 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -659,7 +659,7 @@ void restore_sched_clock_state(void) | |||
659 | 659 | ||
660 | local_irq_save(flags); | 660 | local_irq_save(flags); |
661 | 661 | ||
662 | __get_cpu_var(cyc2ns_offset) = 0; | 662 | __this_cpu_write(cyc2ns_offset, 0); |
663 | offset = cyc2ns_suspend - sched_clock(); | 663 | offset = cyc2ns_suspend - sched_clock(); |
664 | 664 | ||
665 | for_each_possible_cpu(cpu) | 665 | for_each_possible_cpu(cpu) |
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 61fb98519622..863f8753ab0a 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c | |||
@@ -179,6 +179,7 @@ static void mark_screen_rdonly(struct mm_struct *mm) | |||
179 | if (pud_none_or_clear_bad(pud)) | 179 | if (pud_none_or_clear_bad(pud)) |
180 | goto out; | 180 | goto out; |
181 | pmd = pmd_offset(pud, 0xA0000); | 181 | pmd = pmd_offset(pud, 0xA0000); |
182 | split_huge_page_pmd(mm, pmd); | ||
182 | if (pmd_none_or_clear_bad(pmd)) | 183 | if (pmd_none_or_clear_bad(pmd)) |
183 | goto out; | 184 | goto out; |
184 | pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl); | 185 | pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl); |
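The one-line vm86_32.c change is a transparent-hugepage fixup: if the 0xA0000 window happens to be covered by a huge pmd, the pte-level walk below it would be walking garbage, so the pmd is split back to ptes first (split_huge_page_pmd() should be a no-op on a regular pmd). The resulting walk pattern:

	pmd = pmd_offset(pud, 0xA0000);
	split_huge_page_pmd(mm, pmd);		/* no-op unless pmd is huge */
	if (pmd_none_or_clear_bad(pmd))
		goto out;
	pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl);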
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index ddc131ff438f..50f63648ce1b 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig | |||
@@ -28,6 +28,7 @@ config KVM | |||
28 | select HAVE_KVM_IRQCHIP | 28 | select HAVE_KVM_IRQCHIP |
29 | select HAVE_KVM_EVENTFD | 29 | select HAVE_KVM_EVENTFD |
30 | select KVM_APIC_ARCHITECTURE | 30 | select KVM_APIC_ARCHITECTURE |
31 | select KVM_ASYNC_PF | ||
31 | select USER_RETURN_NOTIFIER | 32 | select USER_RETURN_NOTIFIER |
32 | select KVM_MMIO | 33 | select KVM_MMIO |
33 | ---help--- | 34 | ---help--- |
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 31a7035c4bd9..f15501f431c8 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile | |||
@@ -1,5 +1,5 @@ | |||
1 | 1 | ||
2 | EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm | 2 | ccflags-y += -Ivirt/kvm -Iarch/x86/kvm |
3 | 3 | ||
4 | CFLAGS_x86.o := -I. | 4 | CFLAGS_x86.o := -I. |
5 | CFLAGS_svm.o := -I. | 5 | CFLAGS_svm.o := -I. |
@@ -9,6 +9,7 @@ kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ | |||
9 | coalesced_mmio.o irq_comm.o eventfd.o \ | 9 | coalesced_mmio.o irq_comm.o eventfd.o \ |
10 | assigned-dev.o) | 10 | assigned-dev.o) |
11 | kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o) | 11 | kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o) |
12 | kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/, async_pf.o) | ||
12 | 13 | ||
13 | kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ | 14 | kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ |
14 | i8254.o timer.o | 15 | i8254.o timer.o |
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 38b6e8dafaff..caf966781d25 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -20,16 +20,8 @@ | |||
20 | * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4 | 20 | * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4 |
21 | */ | 21 | */ |
22 | 22 | ||
23 | #ifndef __KERNEL__ | ||
24 | #include <stdio.h> | ||
25 | #include <stdint.h> | ||
26 | #include <public/xen.h> | ||
27 | #define DPRINTF(_f, _a ...) printf(_f , ## _a) | ||
28 | #else | ||
29 | #include <linux/kvm_host.h> | 23 | #include <linux/kvm_host.h> |
30 | #include "kvm_cache_regs.h" | 24 | #include "kvm_cache_regs.h" |
31 | #define DPRINTF(x...) do {} while (0) | ||
32 | #endif | ||
33 | #include <linux/module.h> | 25 | #include <linux/module.h> |
34 | #include <asm/kvm_emulate.h> | 26 | #include <asm/kvm_emulate.h> |
35 | 27 | ||
@@ -418,9 +410,9 @@ address_mask(struct decode_cache *c, unsigned long reg) | |||
418 | } | 410 | } |
419 | 411 | ||
420 | static inline unsigned long | 412 | static inline unsigned long |
421 | register_address(struct decode_cache *c, unsigned long base, unsigned long reg) | 413 | register_address(struct decode_cache *c, unsigned long reg) |
422 | { | 414 | { |
423 | return base + address_mask(c, reg); | 415 | return address_mask(c, reg); |
424 | } | 416 | } |
425 | 417 | ||
426 | static inline void | 418 | static inline void |
@@ -452,60 +444,55 @@ static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, | |||
452 | return ops->get_cached_segment_base(seg, ctxt->vcpu); | 444 | return ops->get_cached_segment_base(seg, ctxt->vcpu); |
453 | } | 445 | } |
454 | 446 | ||
455 | static unsigned long seg_override_base(struct x86_emulate_ctxt *ctxt, | 447 | static unsigned seg_override(struct x86_emulate_ctxt *ctxt, |
456 | struct x86_emulate_ops *ops, | 448 | struct x86_emulate_ops *ops, |
457 | struct decode_cache *c) | 449 | struct decode_cache *c) |
458 | { | 450 | { |
459 | if (!c->has_seg_override) | 451 | if (!c->has_seg_override) |
460 | return 0; | 452 | return 0; |
461 | 453 | ||
462 | return seg_base(ctxt, ops, c->seg_override); | 454 | return c->seg_override; |
463 | } | 455 | } |
464 | 456 | ||
465 | static unsigned long es_base(struct x86_emulate_ctxt *ctxt, | 457 | static ulong linear(struct x86_emulate_ctxt *ctxt, |
466 | struct x86_emulate_ops *ops) | 458 | struct segmented_address addr) |
467 | { | 459 | { |
468 | return seg_base(ctxt, ops, VCPU_SREG_ES); | 460 | struct decode_cache *c = &ctxt->decode; |
469 | } | 461 | ulong la; |
470 | |||
471 | static unsigned long ss_base(struct x86_emulate_ctxt *ctxt, | ||
472 | struct x86_emulate_ops *ops) | ||
473 | { | ||
474 | return seg_base(ctxt, ops, VCPU_SREG_SS); | ||
475 | } | ||
476 | 462 | ||
477 | static void emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, | 463 | la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea; |
478 | u32 error, bool valid) | 464 | if (c->ad_bytes != 8) |
479 | { | 465 | la &= (u32)-1; |
480 | ctxt->exception = vec; | 466 | return la; |
481 | ctxt->error_code = error; | ||
482 | ctxt->error_code_valid = valid; | ||
483 | } | 467 | } |
484 | 468 | ||
485 | static void emulate_gp(struct x86_emulate_ctxt *ctxt, int err) | 469 | static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, |
470 | u32 error, bool valid) | ||
486 | { | 471 | { |
487 | emulate_exception(ctxt, GP_VECTOR, err, true); | 472 | ctxt->exception.vector = vec; |
473 | ctxt->exception.error_code = error; | ||
474 | ctxt->exception.error_code_valid = valid; | ||
475 | return X86EMUL_PROPAGATE_FAULT; | ||
488 | } | 476 | } |
489 | 477 | ||
490 | static void emulate_pf(struct x86_emulate_ctxt *ctxt) | 478 | static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err) |
491 | { | 479 | { |
492 | emulate_exception(ctxt, PF_VECTOR, 0, true); | 480 | return emulate_exception(ctxt, GP_VECTOR, err, true); |
493 | } | 481 | } |
494 | 482 | ||
495 | static void emulate_ud(struct x86_emulate_ctxt *ctxt) | 483 | static int emulate_ud(struct x86_emulate_ctxt *ctxt) |
496 | { | 484 | { |
497 | emulate_exception(ctxt, UD_VECTOR, 0, false); | 485 | return emulate_exception(ctxt, UD_VECTOR, 0, false); |
498 | } | 486 | } |
499 | 487 | ||
500 | static void emulate_ts(struct x86_emulate_ctxt *ctxt, int err) | 488 | static int emulate_ts(struct x86_emulate_ctxt *ctxt, int err) |
501 | { | 489 | { |
502 | emulate_exception(ctxt, TS_VECTOR, err, true); | 490 | return emulate_exception(ctxt, TS_VECTOR, err, true); |
503 | } | 491 | } |
504 | 492 | ||
505 | static int emulate_de(struct x86_emulate_ctxt *ctxt) | 493 | static int emulate_de(struct x86_emulate_ctxt *ctxt) |
506 | { | 494 | { |
507 | emulate_exception(ctxt, DE_VECTOR, 0, false); | 495 | return emulate_exception(ctxt, DE_VECTOR, 0, false); |
508 | return X86EMUL_PROPAGATE_FAULT; | ||
509 | } | 496 | } |
510 | 497 | ||
511 | static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, | 498 | static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, |
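The hunk above changes how the KVM emulator represents memory operands: instead of folding the segment base into the effective address at decode time (the old seg_override_base()/es_base()/ss_base() helpers), an operand now carries a (segment, effective address) pair and is linearized in exactly one place, when it is actually accessed. A sketch of the type this implies, alongside the linear() helper taken from the hunk (the real segmented_address definition lives in asm/kvm_emulate.h):

	/* Layout inferred from the usage above, not quoted from the header: */
	struct segmented_address {
		ulong	 ea;	/* effective (intra-segment) address */
		unsigned seg;	/* VCPU_SREG_* index */
	};

	static ulong linear(struct x86_emulate_ctxt *ctxt,
			    struct segmented_address addr)
	{
		struct decode_cache *c = &ctxt->decode;
		ulong la;

		la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea;
		if (c->ad_bytes != 8)
			la &= (u32)-1;	/* truncate outside 64-bit addressing */
		return la;
	}

Deferring linearization also means writeback() and read_emulated() call linear() themselves, as the later hunks show.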
@@ -520,7 +507,7 @@ static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, | |||
520 | cur_size = fc->end - fc->start; | 507 | cur_size = fc->end - fc->start; |
521 | size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip)); | 508 | size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip)); |
522 | rc = ops->fetch(ctxt->cs_base + eip, fc->data + cur_size, | 509 | rc = ops->fetch(ctxt->cs_base + eip, fc->data + cur_size, |
523 | size, ctxt->vcpu, NULL); | 510 | size, ctxt->vcpu, &ctxt->exception); |
524 | if (rc != X86EMUL_CONTINUE) | 511 | if (rc != X86EMUL_CONTINUE) |
525 | return rc; | 512 | return rc; |
526 | fc->end += size; | 513 | fc->end += size; |
@@ -564,7 +551,7 @@ static void *decode_register(u8 modrm_reg, unsigned long *regs, | |||
564 | 551 | ||
565 | static int read_descriptor(struct x86_emulate_ctxt *ctxt, | 552 | static int read_descriptor(struct x86_emulate_ctxt *ctxt, |
566 | struct x86_emulate_ops *ops, | 553 | struct x86_emulate_ops *ops, |
567 | ulong addr, | 554 | struct segmented_address addr, |
568 | u16 *size, unsigned long *address, int op_bytes) | 555 | u16 *size, unsigned long *address, int op_bytes) |
569 | { | 556 | { |
570 | int rc; | 557 | int rc; |
@@ -572,10 +559,13 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt, | |||
572 | if (op_bytes == 2) | 559 | if (op_bytes == 2) |
573 | op_bytes = 3; | 560 | op_bytes = 3; |
574 | *address = 0; | 561 | *address = 0; |
575 | rc = ops->read_std(addr, (unsigned long *)size, 2, ctxt->vcpu, NULL); | 562 | rc = ops->read_std(linear(ctxt, addr), (unsigned long *)size, 2, |
563 | ctxt->vcpu, &ctxt->exception); | ||
576 | if (rc != X86EMUL_CONTINUE) | 564 | if (rc != X86EMUL_CONTINUE) |
577 | return rc; | 565 | return rc; |
578 | rc = ops->read_std(addr + 2, address, op_bytes, ctxt->vcpu, NULL); | 566 | addr.ea += 2; |
567 | rc = ops->read_std(linear(ctxt, addr), address, op_bytes, | ||
568 | ctxt->vcpu, &ctxt->exception); | ||
579 | return rc; | 569 | return rc; |
580 | } | 570 | } |
581 | 571 | ||
@@ -768,7 +758,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, | |||
768 | break; | 758 | break; |
769 | } | 759 | } |
770 | } | 760 | } |
771 | op->addr.mem = modrm_ea; | 761 | op->addr.mem.ea = modrm_ea; |
772 | done: | 762 | done: |
773 | return rc; | 763 | return rc; |
774 | } | 764 | } |
@@ -783,13 +773,13 @@ static int decode_abs(struct x86_emulate_ctxt *ctxt, | |||
783 | op->type = OP_MEM; | 773 | op->type = OP_MEM; |
784 | switch (c->ad_bytes) { | 774 | switch (c->ad_bytes) { |
785 | case 2: | 775 | case 2: |
786 | op->addr.mem = insn_fetch(u16, 2, c->eip); | 776 | op->addr.mem.ea = insn_fetch(u16, 2, c->eip); |
787 | break; | 777 | break; |
788 | case 4: | 778 | case 4: |
789 | op->addr.mem = insn_fetch(u32, 4, c->eip); | 779 | op->addr.mem.ea = insn_fetch(u32, 4, c->eip); |
790 | break; | 780 | break; |
791 | case 8: | 781 | case 8: |
792 | op->addr.mem = insn_fetch(u64, 8, c->eip); | 782 | op->addr.mem.ea = insn_fetch(u64, 8, c->eip); |
793 | break; | 783 | break; |
794 | } | 784 | } |
795 | done: | 785 | done: |
@@ -808,7 +798,7 @@ static void fetch_bit_operand(struct decode_cache *c) | |||
808 | else if (c->src.bytes == 4) | 798 | else if (c->src.bytes == 4) |
809 | sv = (s32)c->src.val & (s32)mask; | 799 | sv = (s32)c->src.val & (s32)mask; |
810 | 800 | ||
811 | c->dst.addr.mem += (sv >> 3); | 801 | c->dst.addr.mem.ea += (sv >> 3); |
812 | } | 802 | } |
813 | 803 | ||
814 | /* only subword offset */ | 804 | /* only subword offset */ |
@@ -821,7 +811,6 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt, | |||
821 | { | 811 | { |
822 | int rc; | 812 | int rc; |
823 | struct read_cache *mc = &ctxt->decode.mem_read; | 813 | struct read_cache *mc = &ctxt->decode.mem_read; |
824 | u32 err; | ||
825 | 814 | ||
826 | while (size) { | 815 | while (size) { |
827 | int n = min(size, 8u); | 816 | int n = min(size, 8u); |
@@ -829,10 +818,8 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt, | |||
829 | if (mc->pos < mc->end) | 818 | if (mc->pos < mc->end) |
830 | goto read_cached; | 819 | goto read_cached; |
831 | 820 | ||
832 | rc = ops->read_emulated(addr, mc->data + mc->end, n, &err, | 821 | rc = ops->read_emulated(addr, mc->data + mc->end, n, |
833 | ctxt->vcpu); | 822 | &ctxt->exception, ctxt->vcpu); |
834 | if (rc == X86EMUL_PROPAGATE_FAULT) | ||
835 | emulate_pf(ctxt); | ||
836 | if (rc != X86EMUL_CONTINUE) | 823 | if (rc != X86EMUL_CONTINUE) |
837 | return rc; | 824 | return rc; |
838 | mc->end += n; | 825 | mc->end += n; |
@@ -907,19 +894,15 @@ static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
907 | struct desc_ptr dt; | 894 | struct desc_ptr dt; |
908 | u16 index = selector >> 3; | 895 | u16 index = selector >> 3; |
909 | int ret; | 896 | int ret; |
910 | u32 err; | ||
911 | ulong addr; | 897 | ulong addr; |
912 | 898 | ||
913 | get_descriptor_table_ptr(ctxt, ops, selector, &dt); | 899 | get_descriptor_table_ptr(ctxt, ops, selector, &dt); |
914 | 900 | ||
915 | if (dt.size < index * 8 + 7) { | 901 | if (dt.size < index * 8 + 7) |
916 | emulate_gp(ctxt, selector & 0xfffc); | 902 | return emulate_gp(ctxt, selector & 0xfffc); |
917 | return X86EMUL_PROPAGATE_FAULT; | ||
918 | } | ||
919 | addr = dt.address + index * 8; | 903 | addr = dt.address + index * 8; |
920 | ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); | 904 | ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, |
921 | if (ret == X86EMUL_PROPAGATE_FAULT) | 905 | &ctxt->exception); |
922 | emulate_pf(ctxt); | ||
923 | 906 | ||
924 | return ret; | 907 | return ret; |
925 | } | 908 | } |
@@ -931,21 +914,17 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
931 | { | 914 | { |
932 | struct desc_ptr dt; | 915 | struct desc_ptr dt; |
933 | u16 index = selector >> 3; | 916 | u16 index = selector >> 3; |
934 | u32 err; | ||
935 | ulong addr; | 917 | ulong addr; |
936 | int ret; | 918 | int ret; |
937 | 919 | ||
938 | get_descriptor_table_ptr(ctxt, ops, selector, &dt); | 920 | get_descriptor_table_ptr(ctxt, ops, selector, &dt); |
939 | 921 | ||
940 | if (dt.size < index * 8 + 7) { | 922 | if (dt.size < index * 8 + 7) |
941 | emulate_gp(ctxt, selector & 0xfffc); | 923 | return emulate_gp(ctxt, selector & 0xfffc); |
942 | return X86EMUL_PROPAGATE_FAULT; | ||
943 | } | ||
944 | 924 | ||
945 | addr = dt.address + index * 8; | 925 | addr = dt.address + index * 8; |
946 | ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); | 926 | ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, |
947 | if (ret == X86EMUL_PROPAGATE_FAULT) | 927 | &ctxt->exception); |
948 | emulate_pf(ctxt); | ||
949 | 928 | ||
950 | return ret; | 929 | return ret; |
951 | } | 930 | } |
@@ -1092,7 +1071,6 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, | |||
1092 | { | 1071 | { |
1093 | int rc; | 1072 | int rc; |
1094 | struct decode_cache *c = &ctxt->decode; | 1073 | struct decode_cache *c = &ctxt->decode; |
1095 | u32 err; | ||
1096 | 1074 | ||
1097 | switch (c->dst.type) { | 1075 | switch (c->dst.type) { |
1098 | case OP_REG: | 1076 | case OP_REG: |
@@ -1101,21 +1079,19 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, | |||
1101 | case OP_MEM: | 1079 | case OP_MEM: |
1102 | if (c->lock_prefix) | 1080 | if (c->lock_prefix) |
1103 | rc = ops->cmpxchg_emulated( | 1081 | rc = ops->cmpxchg_emulated( |
1104 | c->dst.addr.mem, | 1082 | linear(ctxt, c->dst.addr.mem), |
1105 | &c->dst.orig_val, | 1083 | &c->dst.orig_val, |
1106 | &c->dst.val, | 1084 | &c->dst.val, |
1107 | c->dst.bytes, | 1085 | c->dst.bytes, |
1108 | &err, | 1086 | &ctxt->exception, |
1109 | ctxt->vcpu); | 1087 | ctxt->vcpu); |
1110 | else | 1088 | else |
1111 | rc = ops->write_emulated( | 1089 | rc = ops->write_emulated( |
1112 | c->dst.addr.mem, | 1090 | linear(ctxt, c->dst.addr.mem), |
1113 | &c->dst.val, | 1091 | &c->dst.val, |
1114 | c->dst.bytes, | 1092 | c->dst.bytes, |
1115 | &err, | 1093 | &ctxt->exception, |
1116 | ctxt->vcpu); | 1094 | ctxt->vcpu); |
1117 | if (rc == X86EMUL_PROPAGATE_FAULT) | ||
1118 | emulate_pf(ctxt); | ||
1119 | if (rc != X86EMUL_CONTINUE) | 1095 | if (rc != X86EMUL_CONTINUE) |
1120 | return rc; | 1096 | return rc; |
1121 | break; | 1097 | break; |
@@ -1137,8 +1113,8 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt, | |||
1137 | c->dst.bytes = c->op_bytes; | 1113 | c->dst.bytes = c->op_bytes; |
1138 | c->dst.val = c->src.val; | 1114 | c->dst.val = c->src.val; |
1139 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes); | 1115 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes); |
1140 | c->dst.addr.mem = register_address(c, ss_base(ctxt, ops), | 1116 | c->dst.addr.mem.ea = register_address(c, c->regs[VCPU_REGS_RSP]); |
1141 | c->regs[VCPU_REGS_RSP]); | 1117 | c->dst.addr.mem.seg = VCPU_SREG_SS; |
1142 | } | 1118 | } |
1143 | 1119 | ||
1144 | static int emulate_pop(struct x86_emulate_ctxt *ctxt, | 1120 | static int emulate_pop(struct x86_emulate_ctxt *ctxt, |
@@ -1147,10 +1123,11 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt, | |||
1147 | { | 1123 | { |
1148 | struct decode_cache *c = &ctxt->decode; | 1124 | struct decode_cache *c = &ctxt->decode; |
1149 | int rc; | 1125 | int rc; |
1126 | struct segmented_address addr; | ||
1150 | 1127 | ||
1151 | rc = read_emulated(ctxt, ops, register_address(c, ss_base(ctxt, ops), | 1128 | addr.ea = register_address(c, c->regs[VCPU_REGS_RSP]); |
1152 | c->regs[VCPU_REGS_RSP]), | 1129 | addr.seg = VCPU_SREG_SS; |
1153 | dest, len); | 1130 | rc = read_emulated(ctxt, ops, linear(ctxt, addr), dest, len); |
1154 | if (rc != X86EMUL_CONTINUE) | 1131 | if (rc != X86EMUL_CONTINUE) |
1155 | return rc; | 1132 | return rc; |
1156 | 1133 | ||
@@ -1184,10 +1161,8 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, | |||
1184 | change_mask |= EFLG_IF; | 1161 | change_mask |= EFLG_IF; |
1185 | break; | 1162 | break; |
1186 | case X86EMUL_MODE_VM86: | 1163 | case X86EMUL_MODE_VM86: |
1187 | if (iopl < 3) { | 1164 | if (iopl < 3) |
1188 | emulate_gp(ctxt, 0); | 1165 | return emulate_gp(ctxt, 0); |
1189 | return X86EMUL_PROPAGATE_FAULT; | ||
1190 | } | ||
1191 | change_mask |= EFLG_IF; | 1166 | change_mask |= EFLG_IF; |
1192 | break; | 1167 | break; |
1193 | default: /* real mode */ | 1168 | default: /* real mode */ |
@@ -1198,9 +1173,6 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, | |||
1198 | *(unsigned long *)dest = | 1173 | *(unsigned long *)dest = |
1199 | (ctxt->eflags & ~change_mask) | (val & change_mask); | 1174 | (ctxt->eflags & ~change_mask) | (val & change_mask); |
1200 | 1175 | ||
1201 | if (rc == X86EMUL_PROPAGATE_FAULT) | ||
1202 | emulate_pf(ctxt); | ||
1203 | |||
1204 | return rc; | 1176 | return rc; |
1205 | } | 1177 | } |
1206 | 1178 | ||
@@ -1287,7 +1259,6 @@ int emulate_int_real(struct x86_emulate_ctxt *ctxt, | |||
1287 | gva_t cs_addr; | 1259 | gva_t cs_addr; |
1288 | gva_t eip_addr; | 1260 | gva_t eip_addr; |
1289 | u16 cs, eip; | 1261 | u16 cs, eip; |
1290 | u32 err; | ||
1291 | 1262 | ||
1292 | /* TODO: Add limit checks */ | 1263 | /* TODO: Add limit checks */ |
1293 | c->src.val = ctxt->eflags; | 1264 | c->src.val = ctxt->eflags; |
@@ -1317,11 +1288,11 @@ int emulate_int_real(struct x86_emulate_ctxt *ctxt, | |||
1317 | eip_addr = dt.address + (irq << 2); | 1288 | eip_addr = dt.address + (irq << 2); |
1318 | cs_addr = dt.address + (irq << 2) + 2; | 1289 | cs_addr = dt.address + (irq << 2) + 2; |
1319 | 1290 | ||
1320 | rc = ops->read_std(cs_addr, &cs, 2, ctxt->vcpu, &err); | 1291 | rc = ops->read_std(cs_addr, &cs, 2, ctxt->vcpu, &ctxt->exception); |
1321 | if (rc != X86EMUL_CONTINUE) | 1292 | if (rc != X86EMUL_CONTINUE) |
1322 | return rc; | 1293 | return rc; |
1323 | 1294 | ||
1324 | rc = ops->read_std(eip_addr, &eip, 2, ctxt->vcpu, &err); | 1295 | rc = ops->read_std(eip_addr, &eip, 2, ctxt->vcpu, &ctxt->exception); |
1325 | if (rc != X86EMUL_CONTINUE) | 1296 | if (rc != X86EMUL_CONTINUE) |
1326 | return rc; | 1297 | return rc; |
1327 | 1298 | ||
@@ -1370,10 +1341,8 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt, | |||
1370 | if (rc != X86EMUL_CONTINUE) | 1341 | if (rc != X86EMUL_CONTINUE) |
1371 | return rc; | 1342 | return rc; |
1372 | 1343 | ||
1373 | if (temp_eip & ~0xffff) { | 1344 | if (temp_eip & ~0xffff) |
1374 | emulate_gp(ctxt, 0); | 1345 | return emulate_gp(ctxt, 0); |
1375 | return X86EMUL_PROPAGATE_FAULT; | ||
1376 | } | ||
1377 | 1346 | ||
1378 | rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); | 1347 | rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); |
1379 | 1348 | ||
@@ -1624,10 +1593,8 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1624 | 1593 | ||
1625 | /* syscall is not available in real mode */ | 1594 | /* syscall is not available in real mode */ |
1626 | if (ctxt->mode == X86EMUL_MODE_REAL || | 1595 | if (ctxt->mode == X86EMUL_MODE_REAL || |
1627 | ctxt->mode == X86EMUL_MODE_VM86) { | 1596 | ctxt->mode == X86EMUL_MODE_VM86) |
1628 | emulate_ud(ctxt); | 1597 | return emulate_ud(ctxt); |
1629 | return X86EMUL_PROPAGATE_FAULT; | ||
1630 | } | ||
1631 | 1598 | ||
1632 | setup_syscalls_segments(ctxt, ops, &cs, &ss); | 1599 | setup_syscalls_segments(ctxt, ops, &cs, &ss); |
1633 | 1600 | ||
@@ -1678,34 +1645,26 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1678 | u16 cs_sel, ss_sel; | 1645 | u16 cs_sel, ss_sel; |
1679 | 1646 | ||
1680 | /* inject #GP if in real mode */ | 1647 | /* inject #GP if in real mode */ |
1681 | if (ctxt->mode == X86EMUL_MODE_REAL) { | 1648 | if (ctxt->mode == X86EMUL_MODE_REAL) |
1682 | emulate_gp(ctxt, 0); | 1649 | return emulate_gp(ctxt, 0); |
1683 | return X86EMUL_PROPAGATE_FAULT; | ||
1684 | } | ||
1685 | 1650 | ||
1686 | /* XXX sysenter/sysexit have not been tested in 64bit mode. | 1651 | /* XXX sysenter/sysexit have not been tested in 64bit mode. |
1687 | * Therefore, we inject an #UD. | 1652 | * Therefore, we inject an #UD. |
1688 | */ | 1653 | */ |
1689 | if (ctxt->mode == X86EMUL_MODE_PROT64) { | 1654 | if (ctxt->mode == X86EMUL_MODE_PROT64) |
1690 | emulate_ud(ctxt); | 1655 | return emulate_ud(ctxt); |
1691 | return X86EMUL_PROPAGATE_FAULT; | ||
1692 | } | ||
1693 | 1656 | ||
1694 | setup_syscalls_segments(ctxt, ops, &cs, &ss); | 1657 | setup_syscalls_segments(ctxt, ops, &cs, &ss); |
1695 | 1658 | ||
1696 | ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); | 1659 | ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); |
1697 | switch (ctxt->mode) { | 1660 | switch (ctxt->mode) { |
1698 | case X86EMUL_MODE_PROT32: | 1661 | case X86EMUL_MODE_PROT32: |
1699 | if ((msr_data & 0xfffc) == 0x0) { | 1662 | if ((msr_data & 0xfffc) == 0x0) |
1700 | emulate_gp(ctxt, 0); | 1663 | return emulate_gp(ctxt, 0); |
1701 | return X86EMUL_PROPAGATE_FAULT; | ||
1702 | } | ||
1703 | break; | 1664 | break; |
1704 | case X86EMUL_MODE_PROT64: | 1665 | case X86EMUL_MODE_PROT64: |
1705 | if (msr_data == 0x0) { | 1666 | if (msr_data == 0x0) |
1706 | emulate_gp(ctxt, 0); | 1667 | return emulate_gp(ctxt, 0); |
1707 | return X86EMUL_PROPAGATE_FAULT; | ||
1708 | } | ||
1709 | break; | 1668 | break; |
1710 | } | 1669 | } |
1711 | 1670 | ||
@@ -1745,10 +1704,8 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1745 | 1704 | ||
1746 | /* inject #GP if in real mode or Virtual 8086 mode */ | 1705 | /* inject #GP if in real mode or Virtual 8086 mode */ |
1747 | if (ctxt->mode == X86EMUL_MODE_REAL || | 1706 | if (ctxt->mode == X86EMUL_MODE_REAL || |
1748 | ctxt->mode == X86EMUL_MODE_VM86) { | 1707 | ctxt->mode == X86EMUL_MODE_VM86) |
1749 | emulate_gp(ctxt, 0); | 1708 | return emulate_gp(ctxt, 0); |
1750 | return X86EMUL_PROPAGATE_FAULT; | ||
1751 | } | ||
1752 | 1709 | ||
1753 | setup_syscalls_segments(ctxt, ops, &cs, &ss); | 1710 | setup_syscalls_segments(ctxt, ops, &cs, &ss); |
1754 | 1711 | ||
@@ -1763,18 +1720,14 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1763 | switch (usermode) { | 1720 | switch (usermode) { |
1764 | case X86EMUL_MODE_PROT32: | 1721 | case X86EMUL_MODE_PROT32: |
1765 | cs_sel = (u16)(msr_data + 16); | 1722 | cs_sel = (u16)(msr_data + 16); |
1766 | if ((msr_data & 0xfffc) == 0x0) { | 1723 | if ((msr_data & 0xfffc) == 0x0) |
1767 | emulate_gp(ctxt, 0); | 1724 | return emulate_gp(ctxt, 0); |
1768 | return X86EMUL_PROPAGATE_FAULT; | ||
1769 | } | ||
1770 | ss_sel = (u16)(msr_data + 24); | 1725 | ss_sel = (u16)(msr_data + 24); |
1771 | break; | 1726 | break; |
1772 | case X86EMUL_MODE_PROT64: | 1727 | case X86EMUL_MODE_PROT64: |
1773 | cs_sel = (u16)(msr_data + 32); | 1728 | cs_sel = (u16)(msr_data + 32); |
1774 | if (msr_data == 0x0) { | 1729 | if (msr_data == 0x0) |
1775 | emulate_gp(ctxt, 0); | 1730 | return emulate_gp(ctxt, 0); |
1776 | return X86EMUL_PROPAGATE_FAULT; | ||
1777 | } | ||
1778 | ss_sel = cs_sel + 8; | 1731 | ss_sel = cs_sel + 8; |
1779 | cs.d = 0; | 1732 | cs.d = 0; |
1780 | cs.l = 1; | 1733 | cs.l = 1; |
@@ -1934,33 +1887,27 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, | |||
1934 | { | 1887 | { |
1935 | struct tss_segment_16 tss_seg; | 1888 | struct tss_segment_16 tss_seg; |
1936 | int ret; | 1889 | int ret; |
1937 | u32 err, new_tss_base = get_desc_base(new_desc); | 1890 | u32 new_tss_base = get_desc_base(new_desc); |
1938 | 1891 | ||
1939 | ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | 1892 | ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, |
1940 | &err); | 1893 | &ctxt->exception); |
1941 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 1894 | if (ret != X86EMUL_CONTINUE) |
1942 | /* FIXME: need to provide precise fault address */ | 1895 | /* FIXME: need to provide precise fault address */ |
1943 | emulate_pf(ctxt); | ||
1944 | return ret; | 1896 | return ret; |
1945 | } | ||
1946 | 1897 | ||
1947 | save_state_to_tss16(ctxt, ops, &tss_seg); | 1898 | save_state_to_tss16(ctxt, ops, &tss_seg); |
1948 | 1899 | ||
1949 | ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | 1900 | ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, |
1950 | &err); | 1901 | &ctxt->exception); |
1951 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 1902 | if (ret != X86EMUL_CONTINUE) |
1952 | /* FIXME: need to provide precise fault address */ | 1903 | /* FIXME: need to provide precise fault address */ |
1953 | emulate_pf(ctxt); | ||
1954 | return ret; | 1904 | return ret; |
1955 | } | ||
1956 | 1905 | ||
1957 | ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | 1906 | ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, |
1958 | &err); | 1907 | &ctxt->exception); |
1959 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 1908 | if (ret != X86EMUL_CONTINUE) |
1960 | /* FIXME: need to provide precise fault address */ | 1909 | /* FIXME: need to provide precise fault address */ |
1961 | emulate_pf(ctxt); | ||
1962 | return ret; | 1910 | return ret; |
1963 | } | ||
1964 | 1911 | ||
1965 | if (old_tss_sel != 0xffff) { | 1912 | if (old_tss_sel != 0xffff) { |
1966 | tss_seg.prev_task_link = old_tss_sel; | 1913 | tss_seg.prev_task_link = old_tss_sel; |
@@ -1968,12 +1915,10 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, | |||
1968 | ret = ops->write_std(new_tss_base, | 1915 | ret = ops->write_std(new_tss_base, |
1969 | &tss_seg.prev_task_link, | 1916 | &tss_seg.prev_task_link, |
1970 | sizeof tss_seg.prev_task_link, | 1917 | sizeof tss_seg.prev_task_link, |
1971 | ctxt->vcpu, &err); | 1918 | ctxt->vcpu, &ctxt->exception); |
1972 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 1919 | if (ret != X86EMUL_CONTINUE) |
1973 | /* FIXME: need to provide precise fault address */ | 1920 | /* FIXME: need to provide precise fault address */ |
1974 | emulate_pf(ctxt); | ||
1975 | return ret; | 1921 | return ret; |
1976 | } | ||
1977 | } | 1922 | } |
1978 | 1923 | ||
1979 | return load_state_from_tss16(ctxt, ops, &tss_seg); | 1924 | return load_state_from_tss16(ctxt, ops, &tss_seg); |
@@ -2013,10 +1958,8 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, | |||
2013 | struct decode_cache *c = &ctxt->decode; | 1958 | struct decode_cache *c = &ctxt->decode; |
2014 | int ret; | 1959 | int ret; |
2015 | 1960 | ||
2016 | if (ops->set_cr(3, tss->cr3, ctxt->vcpu)) { | 1961 | if (ops->set_cr(3, tss->cr3, ctxt->vcpu)) |
2017 | emulate_gp(ctxt, 0); | 1962 | return emulate_gp(ctxt, 0); |
2018 | return X86EMUL_PROPAGATE_FAULT; | ||
2019 | } | ||
2020 | c->eip = tss->eip; | 1963 | c->eip = tss->eip; |
2021 | ctxt->eflags = tss->eflags | 2; | 1964 | ctxt->eflags = tss->eflags | 2; |
2022 | c->regs[VCPU_REGS_RAX] = tss->eax; | 1965 | c->regs[VCPU_REGS_RAX] = tss->eax; |
@@ -2076,33 +2019,27 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, | |||
2076 | { | 2019 | { |
2077 | struct tss_segment_32 tss_seg; | 2020 | struct tss_segment_32 tss_seg; |
2078 | int ret; | 2021 | int ret; |
2079 | u32 err, new_tss_base = get_desc_base(new_desc); | 2022 | u32 new_tss_base = get_desc_base(new_desc); |
2080 | 2023 | ||
2081 | ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | 2024 | ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, |
2082 | &err); | 2025 | &ctxt->exception); |
2083 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 2026 | if (ret != X86EMUL_CONTINUE) |
2084 | /* FIXME: need to provide precise fault address */ | 2027 | /* FIXME: need to provide precise fault address */ |
2085 | emulate_pf(ctxt); | ||
2086 | return ret; | 2028 | return ret; |
2087 | } | ||
2088 | 2029 | ||
2089 | save_state_to_tss32(ctxt, ops, &tss_seg); | 2030 | save_state_to_tss32(ctxt, ops, &tss_seg); |
2090 | 2031 | ||
2091 | ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | 2032 | ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, |
2092 | &err); | 2033 | &ctxt->exception); |
2093 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 2034 | if (ret != X86EMUL_CONTINUE) |
2094 | /* FIXME: need to provide precise fault address */ | 2035 | /* FIXME: need to provide precise fault address */ |
2095 | emulate_pf(ctxt); | ||
2096 | return ret; | 2036 | return ret; |
2097 | } | ||
2098 | 2037 | ||
2099 | ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | 2038 | ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, |
2100 | &err); | 2039 | &ctxt->exception); |
2101 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 2040 | if (ret != X86EMUL_CONTINUE) |
2102 | /* FIXME: need to provide precise fault address */ | 2041 | /* FIXME: need to provide precise fault address */ |
2103 | emulate_pf(ctxt); | ||
2104 | return ret; | 2042 | return ret; |
2105 | } | ||
2106 | 2043 | ||
2107 | if (old_tss_sel != 0xffff) { | 2044 | if (old_tss_sel != 0xffff) { |
2108 | tss_seg.prev_task_link = old_tss_sel; | 2045 | tss_seg.prev_task_link = old_tss_sel; |
@@ -2110,12 +2047,10 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, | |||
2110 | ret = ops->write_std(new_tss_base, | 2047 | ret = ops->write_std(new_tss_base, |
2111 | &tss_seg.prev_task_link, | 2048 | &tss_seg.prev_task_link, |
2112 | sizeof tss_seg.prev_task_link, | 2049 | sizeof tss_seg.prev_task_link, |
2113 | ctxt->vcpu, &err); | 2050 | ctxt->vcpu, &ctxt->exception); |
2114 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 2051 | if (ret != X86EMUL_CONTINUE) |
2115 | /* FIXME: need to provide precise fault address */ | 2052 | /* FIXME: need to provide precise fault address */ |
2116 | emulate_pf(ctxt); | ||
2117 | return ret; | 2053 | return ret; |
2118 | } | ||
2119 | } | 2054 | } |
2120 | 2055 | ||
2121 | return load_state_from_tss32(ctxt, ops, &tss_seg); | 2056 | return load_state_from_tss32(ctxt, ops, &tss_seg); |
@@ -2146,10 +2081,8 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2146 | 2081 | ||
2147 | if (reason != TASK_SWITCH_IRET) { | 2082 | if (reason != TASK_SWITCH_IRET) { |
2148 | if ((tss_selector & 3) > next_tss_desc.dpl || | 2083 | if ((tss_selector & 3) > next_tss_desc.dpl || |
2149 | ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) { | 2084 | ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) |
2150 | emulate_gp(ctxt, 0); | 2085 | return emulate_gp(ctxt, 0); |
2151 | return X86EMUL_PROPAGATE_FAULT; | ||
2152 | } | ||
2153 | } | 2086 | } |
2154 | 2087 | ||
2155 | desc_limit = desc_limit_scaled(&next_tss_desc); | 2088 | desc_limit = desc_limit_scaled(&next_tss_desc); |
@@ -2231,14 +2164,15 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2231 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; | 2164 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; |
2232 | } | 2165 | } |
2233 | 2166 | ||
2234 | static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned long base, | 2167 | static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg, |
2235 | int reg, struct operand *op) | 2168 | int reg, struct operand *op) |
2236 | { | 2169 | { |
2237 | struct decode_cache *c = &ctxt->decode; | 2170 | struct decode_cache *c = &ctxt->decode; |
2238 | int df = (ctxt->eflags & EFLG_DF) ? -1 : 1; | 2171 | int df = (ctxt->eflags & EFLG_DF) ? -1 : 1; |
2239 | 2172 | ||
2240 | register_address_increment(c, &c->regs[reg], df * op->bytes); | 2173 | register_address_increment(c, &c->regs[reg], df * op->bytes); |
2241 | op->addr.mem = register_address(c, base, c->regs[reg]); | 2174 | op->addr.mem.ea = register_address(c, c->regs[reg]); |
2175 | op->addr.mem.seg = seg; | ||
2242 | } | 2176 | } |
2243 | 2177 | ||
2244 | static int em_push(struct x86_emulate_ctxt *ctxt) | 2178 | static int em_push(struct x86_emulate_ctxt *ctxt) |
@@ -2369,10 +2303,8 @@ static int em_rdtsc(struct x86_emulate_ctxt *ctxt) | |||
2369 | struct decode_cache *c = &ctxt->decode; | 2303 | struct decode_cache *c = &ctxt->decode; |
2370 | u64 tsc = 0; | 2304 | u64 tsc = 0; |
2371 | 2305 | ||
2372 | if (cpl > 0 && (ctxt->ops->get_cr(4, ctxt->vcpu) & X86_CR4_TSD)) { | 2306 | if (cpl > 0 && (ctxt->ops->get_cr(4, ctxt->vcpu) & X86_CR4_TSD)) |
2373 | emulate_gp(ctxt, 0); | 2307 | return emulate_gp(ctxt, 0); |
2374 | return X86EMUL_PROPAGATE_FAULT; | ||
2375 | } | ||
2376 | ctxt->ops->get_msr(ctxt->vcpu, MSR_IA32_TSC, &tsc); | 2308 | ctxt->ops->get_msr(ctxt->vcpu, MSR_IA32_TSC, &tsc); |
2377 | c->regs[VCPU_REGS_RAX] = (u32)tsc; | 2309 | c->regs[VCPU_REGS_RAX] = (u32)tsc; |
2378 | c->regs[VCPU_REGS_RDX] = tsc >> 32; | 2310 | c->regs[VCPU_REGS_RDX] = tsc >> 32; |
@@ -2647,7 +2579,7 @@ static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op, | |||
2647 | 2579 | ||
2648 | op->type = OP_IMM; | 2580 | op->type = OP_IMM; |
2649 | op->bytes = size; | 2581 | op->bytes = size; |
2650 | op->addr.mem = c->eip; | 2582 | op->addr.mem.ea = c->eip; |
2651 | /* NB. Immediates are sign-extended as necessary. */ | 2583 | /* NB. Immediates are sign-extended as necessary. */ |
2652 | switch (op->bytes) { | 2584 | switch (op->bytes) { |
2653 | case 1: | 2585 | case 1: |
@@ -2678,7 +2610,7 @@ done: | |||
2678 | } | 2610 | } |
2679 | 2611 | ||
2680 | int | 2612 | int |
2681 | x86_decode_insn(struct x86_emulate_ctxt *ctxt) | 2613 | x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) |
2682 | { | 2614 | { |
2683 | struct x86_emulate_ops *ops = ctxt->ops; | 2615 | struct x86_emulate_ops *ops = ctxt->ops; |
2684 | struct decode_cache *c = &ctxt->decode; | 2616 | struct decode_cache *c = &ctxt->decode; |
@@ -2689,7 +2621,10 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt) | |||
2689 | struct operand memop = { .type = OP_NONE }; | 2621 | struct operand memop = { .type = OP_NONE }; |
2690 | 2622 | ||
2691 | c->eip = ctxt->eip; | 2623 | c->eip = ctxt->eip; |
2692 | c->fetch.start = c->fetch.end = c->eip; | 2624 | c->fetch.start = c->eip; |
2625 | c->fetch.end = c->fetch.start + insn_len; | ||
2626 | if (insn_len > 0) | ||
2627 | memcpy(c->fetch.data, insn, insn_len); | ||
2693 | ctxt->cs_base = seg_base(ctxt, ops, VCPU_SREG_CS); | 2628 | ctxt->cs_base = seg_base(ctxt, ops, VCPU_SREG_CS); |
2694 | 2629 | ||
2695 | switch (mode) { | 2630 | switch (mode) { |
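x86_decode_insn() now takes the instruction bytes directly: if the caller already holds them, they pre-seed the fetch cache (c->fetch.data) and do_fetch_insn_byte() never touches guest memory for those bytes; passing a zero length keeps the old fetch-on-demand behaviour. Hypothetical call sites, for illustration only:

	/* No bytes in hand: decoder fetches everything via ops->fetch(). */
	rc = x86_decode_insn(ctxt, NULL, 0);

	/* Bytes already available, e.g. from hardware exit information
	 * (insn_bytes/insn_len are illustrative names, not a real API): */
	rc = x86_decode_insn(ctxt, insn_bytes, insn_len);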
@@ -2803,10 +2738,8 @@ done_prefixes: | |||
2803 | c->execute = opcode.u.execute; | 2738 | c->execute = opcode.u.execute; |
2804 | 2739 | ||
2805 | /* Unrecognised? */ | 2740 | /* Unrecognised? */ |
2806 | if (c->d == 0 || (c->d & Undefined)) { | 2741 | if (c->d == 0 || (c->d & Undefined)) |
2807 | DPRINTF("Cannot emulate %02x\n", c->b); | ||
2808 | return -1; | 2742 | return -1; |
2809 | } | ||
2810 | 2743 | ||
2811 | if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack)) | 2744 | if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack)) |
2812 | c->op_bytes = 8; | 2745 | c->op_bytes = 8; |
@@ -2831,14 +2764,13 @@ done_prefixes: | |||
2831 | if (!c->has_seg_override) | 2764 | if (!c->has_seg_override) |
2832 | set_seg_override(c, VCPU_SREG_DS); | 2765 | set_seg_override(c, VCPU_SREG_DS); |
2833 | 2766 | ||
2834 | if (memop.type == OP_MEM && !(!c->twobyte && c->b == 0x8d)) | 2767 | memop.addr.mem.seg = seg_override(ctxt, ops, c); |
2835 | memop.addr.mem += seg_override_base(ctxt, ops, c); | ||
2836 | 2768 | ||
2837 | if (memop.type == OP_MEM && c->ad_bytes != 8) | 2769 | if (memop.type == OP_MEM && c->ad_bytes != 8) |
2838 | memop.addr.mem = (u32)memop.addr.mem; | 2770 | memop.addr.mem.ea = (u32)memop.addr.mem.ea; |
2839 | 2771 | ||
2840 | if (memop.type == OP_MEM && c->rip_relative) | 2772 | if (memop.type == OP_MEM && c->rip_relative) |
2841 | memop.addr.mem += c->eip; | 2773 | memop.addr.mem.ea += c->eip; |
2842 | 2774 | ||
2843 | /* | 2775 | /* |
2844 | * Decode and fetch the source operand: register, memory | 2776 | * Decode and fetch the source operand: register, memory |
@@ -2890,14 +2822,14 @@ done_prefixes: | |||
2890 | case SrcSI: | 2822 | case SrcSI: |
2891 | c->src.type = OP_MEM; | 2823 | c->src.type = OP_MEM; |
2892 | c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 2824 | c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
2893 | c->src.addr.mem = | 2825 | c->src.addr.mem.ea = |
2894 | register_address(c, seg_override_base(ctxt, ops, c), | 2826 | register_address(c, c->regs[VCPU_REGS_RSI]); |
2895 | c->regs[VCPU_REGS_RSI]); | 2827 | c->src.addr.mem.seg = seg_override(ctxt, ops, c), |
2896 | c->src.val = 0; | 2828 | c->src.val = 0; |
2897 | break; | 2829 | break; |
2898 | case SrcImmFAddr: | 2830 | case SrcImmFAddr: |
2899 | c->src.type = OP_IMM; | 2831 | c->src.type = OP_IMM; |
2900 | c->src.addr.mem = c->eip; | 2832 | c->src.addr.mem.ea = c->eip; |
2901 | c->src.bytes = c->op_bytes + 2; | 2833 | c->src.bytes = c->op_bytes + 2; |
2902 | insn_fetch_arr(c->src.valptr, c->src.bytes, c->eip); | 2834 | insn_fetch_arr(c->src.valptr, c->src.bytes, c->eip); |
2903 | break; | 2835 | break; |
@@ -2944,7 +2876,7 @@ done_prefixes: | |||
2944 | break; | 2876 | break; |
2945 | case DstImmUByte: | 2877 | case DstImmUByte: |
2946 | c->dst.type = OP_IMM; | 2878 | c->dst.type = OP_IMM; |
2947 | c->dst.addr.mem = c->eip; | 2879 | c->dst.addr.mem.ea = c->eip; |
2948 | c->dst.bytes = 1; | 2880 | c->dst.bytes = 1; |
2949 | c->dst.val = insn_fetch(u8, 1, c->eip); | 2881 | c->dst.val = insn_fetch(u8, 1, c->eip); |
2950 | break; | 2882 | break; |
@@ -2969,9 +2901,9 @@ done_prefixes: | |||
2969 | case DstDI: | 2901 | case DstDI: |
2970 | c->dst.type = OP_MEM; | 2902 | c->dst.type = OP_MEM; |
2971 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 2903 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
2972 | c->dst.addr.mem = | 2904 | c->dst.addr.mem.ea = |
2973 | register_address(c, es_base(ctxt, ops), | 2905 | register_address(c, c->regs[VCPU_REGS_RDI]); |
2974 | c->regs[VCPU_REGS_RDI]); | 2906 | c->dst.addr.mem.seg = VCPU_SREG_ES; |
2975 | c->dst.val = 0; | 2907 | c->dst.val = 0; |
2976 | break; | 2908 | break; |
2977 | case ImplicitOps: | 2909 | case ImplicitOps: |
@@ -3020,24 +2952,24 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) | |||
3020 | ctxt->decode.mem_read.pos = 0; | 2952 | ctxt->decode.mem_read.pos = 0; |
3021 | 2953 | ||
3022 | if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { | 2954 | if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { |
3023 | emulate_ud(ctxt); | 2955 | rc = emulate_ud(ctxt); |
3024 | goto done; | 2956 | goto done; |
3025 | } | 2957 | } |
3026 | 2958 | ||
3027 | /* LOCK prefix is allowed only with some instructions */ | 2959 | /* LOCK prefix is allowed only with some instructions */ |
3028 | if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) { | 2960 | if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) { |
3029 | emulate_ud(ctxt); | 2961 | rc = emulate_ud(ctxt); |
3030 | goto done; | 2962 | goto done; |
3031 | } | 2963 | } |
3032 | 2964 | ||
3033 | if ((c->d & SrcMask) == SrcMemFAddr && c->src.type != OP_MEM) { | 2965 | if ((c->d & SrcMask) == SrcMemFAddr && c->src.type != OP_MEM) { |
3034 | emulate_ud(ctxt); | 2966 | rc = emulate_ud(ctxt); |
3035 | goto done; | 2967 | goto done; |
3036 | } | 2968 | } |
3037 | 2969 | ||
3038 | /* Privileged instruction can be executed only in CPL=0 */ | 2970 | /* Privileged instruction can be executed only in CPL=0 */ |
3039 | if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) { | 2971 | if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) { |
3040 | emulate_gp(ctxt, 0); | 2972 | rc = emulate_gp(ctxt, 0); |
3041 | goto done; | 2973 | goto done; |
3042 | } | 2974 | } |
3043 | 2975 | ||
@@ -3050,7 +2982,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) | |||
3050 | } | 2982 | } |
3051 | 2983 | ||
3052 | if ((c->src.type == OP_MEM) && !(c->d & NoAccess)) { | 2984 | if ((c->src.type == OP_MEM) && !(c->d & NoAccess)) { |
3053 | rc = read_emulated(ctxt, ops, c->src.addr.mem, | 2985 | rc = read_emulated(ctxt, ops, linear(ctxt, c->src.addr.mem), |
3054 | c->src.valptr, c->src.bytes); | 2986 | c->src.valptr, c->src.bytes); |
3055 | if (rc != X86EMUL_CONTINUE) | 2987 | if (rc != X86EMUL_CONTINUE) |
3056 | goto done; | 2988 | goto done; |
@@ -3058,7 +2990,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) | |||
3058 | } | 2990 | } |
3059 | 2991 | ||
3060 | if (c->src2.type == OP_MEM) { | 2992 | if (c->src2.type == OP_MEM) { |
3061 | rc = read_emulated(ctxt, ops, c->src2.addr.mem, | 2993 | rc = read_emulated(ctxt, ops, linear(ctxt, c->src2.addr.mem), |
3062 | &c->src2.val, c->src2.bytes); | 2994 | &c->src2.val, c->src2.bytes); |
3063 | if (rc != X86EMUL_CONTINUE) | 2995 | if (rc != X86EMUL_CONTINUE) |
3064 | goto done; | 2996 | goto done; |
@@ -3070,7 +3002,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt) | |||
3070 | 3002 | ||
3071 | if ((c->dst.type == OP_MEM) && !(c->d & Mov)) { | 3003 | if ((c->dst.type == OP_MEM) && !(c->d & Mov)) { |
3072 | /* optimisation - avoid slow emulated read if Mov */ | 3004 | /* optimisation - avoid slow emulated read if Mov */ |
3073 | rc = read_emulated(ctxt, ops, c->dst.addr.mem, | 3005 | rc = read_emulated(ctxt, ops, linear(ctxt, c->dst.addr.mem), |
3074 | &c->dst.val, c->dst.bytes); | 3006 | &c->dst.val, c->dst.bytes); |
3075 | if (rc != X86EMUL_CONTINUE) | 3007 | if (rc != X86EMUL_CONTINUE) |
3076 | goto done; | 3008 | goto done; |
@@ -3215,13 +3147,13 @@ special_insn: | |||
3215 | break; | 3147 | break; |
3216 | case 0x8c: /* mov r/m, sreg */ | 3148 | case 0x8c: /* mov r/m, sreg */ |
3217 | if (c->modrm_reg > VCPU_SREG_GS) { | 3149 | if (c->modrm_reg > VCPU_SREG_GS) { |
3218 | emulate_ud(ctxt); | 3150 | rc = emulate_ud(ctxt); |
3219 | goto done; | 3151 | goto done; |
3220 | } | 3152 | } |
3221 | c->dst.val = ops->get_segment_selector(c->modrm_reg, ctxt->vcpu); | 3153 | c->dst.val = ops->get_segment_selector(c->modrm_reg, ctxt->vcpu); |
3222 | break; | 3154 | break; |
3223 | case 0x8d: /* lea r16/r32, m */ | 3155 | case 0x8d: /* lea r16/r32, m */ |
3224 | c->dst.val = c->src.addr.mem; | 3156 | c->dst.val = c->src.addr.mem.ea; |
3225 | break; | 3157 | break; |
3226 | case 0x8e: { /* mov seg, r/m16 */ | 3158 | case 0x8e: { /* mov seg, r/m16 */ |
3227 | uint16_t sel; | 3159 | uint16_t sel; |
@@ -3230,7 +3162,7 @@ special_insn: | |||
3230 | 3162 | ||
3231 | if (c->modrm_reg == VCPU_SREG_CS || | 3163 | if (c->modrm_reg == VCPU_SREG_CS || |
3232 | c->modrm_reg > VCPU_SREG_GS) { | 3164 | c->modrm_reg > VCPU_SREG_GS) { |
3233 | emulate_ud(ctxt); | 3165 | rc = emulate_ud(ctxt); |
3234 | goto done; | 3166 | goto done; |
3235 | } | 3167 | } |
3236 | 3168 | ||
@@ -3268,7 +3200,6 @@ special_insn: | |||
3268 | break; | 3200 | break; |
3269 | case 0xa6 ... 0xa7: /* cmps */ | 3201 | case 0xa6 ... 0xa7: /* cmps */ |
3270 | c->dst.type = OP_NONE; /* Disable writeback. */ | 3202 | c->dst.type = OP_NONE; /* Disable writeback. */ |
3271 | DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.addr.mem, c->dst.addr.mem); | ||
3272 | goto cmp; | 3203 | goto cmp; |
3273 | case 0xa8 ... 0xa9: /* test ax, imm */ | 3204 | case 0xa8 ... 0xa9: /* test ax, imm */ |
3274 | goto test; | 3205 | goto test; |
@@ -3363,7 +3294,7 @@ special_insn: | |||
3363 | do_io_in: | 3294 | do_io_in: |
3364 | c->dst.bytes = min(c->dst.bytes, 4u); | 3295 | c->dst.bytes = min(c->dst.bytes, 4u); |
3365 | if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { | 3296 | if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { |
3366 | emulate_gp(ctxt, 0); | 3297 | rc = emulate_gp(ctxt, 0); |
3367 | goto done; | 3298 | goto done; |
3368 | } | 3299 | } |
3369 | if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val, | 3300 | if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val, |
@@ -3377,7 +3308,7 @@ special_insn: | |||
3377 | c->src.bytes = min(c->src.bytes, 4u); | 3308 | c->src.bytes = min(c->src.bytes, 4u); |
3378 | if (!emulator_io_permited(ctxt, ops, c->dst.val, | 3309 | if (!emulator_io_permited(ctxt, ops, c->dst.val, |
3379 | c->src.bytes)) { | 3310 | c->src.bytes)) { |
3380 | emulate_gp(ctxt, 0); | 3311 | rc = emulate_gp(ctxt, 0); |
3381 | goto done; | 3312 | goto done; |
3382 | } | 3313 | } |
3383 | ops->pio_out_emulated(c->src.bytes, c->dst.val, | 3314 | ops->pio_out_emulated(c->src.bytes, c->dst.val, |
@@ -3402,14 +3333,14 @@ special_insn: | |||
3402 | break; | 3333 | break; |
3403 | case 0xfa: /* cli */ | 3334 | case 0xfa: /* cli */ |
3404 | if (emulator_bad_iopl(ctxt, ops)) { | 3335 | if (emulator_bad_iopl(ctxt, ops)) { |
3405 | emulate_gp(ctxt, 0); | 3336 | rc = emulate_gp(ctxt, 0); |
3406 | goto done; | 3337 | goto done; |
3407 | } else | 3338 | } else |
3408 | ctxt->eflags &= ~X86_EFLAGS_IF; | 3339 | ctxt->eflags &= ~X86_EFLAGS_IF; |
3409 | break; | 3340 | break; |
3410 | case 0xfb: /* sti */ | 3341 | case 0xfb: /* sti */ |
3411 | if (emulator_bad_iopl(ctxt, ops)) { | 3342 | if (emulator_bad_iopl(ctxt, ops)) { |
3412 | emulate_gp(ctxt, 0); | 3343 | rc = emulate_gp(ctxt, 0); |
3413 | goto done; | 3344 | goto done; |
3414 | } else { | 3345 | } else { |
3415 | ctxt->interruptibility = KVM_X86_SHADOW_INT_STI; | 3346 | ctxt->interruptibility = KVM_X86_SHADOW_INT_STI; |
@@ -3449,11 +3380,11 @@ writeback: | |||
3449 | c->dst.type = saved_dst_type; | 3380 | c->dst.type = saved_dst_type; |
3450 | 3381 | ||
3451 | if ((c->d & SrcMask) == SrcSI) | 3382 | if ((c->d & SrcMask) == SrcSI) |
3452 | string_addr_inc(ctxt, seg_override_base(ctxt, ops, c), | 3383 | string_addr_inc(ctxt, seg_override(ctxt, ops, c), |
3453 | VCPU_REGS_RSI, &c->src); | 3384 | VCPU_REGS_RSI, &c->src); |
3454 | 3385 | ||
3455 | if ((c->d & DstMask) == DstDI) | 3386 | if ((c->d & DstMask) == DstDI) |
3456 | string_addr_inc(ctxt, es_base(ctxt, ops), VCPU_REGS_RDI, | 3387 | string_addr_inc(ctxt, VCPU_SREG_ES, VCPU_REGS_RDI, |
3457 | &c->dst); | 3388 | &c->dst); |
3458 | 3389 | ||
3459 | if (c->rep_prefix && (c->d & String)) { | 3390 | if (c->rep_prefix && (c->d & String)) { |
@@ -3482,6 +3413,8 @@ writeback: | |||
3482 | ctxt->eip = c->eip; | 3413 | ctxt->eip = c->eip; |
3483 | 3414 | ||
3484 | done: | 3415 | done: |
3416 | if (rc == X86EMUL_PROPAGATE_FAULT) | ||
3417 | ctxt->have_exception = true; | ||
3485 | return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; | 3418 | return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; |
3486 | 3419 | ||
3487 | twobyte_insn: | 3420 | twobyte_insn: |
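This is the payoff of making emulate_exception() and friends return X86EMUL_PROPAGATE_FAULT earlier in the patch: each fault site now sets rc and jumps to done:, and the single check at done: records the pending exception instead of every site injecting it as a side effect. The per-site pattern reduces to a sketch like:

	if (fault_condition) {			/* illustrative condition */
		rc = emulate_gp(ctxt, 0);	/* X86EMUL_PROPAGATE_FAULT */
		goto done;
	}
	/* ... */
done:
	if (rc == X86EMUL_PROPAGATE_FAULT)
		ctxt->have_exception = true;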
@@ -3544,9 +3477,11 @@ twobyte_insn: | |||
3544 | break; | 3477 | break; |
3545 | case 5: /* not defined */ | 3478 | case 5: /* not defined */ |
3546 | emulate_ud(ctxt); | 3479 | emulate_ud(ctxt); |
3480 | rc = X86EMUL_PROPAGATE_FAULT; | ||
3547 | goto done; | 3481 | goto done; |
3548 | case 7: /* invlpg*/ | 3482 | case 7: /* invlpg*/ |
3549 | emulate_invlpg(ctxt->vcpu, c->src.addr.mem); | 3483 | emulate_invlpg(ctxt->vcpu, |
3484 | linear(ctxt, c->src.addr.mem)); | ||
3550 | /* Disable writeback. */ | 3485 | /* Disable writeback. */ |
3551 | c->dst.type = OP_NONE; | 3486 | c->dst.type = OP_NONE; |
3552 | break; | 3487 | break; |
@@ -3573,6 +3508,7 @@ twobyte_insn: | |||
3573 | case 5 ... 7: | 3508 | case 5 ... 7: |
3574 | case 9 ... 15: | 3509 | case 9 ... 15: |
3575 | emulate_ud(ctxt); | 3510 | emulate_ud(ctxt); |
3511 | rc = X86EMUL_PROPAGATE_FAULT; | ||
3576 | goto done; | 3512 | goto done; |
3577 | } | 3513 | } |
3578 | c->dst.val = ops->get_cr(c->modrm_reg, ctxt->vcpu); | 3514 | c->dst.val = ops->get_cr(c->modrm_reg, ctxt->vcpu); |
@@ -3581,6 +3517,7 @@ twobyte_insn: | |||
3581 | if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && | 3517 | if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && |
3582 | (c->modrm_reg == 4 || c->modrm_reg == 5)) { | 3518 | (c->modrm_reg == 4 || c->modrm_reg == 5)) { |
3583 | emulate_ud(ctxt); | 3519 | emulate_ud(ctxt); |
3520 | rc = X86EMUL_PROPAGATE_FAULT; | ||
3584 | goto done; | 3521 | goto done; |
3585 | } | 3522 | } |
3586 | ops->get_dr(c->modrm_reg, &c->dst.val, ctxt->vcpu); | 3523 | ops->get_dr(c->modrm_reg, &c->dst.val, ctxt->vcpu); |
@@ -3588,6 +3525,7 @@ twobyte_insn: | |||
3588 | case 0x22: /* mov reg, cr */ | 3525 | case 0x22: /* mov reg, cr */ |
3589 | if (ops->set_cr(c->modrm_reg, c->src.val, ctxt->vcpu)) { | 3526 | if (ops->set_cr(c->modrm_reg, c->src.val, ctxt->vcpu)) { |
3590 | emulate_gp(ctxt, 0); | 3527 | emulate_gp(ctxt, 0); |
3528 | rc = X86EMUL_PROPAGATE_FAULT; | ||
3591 | goto done; | 3529 | goto done; |
3592 | } | 3530 | } |
3593 | c->dst.type = OP_NONE; | 3531 | c->dst.type = OP_NONE; |
@@ -3596,6 +3534,7 @@ twobyte_insn: | |||
3596 | if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && | 3534 | if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && |
3597 | (c->modrm_reg == 4 || c->modrm_reg == 5)) { | 3535 | (c->modrm_reg == 4 || c->modrm_reg == 5)) { |
3598 | emulate_ud(ctxt); | 3536 | emulate_ud(ctxt); |
3537 | rc = X86EMUL_PROPAGATE_FAULT; | ||
3599 | goto done; | 3538 | goto done; |
3600 | } | 3539 | } |
3601 | 3540 | ||
@@ -3604,6 +3543,7 @@ twobyte_insn: | |||
3604 | ~0ULL : ~0U), ctxt->vcpu) < 0) { | 3543 | ~0ULL : ~0U), ctxt->vcpu) < 0) { |
3605 | /* #UD condition is already handled by the code above */ | 3544 | /* #UD condition is already handled by the code above */ |
3606 | emulate_gp(ctxt, 0); | 3545 | emulate_gp(ctxt, 0); |
3546 | rc = X86EMUL_PROPAGATE_FAULT; | ||
3607 | goto done; | 3547 | goto done; |
3608 | } | 3548 | } |
3609 | 3549 | ||
@@ -3615,6 +3555,7 @@ twobyte_insn: | |||
3615 | | ((u64)c->regs[VCPU_REGS_RDX] << 32); | 3555 | | ((u64)c->regs[VCPU_REGS_RDX] << 32); |
3616 | if (ops->set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) { | 3556 | if (ops->set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) { |
3617 | emulate_gp(ctxt, 0); | 3557 | emulate_gp(ctxt, 0); |
3558 | rc = X86EMUL_PROPAGATE_FAULT; | ||
3618 | goto done; | 3559 | goto done; |
3619 | } | 3560 | } |
3620 | rc = X86EMUL_CONTINUE; | 3561 | rc = X86EMUL_CONTINUE; |
@@ -3623,6 +3564,7 @@ twobyte_insn: | |||
3623 | /* rdmsr */ | 3564 | /* rdmsr */ |
3624 | if (ops->get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) { | 3565 | if (ops->get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) { |
3625 | emulate_gp(ctxt, 0); | 3566 | emulate_gp(ctxt, 0); |
3567 | rc = X86EMUL_PROPAGATE_FAULT; | ||
3626 | goto done; | 3568 | goto done; |
3627 | } else { | 3569 | } else { |
3628 | c->regs[VCPU_REGS_RAX] = (u32)msr_data; | 3570 | c->regs[VCPU_REGS_RAX] = (u32)msr_data; |
@@ -3785,6 +3727,5 @@ twobyte_insn: | |||
3785 | goto writeback; | 3727 | goto writeback; |
3786 | 3728 | ||
3787 | cannot_emulate: | 3729 | cannot_emulate: |
3788 | DPRINTF("Cannot emulate %02x\n", c->b); | ||
3789 | return -1; | 3730 | return -1; |
3790 | } | 3731 | } |
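The recurring change across the emulate.c hunks above is the fault-propagation pattern: each fault site records X86EMUL_PROPAGATE_FAULT in rc (explicitly, or via the return value of emulate_gp()/emulate_ud()) before jumping to done:, and the single exit path latches ctxt->have_exception. A minimal sketch of the pattern; some_check_fails() is a placeholder for the iopl/permission checks seen above, not a function in this diff:

static int emulate_one_sketch(struct x86_emulate_ctxt *ctxt)
{
	int rc = X86EMUL_CONTINUE;

	if (some_check_fails(ctxt)) {
		/* emulate_gp() queues the #GP and returns the status */
		rc = emulate_gp(ctxt, 0);
		goto done;
	}
	/* ... actual instruction emulation ... */
done:
	if (rc == X86EMUL_PROPAGATE_FAULT)
		ctxt->have_exception = true;
	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
}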
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index 975bb45329a1..3377d53fcd36 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h | |||
@@ -73,6 +73,13 @@ static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask) | |||
73 | return vcpu->arch.cr4 & mask; | 73 | return vcpu->arch.cr4 & mask; |
74 | } | 74 | } |
75 | 75 | ||
76 | static inline ulong kvm_read_cr3(struct kvm_vcpu *vcpu) | ||
77 | { | ||
78 | if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail)) | ||
79 | kvm_x86_ops->decache_cr3(vcpu); | ||
80 | return vcpu->arch.cr3; | ||
81 | } | ||
82 | |||
76 | static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu) | 83 | static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu) |
77 | { | 84 | { |
78 | return kvm_read_cr4_bits(vcpu, ~0UL); | 85 | return kvm_read_cr4_bits(vcpu, ~0UL); |
@@ -84,4 +91,19 @@ static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu) | |||
84 | | ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32); | 91 | | ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32); |
85 | } | 92 | } |
86 | 93 | ||
94 | static inline void enter_guest_mode(struct kvm_vcpu *vcpu) | ||
95 | { | ||
96 | vcpu->arch.hflags |= HF_GUEST_MASK; | ||
97 | } | ||
98 | |||
99 | static inline void leave_guest_mode(struct kvm_vcpu *vcpu) | ||
100 | { | ||
101 | vcpu->arch.hflags &= ~HF_GUEST_MASK; | ||
102 | } | ||
103 | |||
104 | static inline bool is_guest_mode(struct kvm_vcpu *vcpu) | ||
105 | { | ||
106 | return vcpu->arch.hflags & HF_GUEST_MASK; | ||
107 | } | ||
108 | |||
87 | #endif | 109 | #endif |
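For context on how the new kvm_cache_regs.h helpers are meant to be used: CR3 reads go through kvm_read_cr3(), which lazily decaches the register from hardware the first time it is read after being marked stale, and nested state is queried with is_guest_mode() instead of open-coding HF_GUEST_MASK. A hedged usage sketch; dump_paging_state() is illustrative, not part of this diff:

static void dump_paging_state(struct kvm_vcpu *vcpu)
{
	/* may trigger ->decache_cr3() if VCPU_EXREG_CR3 is stale */
	unsigned long cr3 = kvm_read_cr3(vcpu);

	pr_debug("cr3=%lx cr4=%lx nested=%d\n",
		 cr3, kvm_read_cr4(vcpu), is_guest_mode(vcpu));
}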
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 413f8973a855..93cf9d0d3653 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -277,7 +277,8 @@ static void apic_update_ppr(struct kvm_lapic *apic) | |||
277 | 277 | ||
278 | if (old_ppr != ppr) { | 278 | if (old_ppr != ppr) { |
279 | apic_set_reg(apic, APIC_PROCPRI, ppr); | 279 | apic_set_reg(apic, APIC_PROCPRI, ppr); |
280 | kvm_make_request(KVM_REQ_EVENT, apic->vcpu); | 280 | if (ppr < old_ppr) |
281 | kvm_make_request(KVM_REQ_EVENT, apic->vcpu); | ||
281 | } | 282 | } |
282 | } | 283 | } |
283 | 284 | ||
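The lapic.c change is a deliberate narrowing: raising PPR can only mask interrupts, so only a drop in priority can make a pending interrupt newly deliverable and therefore needs KVM_REQ_EVENT. A worked example with illustrative values:

/* An IRQ at vector 0x31 has priority class 3 and is deliverable only
 * while PPR[7:4] < 3:
 *   old_ppr = 0x40, ppr = 0x20  ->  may unmask it, raise KVM_REQ_EVENT
 *   old_ppr = 0x20, ppr = 0x40  ->  nothing becomes deliverable, skip
 */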
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index fbb04aee8301..f02b8edc3d44 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -18,9 +18,11 @@ | |||
18 | * | 18 | * |
19 | */ | 19 | */ |
20 | 20 | ||
21 | #include "irq.h" | ||
21 | #include "mmu.h" | 22 | #include "mmu.h" |
22 | #include "x86.h" | 23 | #include "x86.h" |
23 | #include "kvm_cache_regs.h" | 24 | #include "kvm_cache_regs.h" |
25 | #include "x86.h" | ||
24 | 26 | ||
25 | #include <linux/kvm_host.h> | 27 | #include <linux/kvm_host.h> |
26 | #include <linux/types.h> | 28 | #include <linux/types.h> |
@@ -194,7 +196,6 @@ static struct percpu_counter kvm_total_used_mmu_pages; | |||
194 | 196 | ||
195 | static u64 __read_mostly shadow_trap_nonpresent_pte; | 197 | static u64 __read_mostly shadow_trap_nonpresent_pte; |
196 | static u64 __read_mostly shadow_notrap_nonpresent_pte; | 198 | static u64 __read_mostly shadow_notrap_nonpresent_pte; |
197 | static u64 __read_mostly shadow_base_present_pte; | ||
198 | static u64 __read_mostly shadow_nx_mask; | 199 | static u64 __read_mostly shadow_nx_mask; |
199 | static u64 __read_mostly shadow_x_mask; /* mutually exclusive with nx_mask */ | 200 | static u64 __read_mostly shadow_x_mask; /* mutually exclusive with nx_mask */ |
200 | static u64 __read_mostly shadow_user_mask; | 201 | static u64 __read_mostly shadow_user_mask; |
@@ -213,12 +214,6 @@ void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte) | |||
213 | } | 214 | } |
214 | EXPORT_SYMBOL_GPL(kvm_mmu_set_nonpresent_ptes); | 215 | EXPORT_SYMBOL_GPL(kvm_mmu_set_nonpresent_ptes); |
215 | 216 | ||
216 | void kvm_mmu_set_base_ptes(u64 base_pte) | ||
217 | { | ||
218 | shadow_base_present_pte = base_pte; | ||
219 | } | ||
220 | EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes); | ||
221 | |||
222 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, | 217 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, |
223 | u64 dirty_mask, u64 nx_mask, u64 x_mask) | 218 | u64 dirty_mask, u64 nx_mask, u64 x_mask) |
224 | { | 219 | { |
@@ -482,46 +477,46 @@ static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn) | |||
482 | } | 477 | } |
483 | 478 | ||
484 | /* | 479 | /* |
485 | * Return the pointer to the largepage write count for a given | 480 | * Return the pointer to the large page information for a given gfn, |
486 | * gfn, handling slots that are not large page aligned. | 481 | * handling slots that are not large page aligned. |
487 | */ | 482 | */ |
488 | static int *slot_largepage_idx(gfn_t gfn, | 483 | static struct kvm_lpage_info *lpage_info_slot(gfn_t gfn, |
489 | struct kvm_memory_slot *slot, | 484 | struct kvm_memory_slot *slot, |
490 | int level) | 485 | int level) |
491 | { | 486 | { |
492 | unsigned long idx; | 487 | unsigned long idx; |
493 | 488 | ||
494 | idx = (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - | 489 | idx = (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - |
495 | (slot->base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); | 490 | (slot->base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); |
496 | return &slot->lpage_info[level - 2][idx].write_count; | 491 | return &slot->lpage_info[level - 2][idx]; |
497 | } | 492 | } |
498 | 493 | ||
499 | static void account_shadowed(struct kvm *kvm, gfn_t gfn) | 494 | static void account_shadowed(struct kvm *kvm, gfn_t gfn) |
500 | { | 495 | { |
501 | struct kvm_memory_slot *slot; | 496 | struct kvm_memory_slot *slot; |
502 | int *write_count; | 497 | struct kvm_lpage_info *linfo; |
503 | int i; | 498 | int i; |
504 | 499 | ||
505 | slot = gfn_to_memslot(kvm, gfn); | 500 | slot = gfn_to_memslot(kvm, gfn); |
506 | for (i = PT_DIRECTORY_LEVEL; | 501 | for (i = PT_DIRECTORY_LEVEL; |
507 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { | 502 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { |
508 | write_count = slot_largepage_idx(gfn, slot, i); | 503 | linfo = lpage_info_slot(gfn, slot, i); |
509 | *write_count += 1; | 504 | linfo->write_count += 1; |
510 | } | 505 | } |
511 | } | 506 | } |
512 | 507 | ||
513 | static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn) | 508 | static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn) |
514 | { | 509 | { |
515 | struct kvm_memory_slot *slot; | 510 | struct kvm_memory_slot *slot; |
516 | int *write_count; | 511 | struct kvm_lpage_info *linfo; |
517 | int i; | 512 | int i; |
518 | 513 | ||
519 | slot = gfn_to_memslot(kvm, gfn); | 514 | slot = gfn_to_memslot(kvm, gfn); |
520 | for (i = PT_DIRECTORY_LEVEL; | 515 | for (i = PT_DIRECTORY_LEVEL; |
521 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { | 516 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { |
522 | write_count = slot_largepage_idx(gfn, slot, i); | 517 | linfo = lpage_info_slot(gfn, slot, i); |
523 | *write_count -= 1; | 518 | linfo->write_count -= 1; |
524 | WARN_ON(*write_count < 0); | 519 | WARN_ON(linfo->write_count < 0); |
525 | } | 520 | } |
526 | } | 521 | } |
527 | 522 | ||
@@ -530,12 +525,12 @@ static int has_wrprotected_page(struct kvm *kvm, | |||
530 | int level) | 525 | int level) |
531 | { | 526 | { |
532 | struct kvm_memory_slot *slot; | 527 | struct kvm_memory_slot *slot; |
533 | int *largepage_idx; | 528 | struct kvm_lpage_info *linfo; |
534 | 529 | ||
535 | slot = gfn_to_memslot(kvm, gfn); | 530 | slot = gfn_to_memslot(kvm, gfn); |
536 | if (slot) { | 531 | if (slot) { |
537 | largepage_idx = slot_largepage_idx(gfn, slot, level); | 532 | linfo = lpage_info_slot(gfn, slot, level); |
538 | return *largepage_idx; | 533 | return linfo->write_count; |
539 | } | 534 | } |
540 | 535 | ||
541 | return 1; | 536 | return 1; |
@@ -559,14 +554,18 @@ static int host_mapping_level(struct kvm *kvm, gfn_t gfn) | |||
559 | return ret; | 554 | return ret; |
560 | } | 555 | } |
561 | 556 | ||
562 | static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) | 557 | static bool mapping_level_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t large_gfn) |
563 | { | 558 | { |
564 | struct kvm_memory_slot *slot; | 559 | struct kvm_memory_slot *slot; |
565 | int host_level, level, max_level; | ||
566 | |||
567 | slot = gfn_to_memslot(vcpu->kvm, large_gfn); | 560 | slot = gfn_to_memslot(vcpu->kvm, large_gfn); |
568 | if (slot && slot->dirty_bitmap) | 561 | if (slot && slot->dirty_bitmap) |
569 | return PT_PAGE_TABLE_LEVEL; | 562 | return true; |
563 | return false; | ||
564 | } | ||
565 | |||
566 | static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) | ||
567 | { | ||
568 | int host_level, level, max_level; | ||
570 | 569 | ||
571 | host_level = host_mapping_level(vcpu->kvm, large_gfn); | 570 | host_level = host_mapping_level(vcpu->kvm, large_gfn); |
572 | 571 | ||
@@ -590,16 +589,15 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) | |||
590 | static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level) | 589 | static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level) |
591 | { | 590 | { |
592 | struct kvm_memory_slot *slot; | 591 | struct kvm_memory_slot *slot; |
593 | unsigned long idx; | 592 | struct kvm_lpage_info *linfo; |
594 | 593 | ||
595 | slot = gfn_to_memslot(kvm, gfn); | 594 | slot = gfn_to_memslot(kvm, gfn); |
596 | if (likely(level == PT_PAGE_TABLE_LEVEL)) | 595 | if (likely(level == PT_PAGE_TABLE_LEVEL)) |
597 | return &slot->rmap[gfn - slot->base_gfn]; | 596 | return &slot->rmap[gfn - slot->base_gfn]; |
598 | 597 | ||
599 | idx = (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - | 598 | linfo = lpage_info_slot(gfn, slot, level); |
600 | (slot->base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); | ||
601 | 599 | ||
602 | return &slot->lpage_info[level - 2][idx].rmap_pde; | 600 | return &linfo->rmap_pde; |
603 | } | 601 | } |
604 | 602 | ||
605 | /* | 603 | /* |
@@ -887,19 +885,16 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | |||
887 | end = start + (memslot->npages << PAGE_SHIFT); | 885 | end = start + (memslot->npages << PAGE_SHIFT); |
888 | if (hva >= start && hva < end) { | 886 | if (hva >= start && hva < end) { |
889 | gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; | 887 | gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; |
888 | gfn_t gfn = memslot->base_gfn + gfn_offset; | ||
890 | 889 | ||
891 | ret = handler(kvm, &memslot->rmap[gfn_offset], data); | 890 | ret = handler(kvm, &memslot->rmap[gfn_offset], data); |
892 | 891 | ||
893 | for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { | 892 | for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { |
894 | unsigned long idx; | 893 | struct kvm_lpage_info *linfo; |
895 | int sh; | 894 | |
896 | 895 | linfo = lpage_info_slot(gfn, memslot, | |
897 | sh = KVM_HPAGE_GFN_SHIFT(PT_DIRECTORY_LEVEL+j); | 896 | PT_DIRECTORY_LEVEL + j); |
898 | idx = ((memslot->base_gfn+gfn_offset) >> sh) - | 897 | ret |= handler(kvm, &linfo->rmap_pde, data); |
899 | (memslot->base_gfn >> sh); | ||
900 | ret |= handler(kvm, | ||
901 | &memslot->lpage_info[j][idx].rmap_pde, | ||
902 | data); | ||
903 | } | 898 | } |
904 | trace_kvm_age_page(hva, memslot, ret); | 899 | trace_kvm_age_page(hva, memslot, ret); |
905 | retval |= ret; | 900 | retval |= ret; |
@@ -950,6 +945,35 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
950 | return young; | 945 | return young; |
951 | } | 946 | } |
952 | 947 | ||
948 | static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | ||
949 | unsigned long data) | ||
950 | { | ||
951 | u64 *spte; | ||
952 | int young = 0; | ||
953 | |||
954 | /* | ||
955 | * If there's no access bit in the secondary pte set by the | ||
956 | * hardware, it's up to gup-fast/gup to set the access bit in | ||
957 | * the primary pte or in the page structure. | ||
958 | */ | ||
959 | if (!shadow_accessed_mask) | ||
960 | goto out; | ||
961 | |||
962 | spte = rmap_next(kvm, rmapp, NULL); | ||
963 | while (spte) { | ||
964 | u64 _spte = *spte; | ||
965 | BUG_ON(!(_spte & PT_PRESENT_MASK)); | ||
966 | young = _spte & PT_ACCESSED_MASK; | ||
967 | if (young) { | ||
968 | young = 1; | ||
969 | break; | ||
970 | } | ||
971 | spte = rmap_next(kvm, rmapp, spte); | ||
972 | } | ||
973 | out: | ||
974 | return young; | ||
975 | } | ||
976 | |||
953 | #define RMAP_RECYCLE_THRESHOLD 1000 | 977 | #define RMAP_RECYCLE_THRESHOLD 1000 |
954 | 978 | ||
955 | static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) | 979 | static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) |
@@ -970,6 +994,11 @@ int kvm_age_hva(struct kvm *kvm, unsigned long hva) | |||
970 | return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp); | 994 | return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp); |
971 | } | 995 | } |
972 | 996 | ||
997 | int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) | ||
998 | { | ||
999 | return kvm_handle_hva(kvm, hva, 0, kvm_test_age_rmapp); | ||
1000 | } | ||
1001 | |||
973 | #ifdef MMU_DEBUG | 1002 | #ifdef MMU_DEBUG |
974 | static int is_empty_shadow_page(u64 *spt) | 1003 | static int is_empty_shadow_page(u64 *spt) |
975 | { | 1004 | { |
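kvm_test_age_rmapp()/kvm_test_age_hva() form a non-destructive counterpart to the aging path: they report whether any spte for the hva has the accessed bit set, without clearing it. A sketch of the expected caller on the MMU-notifier side; this hook lives in generic code and is an assumption about the matching change there, not part of this hunk:

static int kvm_mmu_notifier_test_young(struct mmu_notifier *mn,
				       struct mm_struct *mm,
				       unsigned long address)
{
	struct kvm *kvm = container_of(mn, struct kvm, mmu_notifier);

	/* read-only query: unlike ->clear_flush_young, no bits are cleared */
	return kvm_test_age_hva(kvm, address);
}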
@@ -1161,7 +1190,7 @@ static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu, | |||
1161 | } | 1190 | } |
1162 | 1191 | ||
1163 | static int nonpaging_sync_page(struct kvm_vcpu *vcpu, | 1192 | static int nonpaging_sync_page(struct kvm_vcpu *vcpu, |
1164 | struct kvm_mmu_page *sp, bool clear_unsync) | 1193 | struct kvm_mmu_page *sp) |
1165 | { | 1194 | { |
1166 | return 1; | 1195 | return 1; |
1167 | } | 1196 | } |
@@ -1291,7 +1320,7 @@ static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
1291 | if (clear_unsync) | 1320 | if (clear_unsync) |
1292 | kvm_unlink_unsync_page(vcpu->kvm, sp); | 1321 | kvm_unlink_unsync_page(vcpu->kvm, sp); |
1293 | 1322 | ||
1294 | if (vcpu->arch.mmu.sync_page(vcpu, sp, clear_unsync)) { | 1323 | if (vcpu->arch.mmu.sync_page(vcpu, sp)) { |
1295 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); | 1324 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); |
1296 | return 1; | 1325 | return 1; |
1297 | } | 1326 | } |
@@ -1332,12 +1361,12 @@ static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) | |||
1332 | continue; | 1361 | continue; |
1333 | 1362 | ||
1334 | WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); | 1363 | WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); |
1364 | kvm_unlink_unsync_page(vcpu->kvm, s); | ||
1335 | if ((s->role.cr4_pae != !!is_pae(vcpu)) || | 1365 | if ((s->role.cr4_pae != !!is_pae(vcpu)) || |
1336 | (vcpu->arch.mmu.sync_page(vcpu, s, true))) { | 1366 | (vcpu->arch.mmu.sync_page(vcpu, s))) { |
1337 | kvm_mmu_prepare_zap_page(vcpu->kvm, s, &invalid_list); | 1367 | kvm_mmu_prepare_zap_page(vcpu->kvm, s, &invalid_list); |
1338 | continue; | 1368 | continue; |
1339 | } | 1369 | } |
1340 | kvm_unlink_unsync_page(vcpu->kvm, s); | ||
1341 | flush = true; | 1370 | flush = true; |
1342 | } | 1371 | } |
1343 | 1372 | ||
@@ -1963,9 +1992,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1963 | unsigned pte_access, int user_fault, | 1992 | unsigned pte_access, int user_fault, |
1964 | int write_fault, int dirty, int level, | 1993 | int write_fault, int dirty, int level, |
1965 | gfn_t gfn, pfn_t pfn, bool speculative, | 1994 | gfn_t gfn, pfn_t pfn, bool speculative, |
1966 | bool can_unsync, bool reset_host_protection) | 1995 | bool can_unsync, bool host_writable) |
1967 | { | 1996 | { |
1968 | u64 spte; | 1997 | u64 spte, entry = *sptep; |
1969 | int ret = 0; | 1998 | int ret = 0; |
1970 | 1999 | ||
1971 | /* | 2000 | /* |
@@ -1973,7 +2002,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1973 | * whether the guest actually used the pte (in order to detect | 2002 | * whether the guest actually used the pte (in order to detect |
1974 | * demand paging). | 2003 | * demand paging). |
1975 | */ | 2004 | */ |
1976 | spte = shadow_base_present_pte; | 2005 | spte = PT_PRESENT_MASK; |
1977 | if (!speculative) | 2006 | if (!speculative) |
1978 | spte |= shadow_accessed_mask; | 2007 | spte |= shadow_accessed_mask; |
1979 | if (!dirty) | 2008 | if (!dirty) |
@@ -1990,8 +2019,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1990 | spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn, | 2019 | spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn, |
1991 | kvm_is_mmio_pfn(pfn)); | 2020 | kvm_is_mmio_pfn(pfn)); |
1992 | 2021 | ||
1993 | if (reset_host_protection) | 2022 | if (host_writable) |
1994 | spte |= SPTE_HOST_WRITEABLE; | 2023 | spte |= SPTE_HOST_WRITEABLE; |
2024 | else | ||
2025 | pte_access &= ~ACC_WRITE_MASK; | ||
1995 | 2026 | ||
1996 | spte |= (u64)pfn << PAGE_SHIFT; | 2027 | spte |= (u64)pfn << PAGE_SHIFT; |
1997 | 2028 | ||
@@ -2036,6 +2067,14 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2036 | 2067 | ||
2037 | set_pte: | 2068 | set_pte: |
2038 | update_spte(sptep, spte); | 2069 | update_spte(sptep, spte); |
2070 | /* | ||
2071 | * If we overwrite a writable spte with a read-only one we | ||
2072 | * should flush remote TLBs. Otherwise rmap_write_protect | ||
2073 | * will find a read-only spte, even though the writable spte | ||
2074 | * might be cached on a CPU's TLB. | ||
2075 | */ | ||
2076 | if (is_writable_pte(entry) && !is_writable_pte(*sptep)) | ||
2077 | kvm_flush_remote_tlbs(vcpu->kvm); | ||
2039 | done: | 2078 | done: |
2040 | return ret; | 2079 | return ret; |
2041 | } | 2080 | } |
@@ -2045,7 +2084,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2045 | int user_fault, int write_fault, int dirty, | 2084 | int user_fault, int write_fault, int dirty, |
2046 | int *ptwrite, int level, gfn_t gfn, | 2085 | int *ptwrite, int level, gfn_t gfn, |
2047 | pfn_t pfn, bool speculative, | 2086 | pfn_t pfn, bool speculative, |
2048 | bool reset_host_protection) | 2087 | bool host_writable) |
2049 | { | 2088 | { |
2050 | int was_rmapped = 0; | 2089 | int was_rmapped = 0; |
2051 | int rmap_count; | 2090 | int rmap_count; |
@@ -2080,7 +2119,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2080 | 2119 | ||
2081 | if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault, | 2120 | if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault, |
2082 | dirty, level, gfn, pfn, speculative, true, | 2121 | dirty, level, gfn, pfn, speculative, true, |
2083 | reset_host_protection)) { | 2122 | host_writable)) { |
2084 | if (write_fault) | 2123 | if (write_fault) |
2085 | *ptwrite = 1; | 2124 | *ptwrite = 1; |
2086 | kvm_mmu_flush_tlb(vcpu); | 2125 | kvm_mmu_flush_tlb(vcpu); |
@@ -2211,7 +2250,8 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) | |||
2211 | } | 2250 | } |
2212 | 2251 | ||
2213 | static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | 2252 | static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, |
2214 | int level, gfn_t gfn, pfn_t pfn) | 2253 | int map_writable, int level, gfn_t gfn, pfn_t pfn, |
2254 | bool prefault) | ||
2215 | { | 2255 | { |
2216 | struct kvm_shadow_walk_iterator iterator; | 2256 | struct kvm_shadow_walk_iterator iterator; |
2217 | struct kvm_mmu_page *sp; | 2257 | struct kvm_mmu_page *sp; |
@@ -2220,9 +2260,11 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | |||
2220 | 2260 | ||
2221 | for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { | 2261 | for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { |
2222 | if (iterator.level == level) { | 2262 | if (iterator.level == level) { |
2223 | mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL, | 2263 | unsigned pte_access = ACC_ALL; |
2264 | |||
2265 | mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, pte_access, | ||
2224 | 0, write, 1, &pt_write, | 2266 | 0, write, 1, &pt_write, |
2225 | level, gfn, pfn, false, true); | 2267 | level, gfn, pfn, prefault, map_writable); |
2226 | direct_pte_prefetch(vcpu, iterator.sptep); | 2268 | direct_pte_prefetch(vcpu, iterator.sptep); |
2227 | ++vcpu->stat.pf_fixed; | 2269 | ++vcpu->stat.pf_fixed; |
2228 | break; | 2270 | break; |
@@ -2277,27 +2319,81 @@ static int kvm_handle_bad_page(struct kvm *kvm, gfn_t gfn, pfn_t pfn) | |||
2277 | return 1; | 2319 | return 1; |
2278 | } | 2320 | } |
2279 | 2321 | ||
2280 | static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | 2322 | static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, |
2323 | gfn_t *gfnp, pfn_t *pfnp, int *levelp) | ||
2324 | { | ||
2325 | pfn_t pfn = *pfnp; | ||
2326 | gfn_t gfn = *gfnp; | ||
2327 | int level = *levelp; | ||
2328 | |||
2329 | /* | ||
2330 | * Check if it's a transparent hugepage. If this were a | ||
2331 | * hugetlbfs page, level wouldn't be set to | ||
2332 | * PT_PAGE_TABLE_LEVEL and there would be no adjustment done | ||
2333 | * here. | ||
2334 | */ | ||
2335 | if (!is_error_pfn(pfn) && !kvm_is_mmio_pfn(pfn) && | ||
2336 | level == PT_PAGE_TABLE_LEVEL && | ||
2337 | PageTransCompound(pfn_to_page(pfn)) && | ||
2338 | !has_wrprotected_page(vcpu->kvm, gfn, PT_DIRECTORY_LEVEL)) { | ||
2339 | unsigned long mask; | ||
2340 | /* | ||
2341 | * mmu_notifier_retry was successful and we hold the | ||
2342 | * mmu_lock here, so the pmd can't be split | ||
2343 | * out from under us, and in turn | ||
2344 | * __split_huge_page_refcount() can't run from under | ||
2345 | * us and we can safely transfer the refcount from | ||
2346 | * PG_tail to PG_head as we switch the pfn from tail to | ||
2347 | * head. | ||
2348 | */ | ||
2349 | *levelp = level = PT_DIRECTORY_LEVEL; | ||
2350 | mask = KVM_PAGES_PER_HPAGE(level) - 1; | ||
2351 | VM_BUG_ON((gfn & mask) != (pfn & mask)); | ||
2352 | if (pfn & mask) { | ||
2353 | gfn &= ~mask; | ||
2354 | *gfnp = gfn; | ||
2355 | kvm_release_pfn_clean(pfn); | ||
2356 | pfn &= ~mask; | ||
2357 | if (!get_page_unless_zero(pfn_to_page(pfn))) | ||
2358 | BUG(); | ||
2359 | *pfnp = pfn; | ||
2360 | } | ||
2361 | } | ||
2362 | } | ||
2363 | |||
2364 | static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, | ||
2365 | gva_t gva, pfn_t *pfn, bool write, bool *writable); | ||
2366 | |||
2367 | static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn, | ||
2368 | bool prefault) | ||
2281 | { | 2369 | { |
2282 | int r; | 2370 | int r; |
2283 | int level; | 2371 | int level; |
2372 | int force_pt_level; | ||
2284 | pfn_t pfn; | 2373 | pfn_t pfn; |
2285 | unsigned long mmu_seq; | 2374 | unsigned long mmu_seq; |
2375 | bool map_writable; | ||
2286 | 2376 | ||
2287 | level = mapping_level(vcpu, gfn); | 2377 | force_pt_level = mapping_level_dirty_bitmap(vcpu, gfn); |
2288 | 2378 | if (likely(!force_pt_level)) { | |
2289 | /* | 2379 | level = mapping_level(vcpu, gfn); |
2290 | * This path builds a PAE pagetable - so we can map 2mb pages at | 2380 | /* |
2291 | * maximum. Therefore check if the level is larger than that. | 2381 | * This path builds a PAE pagetable - so we can map |
2292 | */ | 2382 | * 2mb pages at maximum. Therefore check if the level |
2293 | if (level > PT_DIRECTORY_LEVEL) | 2383 | * is larger than that. |
2294 | level = PT_DIRECTORY_LEVEL; | 2384 | */ |
2385 | if (level > PT_DIRECTORY_LEVEL) | ||
2386 | level = PT_DIRECTORY_LEVEL; | ||
2295 | 2387 | ||
2296 | gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); | 2388 | gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); |
2389 | } else | ||
2390 | level = PT_PAGE_TABLE_LEVEL; | ||
2297 | 2391 | ||
2298 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | 2392 | mmu_seq = vcpu->kvm->mmu_notifier_seq; |
2299 | smp_rmb(); | 2393 | smp_rmb(); |
2300 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | 2394 | |
2395 | if (try_async_pf(vcpu, prefault, gfn, v, &pfn, write, &map_writable)) | ||
2396 | return 0; | ||
2301 | 2397 | ||
2302 | /* mmio */ | 2398 | /* mmio */ |
2303 | if (is_error_pfn(pfn)) | 2399 | if (is_error_pfn(pfn)) |
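The alignment fix-up at the end of transparent_hugepage_adjust() is easier to follow with concrete numbers. A worked example assuming 2MB huge pages, i.e. KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL) == 512:

/* mask = 512 - 1 = 0x1ff.  Suppose the fault hit gfn 0x1234 and resolved
 * to pfn 0x5634, a tail page of a THP whose head page is pfn 0x5600:
 *   gfn & mask == pfn & mask == 0x34   (so the VM_BUG_ON holds)
 *   gfn &= ~mask  ->  0x1200           (aligned guest 2MB frame)
 *   pfn &= ~mask  ->  0x5600           (head page, after moving the ref)
 * The resulting large spte maps the aligned guest range onto the whole THP. */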
@@ -2307,7 +2403,10 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | |||
2307 | if (mmu_notifier_retry(vcpu, mmu_seq)) | 2403 | if (mmu_notifier_retry(vcpu, mmu_seq)) |
2308 | goto out_unlock; | 2404 | goto out_unlock; |
2309 | kvm_mmu_free_some_pages(vcpu); | 2405 | kvm_mmu_free_some_pages(vcpu); |
2310 | r = __direct_map(vcpu, v, write, level, gfn, pfn); | 2406 | if (likely(!force_pt_level)) |
2407 | transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); | ||
2408 | r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn, | ||
2409 | prefault); | ||
2311 | spin_unlock(&vcpu->kvm->mmu_lock); | 2410 | spin_unlock(&vcpu->kvm->mmu_lock); |
2312 | 2411 | ||
2313 | 2412 | ||
@@ -2530,6 +2629,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) | |||
2530 | hpa_t root = vcpu->arch.mmu.root_hpa; | 2629 | hpa_t root = vcpu->arch.mmu.root_hpa; |
2531 | sp = page_header(root); | 2630 | sp = page_header(root); |
2532 | mmu_sync_children(vcpu, sp); | 2631 | mmu_sync_children(vcpu, sp); |
2632 | trace_kvm_mmu_audit(vcpu, AUDIT_POST_SYNC); | ||
2533 | return; | 2633 | return; |
2534 | } | 2634 | } |
2535 | for (i = 0; i < 4; ++i) { | 2635 | for (i = 0; i < 4; ++i) { |
@@ -2552,23 +2652,24 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) | |||
2552 | } | 2652 | } |
2553 | 2653 | ||
2554 | static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, | 2654 | static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, |
2555 | u32 access, u32 *error) | 2655 | u32 access, struct x86_exception *exception) |
2556 | { | 2656 | { |
2557 | if (error) | 2657 | if (exception) |
2558 | *error = 0; | 2658 | exception->error_code = 0; |
2559 | return vaddr; | 2659 | return vaddr; |
2560 | } | 2660 | } |
2561 | 2661 | ||
2562 | static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr, | 2662 | static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr, |
2563 | u32 access, u32 *error) | 2663 | u32 access, |
2664 | struct x86_exception *exception) | ||
2564 | { | 2665 | { |
2565 | if (error) | 2666 | if (exception) |
2566 | *error = 0; | 2667 | exception->error_code = 0; |
2567 | return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access); | 2668 | return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access); |
2568 | } | 2669 | } |
2569 | 2670 | ||
2570 | static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, | 2671 | static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, |
2571 | u32 error_code) | 2672 | u32 error_code, bool prefault) |
2572 | { | 2673 | { |
2573 | gfn_t gfn; | 2674 | gfn_t gfn; |
2574 | int r; | 2675 | int r; |
@@ -2584,17 +2685,68 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, | |||
2584 | gfn = gva >> PAGE_SHIFT; | 2685 | gfn = gva >> PAGE_SHIFT; |
2585 | 2686 | ||
2586 | return nonpaging_map(vcpu, gva & PAGE_MASK, | 2687 | return nonpaging_map(vcpu, gva & PAGE_MASK, |
2587 | error_code & PFERR_WRITE_MASK, gfn); | 2688 | error_code & PFERR_WRITE_MASK, gfn, prefault); |
2689 | } | ||
2690 | |||
2691 | static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) | ||
2692 | { | ||
2693 | struct kvm_arch_async_pf arch; | ||
2694 | |||
2695 | arch.token = (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id; | ||
2696 | arch.gfn = gfn; | ||
2697 | arch.direct_map = vcpu->arch.mmu.direct_map; | ||
2698 | arch.cr3 = vcpu->arch.mmu.get_cr3(vcpu); | ||
2699 | |||
2700 | return kvm_setup_async_pf(vcpu, gva, gfn, &arch); | ||
2701 | } | ||
2702 | |||
2703 | static bool can_do_async_pf(struct kvm_vcpu *vcpu) | ||
2704 | { | ||
2705 | if (unlikely(!irqchip_in_kernel(vcpu->kvm) || | ||
2706 | kvm_event_needs_reinjection(vcpu))) | ||
2707 | return false; | ||
2708 | |||
2709 | return kvm_x86_ops->interrupt_allowed(vcpu); | ||
2588 | } | 2710 | } |
2589 | 2711 | ||
2590 | static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, | 2712 | static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, |
2591 | u32 error_code) | 2713 | gva_t gva, pfn_t *pfn, bool write, bool *writable) |
2714 | { | ||
2715 | bool async; | ||
2716 | |||
2717 | *pfn = gfn_to_pfn_async(vcpu->kvm, gfn, &async, write, writable); | ||
2718 | |||
2719 | if (!async) | ||
2720 | return false; /* *pfn has correct page already */ | ||
2721 | |||
2722 | put_page(pfn_to_page(*pfn)); | ||
2723 | |||
2724 | if (!prefault && can_do_async_pf(vcpu)) { | ||
2725 | trace_kvm_try_async_get_page(gva, gfn); | ||
2726 | if (kvm_find_async_pf_gfn(vcpu, gfn)) { | ||
2727 | trace_kvm_async_pf_doublefault(gva, gfn); | ||
2728 | kvm_make_request(KVM_REQ_APF_HALT, vcpu); | ||
2729 | return true; | ||
2730 | } else if (kvm_arch_setup_async_pf(vcpu, gva, gfn)) | ||
2731 | return true; | ||
2732 | } | ||
2733 | |||
2734 | *pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write, writable); | ||
2735 | |||
2736 | return false; | ||
2737 | } | ||
2738 | |||
2739 | static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, | ||
2740 | bool prefault) | ||
2592 | { | 2741 | { |
2593 | pfn_t pfn; | 2742 | pfn_t pfn; |
2594 | int r; | 2743 | int r; |
2595 | int level; | 2744 | int level; |
2745 | int force_pt_level; | ||
2596 | gfn_t gfn = gpa >> PAGE_SHIFT; | 2746 | gfn_t gfn = gpa >> PAGE_SHIFT; |
2597 | unsigned long mmu_seq; | 2747 | unsigned long mmu_seq; |
2748 | int write = error_code & PFERR_WRITE_MASK; | ||
2749 | bool map_writable; | ||
2598 | 2750 | ||
2599 | ASSERT(vcpu); | 2751 | ASSERT(vcpu); |
2600 | ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa)); | 2752 | ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa)); |
@@ -2603,21 +2755,30 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, | |||
2603 | if (r) | 2755 | if (r) |
2604 | return r; | 2756 | return r; |
2605 | 2757 | ||
2606 | level = mapping_level(vcpu, gfn); | 2758 | force_pt_level = mapping_level_dirty_bitmap(vcpu, gfn); |
2607 | 2759 | if (likely(!force_pt_level)) { | |
2608 | gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); | 2760 | level = mapping_level(vcpu, gfn); |
2761 | gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); | ||
2762 | } else | ||
2763 | level = PT_PAGE_TABLE_LEVEL; | ||
2609 | 2764 | ||
2610 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | 2765 | mmu_seq = vcpu->kvm->mmu_notifier_seq; |
2611 | smp_rmb(); | 2766 | smp_rmb(); |
2612 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | 2767 | |
2768 | if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable)) | ||
2769 | return 0; | ||
2770 | |||
2771 | /* mmio */ | ||
2613 | if (is_error_pfn(pfn)) | 2772 | if (is_error_pfn(pfn)) |
2614 | return kvm_handle_bad_page(vcpu->kvm, gfn, pfn); | 2773 | return kvm_handle_bad_page(vcpu->kvm, gfn, pfn); |
2615 | spin_lock(&vcpu->kvm->mmu_lock); | 2774 | spin_lock(&vcpu->kvm->mmu_lock); |
2616 | if (mmu_notifier_retry(vcpu, mmu_seq)) | 2775 | if (mmu_notifier_retry(vcpu, mmu_seq)) |
2617 | goto out_unlock; | 2776 | goto out_unlock; |
2618 | kvm_mmu_free_some_pages(vcpu); | 2777 | kvm_mmu_free_some_pages(vcpu); |
2619 | r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK, | 2778 | if (likely(!force_pt_level)) |
2620 | level, gfn, pfn); | 2779 | transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); |
2780 | r = __direct_map(vcpu, gpa, write, map_writable, | ||
2781 | level, gfn, pfn, prefault); | ||
2621 | spin_unlock(&vcpu->kvm->mmu_lock); | 2782 | spin_unlock(&vcpu->kvm->mmu_lock); |
2622 | 2783 | ||
2623 | return r; | 2784 | return r; |
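After this hunk, nonpaging_map() and tdp_page_fault() share the same shape. A condensed sketch of that common flow using the helpers introduced above; direct_fault_sketch() is illustrative, not a function in this diff, and error paths are abbreviated:

static int direct_fault_sketch(struct kvm_vcpu *vcpu, gpa_t addr, gfn_t gfn,
			       int write, bool prefault)
{
	int level = PT_PAGE_TABLE_LEVEL, r;
	bool force_pt_level, map_writable;
	unsigned long mmu_seq;
	pfn_t pfn;

	/* dirty logging forces 4k mappings; otherwise take the host level */
	force_pt_level = mapping_level_dirty_bitmap(vcpu, gfn);
	if (!force_pt_level) {
		level = mapping_level(vcpu, gfn);
		gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
	}

	mmu_seq = vcpu->kvm->mmu_notifier_seq;
	smp_rmb();
	if (try_async_pf(vcpu, prefault, gfn, addr, &pfn, write, &map_writable))
		return 0;	/* async pf queued; the guest runs meanwhile */
	if (is_error_pfn(pfn))
		return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);

	spin_lock(&vcpu->kvm->mmu_lock);
	if (mmu_notifier_retry(vcpu, mmu_seq)) {
		spin_unlock(&vcpu->kvm->mmu_lock);
		kvm_release_pfn_clean(pfn);
		return 0;	/* raced with an invalidation; refault */
	}
	kvm_mmu_free_some_pages(vcpu);
	if (likely(!force_pt_level))
		transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
	r = __direct_map(vcpu, addr, write, map_writable, level, gfn, pfn,
			 prefault);
	spin_unlock(&vcpu->kvm->mmu_lock);
	return r;
}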
@@ -2659,18 +2820,19 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) | |||
2659 | 2820 | ||
2660 | static void paging_new_cr3(struct kvm_vcpu *vcpu) | 2821 | static void paging_new_cr3(struct kvm_vcpu *vcpu) |
2661 | { | 2822 | { |
2662 | pgprintk("%s: cr3 %lx\n", __func__, vcpu->arch.cr3); | 2823 | pgprintk("%s: cr3 %lx\n", __func__, kvm_read_cr3(vcpu)); |
2663 | mmu_free_roots(vcpu); | 2824 | mmu_free_roots(vcpu); |
2664 | } | 2825 | } |
2665 | 2826 | ||
2666 | static unsigned long get_cr3(struct kvm_vcpu *vcpu) | 2827 | static unsigned long get_cr3(struct kvm_vcpu *vcpu) |
2667 | { | 2828 | { |
2668 | return vcpu->arch.cr3; | 2829 | return kvm_read_cr3(vcpu); |
2669 | } | 2830 | } |
2670 | 2831 | ||
2671 | static void inject_page_fault(struct kvm_vcpu *vcpu) | 2832 | static void inject_page_fault(struct kvm_vcpu *vcpu, |
2833 | struct x86_exception *fault) | ||
2672 | { | 2834 | { |
2673 | vcpu->arch.mmu.inject_page_fault(vcpu); | 2835 | vcpu->arch.mmu.inject_page_fault(vcpu, fault); |
2674 | } | 2836 | } |
2675 | 2837 | ||
2676 | static void paging_free(struct kvm_vcpu *vcpu) | 2838 | static void paging_free(struct kvm_vcpu *vcpu) |
@@ -2816,6 +2978,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) | |||
2816 | { | 2978 | { |
2817 | struct kvm_mmu *context = vcpu->arch.walk_mmu; | 2979 | struct kvm_mmu *context = vcpu->arch.walk_mmu; |
2818 | 2980 | ||
2981 | context->base_role.word = 0; | ||
2819 | context->new_cr3 = nonpaging_new_cr3; | 2982 | context->new_cr3 = nonpaging_new_cr3; |
2820 | context->page_fault = tdp_page_fault; | 2983 | context->page_fault = tdp_page_fault; |
2821 | context->free = nonpaging_free; | 2984 | context->free = nonpaging_free; |
@@ -3008,9 +3171,6 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, | |||
3008 | return; | 3171 | return; |
3009 | } | 3172 | } |
3010 | 3173 | ||
3011 | if (is_rsvd_bits_set(&vcpu->arch.mmu, *(u64 *)new, PT_PAGE_TABLE_LEVEL)) | ||
3012 | return; | ||
3013 | |||
3014 | ++vcpu->kvm->stat.mmu_pte_updated; | 3174 | ++vcpu->kvm->stat.mmu_pte_updated; |
3015 | if (!sp->role.cr4_pae) | 3175 | if (!sp->role.cr4_pae) |
3016 | paging32_update_pte(vcpu, sp, spte, new); | 3176 | paging32_update_pte(vcpu, sp, spte, new); |
@@ -3264,12 +3424,13 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) | |||
3264 | } | 3424 | } |
3265 | } | 3425 | } |
3266 | 3426 | ||
3267 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) | 3427 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code, |
3428 | void *insn, int insn_len) | ||
3268 | { | 3429 | { |
3269 | int r; | 3430 | int r; |
3270 | enum emulation_result er; | 3431 | enum emulation_result er; |
3271 | 3432 | ||
3272 | r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code); | 3433 | r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false); |
3273 | if (r < 0) | 3434 | if (r < 0) |
3274 | goto out; | 3435 | goto out; |
3275 | 3436 | ||
@@ -3282,7 +3443,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) | |||
3282 | if (r) | 3443 | if (r) |
3283 | goto out; | 3444 | goto out; |
3284 | 3445 | ||
3285 | er = emulate_instruction(vcpu, cr2, error_code, 0); | 3446 | er = x86_emulate_instruction(vcpu, cr2, 0, insn, insn_len); |
3286 | 3447 | ||
3287 | switch (er) { | 3448 | switch (er) { |
3288 | case EMULATE_DONE: | 3449 | case EMULATE_DONE: |
@@ -3377,11 +3538,14 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | |||
3377 | if (!test_bit(slot, sp->slot_bitmap)) | 3538 | if (!test_bit(slot, sp->slot_bitmap)) |
3378 | continue; | 3539 | continue; |
3379 | 3540 | ||
3541 | if (sp->role.level != PT_PAGE_TABLE_LEVEL) | ||
3542 | continue; | ||
3543 | |||
3380 | pt = sp->spt; | 3544 | pt = sp->spt; |
3381 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) | 3545 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) |
3382 | /* avoid RMW */ | 3546 | /* avoid RMW */ |
3383 | if (is_writable_pte(pt[i])) | 3547 | if (is_writable_pte(pt[i])) |
3384 | pt[i] &= ~PT_WRITABLE_MASK; | 3548 | update_spte(&pt[i], pt[i] & ~PT_WRITABLE_MASK); |
3385 | } | 3549 | } |
3386 | kvm_flush_remote_tlbs(kvm); | 3550 | kvm_flush_remote_tlbs(kvm); |
3387 | } | 3551 | } |
@@ -3463,13 +3627,6 @@ static void mmu_destroy_caches(void) | |||
3463 | kmem_cache_destroy(mmu_page_header_cache); | 3627 | kmem_cache_destroy(mmu_page_header_cache); |
3464 | } | 3628 | } |
3465 | 3629 | ||
3466 | void kvm_mmu_module_exit(void) | ||
3467 | { | ||
3468 | mmu_destroy_caches(); | ||
3469 | percpu_counter_destroy(&kvm_total_used_mmu_pages); | ||
3470 | unregister_shrinker(&mmu_shrinker); | ||
3471 | } | ||
3472 | |||
3473 | int kvm_mmu_module_init(void) | 3630 | int kvm_mmu_module_init(void) |
3474 | { | 3631 | { |
3475 | pte_chain_cache = kmem_cache_create("kvm_pte_chain", | 3632 | pte_chain_cache = kmem_cache_create("kvm_pte_chain", |
@@ -3566,7 +3723,7 @@ static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu, | |||
3566 | 3723 | ||
3567 | static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu) | 3724 | static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu) |
3568 | { | 3725 | { |
3569 | (void)kvm_set_cr3(vcpu, vcpu->arch.cr3); | 3726 | (void)kvm_set_cr3(vcpu, kvm_read_cr3(vcpu)); |
3570 | return 1; | 3727 | return 1; |
3571 | } | 3728 | } |
3572 | 3729 | ||
@@ -3662,12 +3819,6 @@ int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]) | |||
3662 | } | 3819 | } |
3663 | EXPORT_SYMBOL_GPL(kvm_mmu_get_spte_hierarchy); | 3820 | EXPORT_SYMBOL_GPL(kvm_mmu_get_spte_hierarchy); |
3664 | 3821 | ||
3665 | #ifdef CONFIG_KVM_MMU_AUDIT | ||
3666 | #include "mmu_audit.c" | ||
3667 | #else | ||
3668 | static void mmu_audit_disable(void) { } | ||
3669 | #endif | ||
3670 | |||
3671 | void kvm_mmu_destroy(struct kvm_vcpu *vcpu) | 3822 | void kvm_mmu_destroy(struct kvm_vcpu *vcpu) |
3672 | { | 3823 | { |
3673 | ASSERT(vcpu); | 3824 | ASSERT(vcpu); |
@@ -3675,5 +3826,18 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu) | |||
3675 | destroy_kvm_mmu(vcpu); | 3826 | destroy_kvm_mmu(vcpu); |
3676 | free_mmu_pages(vcpu); | 3827 | free_mmu_pages(vcpu); |
3677 | mmu_free_memory_caches(vcpu); | 3828 | mmu_free_memory_caches(vcpu); |
3829 | } | ||
3830 | |||
3831 | #ifdef CONFIG_KVM_MMU_AUDIT | ||
3832 | #include "mmu_audit.c" | ||
3833 | #else | ||
3834 | static void mmu_audit_disable(void) { } | ||
3835 | #endif | ||
3836 | |||
3837 | void kvm_mmu_module_exit(void) | ||
3838 | { | ||
3839 | mmu_destroy_caches(); | ||
3840 | percpu_counter_destroy(&kvm_total_used_mmu_pages); | ||
3841 | unregister_shrinker(&mmu_shrinker); | ||
3678 | mmu_audit_disable(); | 3842 | mmu_audit_disable(); |
3679 | } | 3843 | } |
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index ba2bcdde6221..5f6223b8bcf7 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c | |||
@@ -19,11 +19,9 @@ | |||
19 | 19 | ||
20 | #include <linux/ratelimit.h> | 20 | #include <linux/ratelimit.h> |
21 | 21 | ||
22 | static int audit_point; | 22 | #define audit_printk(kvm, fmt, args...) \ |
23 | |||
24 | #define audit_printk(fmt, args...) \ | ||
25 | printk(KERN_ERR "audit: (%s) error: " \ | 23 | printk(KERN_ERR "audit: (%s) error: " \ |
26 | fmt, audit_point_name[audit_point], ##args) | 24 | fmt, audit_point_name[kvm->arch.audit_point], ##args) |
27 | 25 | ||
28 | typedef void (*inspect_spte_fn) (struct kvm_vcpu *vcpu, u64 *sptep, int level); | 26 | typedef void (*inspect_spte_fn) (struct kvm_vcpu *vcpu, u64 *sptep, int level); |
29 | 27 | ||
@@ -97,18 +95,21 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level) | |||
97 | 95 | ||
98 | if (sp->unsync) { | 96 | if (sp->unsync) { |
99 | if (level != PT_PAGE_TABLE_LEVEL) { | 97 | if (level != PT_PAGE_TABLE_LEVEL) { |
100 | audit_printk("unsync sp: %p level = %d\n", sp, level); | 98 | audit_printk(vcpu->kvm, "unsync sp: %p " |
99 | "level = %d\n", sp, level); | ||
101 | return; | 100 | return; |
102 | } | 101 | } |
103 | 102 | ||
104 | if (*sptep == shadow_notrap_nonpresent_pte) { | 103 | if (*sptep == shadow_notrap_nonpresent_pte) { |
105 | audit_printk("notrap spte in unsync sp: %p\n", sp); | 104 | audit_printk(vcpu->kvm, "notrap spte in unsync " |
105 | "sp: %p\n", sp); | ||
106 | return; | 106 | return; |
107 | } | 107 | } |
108 | } | 108 | } |
109 | 109 | ||
110 | if (sp->role.direct && *sptep == shadow_notrap_nonpresent_pte) { | 110 | if (sp->role.direct && *sptep == shadow_notrap_nonpresent_pte) { |
111 | audit_printk("notrap spte in direct sp: %p\n", sp); | 111 | audit_printk(vcpu->kvm, "notrap spte in direct sp: %p\n", |
112 | sp); | ||
112 | return; | 113 | return; |
113 | } | 114 | } |
114 | 115 | ||
@@ -125,8 +126,9 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level) | |||
125 | 126 | ||
126 | hpa = pfn << PAGE_SHIFT; | 127 | hpa = pfn << PAGE_SHIFT; |
127 | if ((*sptep & PT64_BASE_ADDR_MASK) != hpa) | 128 | if ((*sptep & PT64_BASE_ADDR_MASK) != hpa) |
128 | audit_printk("levels %d pfn %llx hpa %llx ent %llxn", | 129 | audit_printk(vcpu->kvm, "levels %d pfn %llx hpa %llx " |
129 | vcpu->arch.mmu.root_level, pfn, hpa, *sptep); | 130 | "ent %llxn", vcpu->arch.mmu.root_level, pfn, |
131 | hpa, *sptep); | ||
130 | } | 132 | } |
131 | 133 | ||
132 | static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) | 134 | static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) |
@@ -142,8 +144,8 @@ static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) | |||
142 | if (!gfn_to_memslot(kvm, gfn)) { | 144 | if (!gfn_to_memslot(kvm, gfn)) { |
143 | if (!printk_ratelimit()) | 145 | if (!printk_ratelimit()) |
144 | return; | 146 | return; |
145 | audit_printk("no memslot for gfn %llx\n", gfn); | 147 | audit_printk(kvm, "no memslot for gfn %llx\n", gfn); |
146 | audit_printk("index %ld of sp (gfn=%llx)\n", | 148 | audit_printk(kvm, "index %ld of sp (gfn=%llx)\n", |
147 | (long int)(sptep - rev_sp->spt), rev_sp->gfn); | 149 | (long int)(sptep - rev_sp->spt), rev_sp->gfn); |
148 | dump_stack(); | 150 | dump_stack(); |
149 | return; | 151 | return; |
@@ -153,7 +155,8 @@ static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) | |||
153 | if (!*rmapp) { | 155 | if (!*rmapp) { |
154 | if (!printk_ratelimit()) | 156 | if (!printk_ratelimit()) |
155 | return; | 157 | return; |
156 | audit_printk("no rmap for writable spte %llx\n", *sptep); | 158 | audit_printk(kvm, "no rmap for writable spte %llx\n", |
159 | *sptep); | ||
157 | dump_stack(); | 160 | dump_stack(); |
158 | } | 161 | } |
159 | } | 162 | } |
@@ -168,8 +171,9 @@ static void audit_spte_after_sync(struct kvm_vcpu *vcpu, u64 *sptep, int level) | |||
168 | { | 171 | { |
169 | struct kvm_mmu_page *sp = page_header(__pa(sptep)); | 172 | struct kvm_mmu_page *sp = page_header(__pa(sptep)); |
170 | 173 | ||
171 | if (audit_point == AUDIT_POST_SYNC && sp->unsync) | 174 | if (vcpu->kvm->arch.audit_point == AUDIT_POST_SYNC && sp->unsync) |
172 | audit_printk("meet unsync sp(%p) after sync root.\n", sp); | 175 | audit_printk(vcpu->kvm, "meet unsync sp(%p) after sync " |
176 | "root.\n", sp); | ||
173 | } | 177 | } |
174 | 178 | ||
175 | static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp) | 179 | static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp) |
@@ -202,8 +206,9 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
202 | spte = rmap_next(kvm, rmapp, NULL); | 206 | spte = rmap_next(kvm, rmapp, NULL); |
203 | while (spte) { | 207 | while (spte) { |
204 | if (is_writable_pte(*spte)) | 208 | if (is_writable_pte(*spte)) |
205 | audit_printk("shadow page has writable mappings: gfn " | 209 | audit_printk(kvm, "shadow page has writable " |
206 | "%llx role %x\n", sp->gfn, sp->role.word); | 210 | "mappings: gfn %llx role %x\n", |
211 | sp->gfn, sp->role.word); | ||
207 | spte = rmap_next(kvm, rmapp, spte); | 212 | spte = rmap_next(kvm, rmapp, spte); |
208 | } | 213 | } |
209 | } | 214 | } |
@@ -238,7 +243,7 @@ static void kvm_mmu_audit(void *ignore, struct kvm_vcpu *vcpu, int point) | |||
238 | if (!__ratelimit(&ratelimit_state)) | 243 | if (!__ratelimit(&ratelimit_state)) |
239 | return; | 244 | return; |
240 | 245 | ||
241 | audit_point = point; | 246 | vcpu->kvm->arch.audit_point = point; |
242 | audit_all_active_sps(vcpu->kvm); | 247 | audit_all_active_sps(vcpu->kvm); |
243 | audit_vcpu_spte(vcpu); | 248 | audit_vcpu_spte(vcpu); |
244 | } | 249 | } |
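The mmu_audit.c hunks replace the file-global audit_point with a per-VM field, so audit output from two guests being audited concurrently is tagged with the right audit point. The resulting flow, condensed from the hunks above:

/* Each audit pass tags its own VM; audit_printk() reads the tag back
 * from kvm->arch.audit_point rather than from shared global state. */
vcpu->kvm->arch.audit_point = point;
audit_all_active_sps(vcpu->kvm);
audit_vcpu_spte(vcpu);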
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index cd7a833a3b52..6bccc24c4181 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -72,7 +72,7 @@ struct guest_walker { | |||
72 | unsigned pt_access; | 72 | unsigned pt_access; |
73 | unsigned pte_access; | 73 | unsigned pte_access; |
74 | gfn_t gfn; | 74 | gfn_t gfn; |
75 | u32 error_code; | 75 | struct x86_exception fault; |
76 | }; | 76 | }; |
77 | 77 | ||
78 | static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl) | 78 | static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl) |
@@ -266,21 +266,23 @@ walk: | |||
266 | return 1; | 266 | return 1; |
267 | 267 | ||
268 | error: | 268 | error: |
269 | walker->error_code = 0; | 269 | walker->fault.vector = PF_VECTOR; |
270 | walker->fault.error_code_valid = true; | ||
271 | walker->fault.error_code = 0; | ||
270 | if (present) | 272 | if (present) |
271 | walker->error_code |= PFERR_PRESENT_MASK; | 273 | walker->fault.error_code |= PFERR_PRESENT_MASK; |
272 | 274 | ||
273 | walker->error_code |= write_fault | user_fault; | 275 | walker->fault.error_code |= write_fault | user_fault; |
274 | 276 | ||
275 | if (fetch_fault && mmu->nx) | 277 | if (fetch_fault && mmu->nx) |
276 | walker->error_code |= PFERR_FETCH_MASK; | 278 | walker->fault.error_code |= PFERR_FETCH_MASK; |
277 | if (rsvd_fault) | 279 | if (rsvd_fault) |
278 | walker->error_code |= PFERR_RSVD_MASK; | 280 | walker->fault.error_code |= PFERR_RSVD_MASK; |
279 | 281 | ||
280 | vcpu->arch.fault.address = addr; | 282 | walker->fault.address = addr; |
281 | vcpu->arch.fault.error_code = walker->error_code; | 283 | walker->fault.nested_page_fault = mmu != vcpu->arch.walk_mmu; |
282 | 284 | ||
283 | trace_kvm_mmu_walker_error(walker->error_code); | 285 | trace_kvm_mmu_walker_error(walker->fault.error_code); |
284 | return 0; | 286 | return 0; |
285 | } | 287 | } |
286 | 288 | ||
@@ -299,25 +301,42 @@ static int FNAME(walk_addr_nested)(struct guest_walker *walker, | |||
299 | addr, access); | 301 | addr, access); |
300 | } | 302 | } |
301 | 303 | ||
304 | static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu, | ||
305 | struct kvm_mmu_page *sp, u64 *spte, | ||
306 | pt_element_t gpte) | ||
307 | { | ||
308 | u64 nonpresent = shadow_trap_nonpresent_pte; | ||
309 | |||
310 | if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL)) | ||
311 | goto no_present; | ||
312 | |||
313 | if (!is_present_gpte(gpte)) { | ||
314 | if (!sp->unsync) | ||
315 | nonpresent = shadow_notrap_nonpresent_pte; | ||
316 | goto no_present; | ||
317 | } | ||
318 | |||
319 | if (!(gpte & PT_ACCESSED_MASK)) | ||
320 | goto no_present; | ||
321 | |||
322 | return false; | ||
323 | |||
324 | no_present: | ||
325 | drop_spte(vcpu->kvm, spte, nonpresent); | ||
326 | return true; | ||
327 | } | ||
328 | |||
302 | static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | 329 | static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, |
303 | u64 *spte, const void *pte) | 330 | u64 *spte, const void *pte) |
304 | { | 331 | { |
305 | pt_element_t gpte; | 332 | pt_element_t gpte; |
306 | unsigned pte_access; | 333 | unsigned pte_access; |
307 | pfn_t pfn; | 334 | pfn_t pfn; |
308 | u64 new_spte; | ||
309 | 335 | ||
310 | gpte = *(const pt_element_t *)pte; | 336 | gpte = *(const pt_element_t *)pte; |
311 | if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { | 337 | if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte)) |
312 | if (!is_present_gpte(gpte)) { | ||
313 | if (sp->unsync) | ||
314 | new_spte = shadow_trap_nonpresent_pte; | ||
315 | else | ||
316 | new_spte = shadow_notrap_nonpresent_pte; | ||
317 | __set_spte(spte, new_spte); | ||
318 | } | ||
319 | return; | 338 | return; |
320 | } | 339 | |
321 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); | 340 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); |
322 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); | 341 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); |
323 | if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn) | 342 | if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn) |
@@ -329,7 +348,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
329 | return; | 348 | return; |
330 | kvm_get_pfn(pfn); | 349 | kvm_get_pfn(pfn); |
331 | /* | 350 | /* |
332 | * we call mmu_set_spte() with reset_host_protection = true because | 351 | * we call mmu_set_spte() with host_writable = true because |
333 | * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1). | 352 | * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1). |
334 | */ | 353 | */ |
335 | mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0, | 354 | mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0, |
@@ -364,7 +383,6 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, | |||
364 | u64 *sptep) | 383 | u64 *sptep) |
365 | { | 384 | { |
366 | struct kvm_mmu_page *sp; | 385 | struct kvm_mmu_page *sp; |
367 | struct kvm_mmu *mmu = &vcpu->arch.mmu; | ||
368 | pt_element_t *gptep = gw->prefetch_ptes; | 386 | pt_element_t *gptep = gw->prefetch_ptes; |
369 | u64 *spte; | 387 | u64 *spte; |
370 | int i; | 388 | int i; |
@@ -395,14 +413,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, | |||
395 | 413 | ||
396 | gpte = gptep[i]; | 414 | gpte = gptep[i]; |
397 | 415 | ||
398 | if (!is_present_gpte(gpte) || | 416 | if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte)) |
399 | is_rsvd_bits_set(mmu, gpte, PT_PAGE_TABLE_LEVEL)) { | ||
400 | if (!sp->unsync) | ||
401 | __set_spte(spte, shadow_notrap_nonpresent_pte); | ||
402 | continue; | ||
403 | } | ||
404 | |||
405 | if (!(gpte & PT_ACCESSED_MASK)) | ||
406 | continue; | 417 | continue; |
407 | 418 | ||
408 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); | 419 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); |
@@ -427,7 +438,8 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, | |||
427 | static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | 438 | static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, |
428 | struct guest_walker *gw, | 439 | struct guest_walker *gw, |
429 | int user_fault, int write_fault, int hlevel, | 440 | int user_fault, int write_fault, int hlevel, |
430 | int *ptwrite, pfn_t pfn) | 441 | int *ptwrite, pfn_t pfn, bool map_writable, |
442 | bool prefault) | ||
431 | { | 443 | { |
432 | unsigned access = gw->pt_access; | 444 | unsigned access = gw->pt_access; |
433 | struct kvm_mmu_page *sp = NULL; | 445 | struct kvm_mmu_page *sp = NULL; |
@@ -501,7 +513,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
501 | 513 | ||
502 | mmu_set_spte(vcpu, it.sptep, access, gw->pte_access & access, | 514 | mmu_set_spte(vcpu, it.sptep, access, gw->pte_access & access, |
503 | user_fault, write_fault, dirty, ptwrite, it.level, | 515 | user_fault, write_fault, dirty, ptwrite, it.level, |
504 | gw->gfn, pfn, false, true); | 516 | gw->gfn, pfn, prefault, map_writable); |
505 | FNAME(pte_prefetch)(vcpu, gw, it.sptep); | 517 | FNAME(pte_prefetch)(vcpu, gw, it.sptep); |
506 | 518 | ||
507 | return it.sptep; | 519 | return it.sptep; |
@@ -527,8 +539,8 @@ out_gpte_changed: | |||
527 | * Returns: 1 if we need to emulate the instruction, 0 otherwise, or | 539 | * Returns: 1 if we need to emulate the instruction, 0 otherwise, or |
528 | * a negative value on error. | 540 | * a negative value on error. |
529 | */ | 541 | */ |
530 | static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | 542 | static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, |
531 | u32 error_code) | 543 | bool prefault) |
532 | { | 544 | { |
533 | int write_fault = error_code & PFERR_WRITE_MASK; | 545 | int write_fault = error_code & PFERR_WRITE_MASK; |
534 | int user_fault = error_code & PFERR_USER_MASK; | 546 | int user_fault = error_code & PFERR_USER_MASK; |
@@ -538,7 +550,9 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
538 | int r; | 550 | int r; |
539 | pfn_t pfn; | 551 | pfn_t pfn; |
540 | int level = PT_PAGE_TABLE_LEVEL; | 552 | int level = PT_PAGE_TABLE_LEVEL; |
553 | int force_pt_level; | ||
541 | unsigned long mmu_seq; | 554 | unsigned long mmu_seq; |
555 | bool map_writable; | ||
542 | 556 | ||
543 | pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); | 557 | pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); |
544 | 558 | ||
@@ -556,19 +570,29 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
556 | */ | 570 | */ |
557 | if (!r) { | 571 | if (!r) { |
558 | pgprintk("%s: guest page fault\n", __func__); | 572 | pgprintk("%s: guest page fault\n", __func__); |
559 | inject_page_fault(vcpu); | 573 | if (!prefault) { |
560 | vcpu->arch.last_pt_write_count = 0; /* reset fork detector */ | 574 | inject_page_fault(vcpu, &walker.fault); |
575 | /* reset fork detector */ | ||
576 | vcpu->arch.last_pt_write_count = 0; | ||
577 | } | ||
561 | return 0; | 578 | return 0; |
562 | } | 579 | } |
563 | 580 | ||
564 | if (walker.level >= PT_DIRECTORY_LEVEL) { | 581 | if (walker.level >= PT_DIRECTORY_LEVEL) |
582 | force_pt_level = mapping_level_dirty_bitmap(vcpu, walker.gfn); | ||
583 | else | ||
584 | force_pt_level = 1; | ||
585 | if (!force_pt_level) { | ||
565 | level = min(walker.level, mapping_level(vcpu, walker.gfn)); | 586 | level = min(walker.level, mapping_level(vcpu, walker.gfn)); |
566 | walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1); | 587 | walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1); |
567 | } | 588 | } |
568 | 589 | ||
569 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | 590 | mmu_seq = vcpu->kvm->mmu_notifier_seq; |
570 | smp_rmb(); | 591 | smp_rmb(); |
571 | pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); | 592 | |
593 | if (try_async_pf(vcpu, prefault, walker.gfn, addr, &pfn, write_fault, | ||
594 | &map_writable)) | ||
595 | return 0; | ||
572 | 596 | ||
573 | /* mmio */ | 597 | /* mmio */ |
574 | if (is_error_pfn(pfn)) | 598 | if (is_error_pfn(pfn)) |
@@ -580,8 +604,10 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
580 | 604 | ||
581 | trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); | 605 | trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); |
582 | kvm_mmu_free_some_pages(vcpu); | 606 | kvm_mmu_free_some_pages(vcpu); |
607 | if (!force_pt_level) | ||
608 | transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); | ||
583 | sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, | 609 | sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, |
584 | level, &write_pt, pfn); | 610 | level, &write_pt, pfn, map_writable, prefault); |
585 | (void)sptep; | 611 | (void)sptep; |
586 | pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__, | 612 | pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__, |
587 | sptep, *sptep, write_pt); | 613 | sptep, *sptep, write_pt); |
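The mmu_seq snapshot plus smp_rmb() earlier in this hunk is the standard MMU-notifier race check: the sequence count is sampled before the gfn->pfn translation, and the spte may only be installed if no invalidation bumped the count in between. A hedged standalone model of that retry logic (the variable and the check stand in for kvm->mmu_notifier_seq and the kernel's mmu_notifier_retry(), they are not the real API):

	#include <stdio.h>

	static unsigned long mmu_notifier_seq;	/* models kvm->mmu_notifier_seq */

	static int mmu_notifier_retry(unsigned long snapshot)
	{
		/* An invalidation between snapshot and install forces a retry. */
		return snapshot != mmu_notifier_seq;
	}

	int main(void)
	{
		unsigned long seq = mmu_notifier_seq;	/* kernel adds smp_rmb() here */

		mmu_notifier_seq++;			/* concurrent invalidation */
		printf("retry needed: %d\n", mmu_notifier_retry(seq));
		return 0;
	}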
@@ -661,7 +687,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
661 | } | 687 | } |
662 | 688 | ||
663 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, | 689 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, |
664 | u32 *error) | 690 | struct x86_exception *exception) |
665 | { | 691 | { |
666 | struct guest_walker walker; | 692 | struct guest_walker walker; |
667 | gpa_t gpa = UNMAPPED_GVA; | 693 | gpa_t gpa = UNMAPPED_GVA; |
@@ -672,14 +698,15 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, | |||
672 | if (r) { | 698 | if (r) { |
673 | gpa = gfn_to_gpa(walker.gfn); | 699 | gpa = gfn_to_gpa(walker.gfn); |
674 | gpa |= vaddr & ~PAGE_MASK; | 700 | gpa |= vaddr & ~PAGE_MASK; |
675 | } else if (error) | 701 | } else if (exception) |
676 | *error = walker.error_code; | 702 | *exception = walker.fault; |
677 | 703 | ||
678 | return gpa; | 704 | return gpa; |
679 | } | 705 | } |
680 | 706 | ||
681 | static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, | 707 | static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, |
682 | u32 access, u32 *error) | 708 | u32 access, |
709 | struct x86_exception *exception) | ||
683 | { | 710 | { |
684 | struct guest_walker walker; | 711 | struct guest_walker walker; |
685 | gpa_t gpa = UNMAPPED_GVA; | 712 | gpa_t gpa = UNMAPPED_GVA; |
@@ -690,8 +717,8 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, | |||
690 | if (r) { | 717 | if (r) { |
691 | gpa = gfn_to_gpa(walker.gfn); | 718 | gpa = gfn_to_gpa(walker.gfn); |
692 | gpa |= vaddr & ~PAGE_MASK; | 719 | gpa |= vaddr & ~PAGE_MASK; |
693 | } else if (error) | 720 | } else if (exception) |
694 | *error = walker.error_code; | 721 | *exception = walker.fault; |
695 | 722 | ||
696 | return gpa; | 723 | return gpa; |
697 | } | 724 | } |
@@ -730,12 +757,19 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu, | |||
730 | * Using the cached information from sp->gfns is safe because: | 757 | * Using the cached information from sp->gfns is safe because: |
731 | * - The spte has a reference to the struct page, so the pfn for a given gfn | 758 | * - The spte has a reference to the struct page, so the pfn for a given gfn |
732 | * can't change unless all sptes pointing to it are nuked first. | 759 | * can't change unless all sptes pointing to it are nuked first. |
760 | * | ||
761 | * Note: | ||
762 | * We must flush all TLBs if an spte is dropped, even though the | ||
763 | * guest is responsible for the flush: otherwise | ||
764 | * kvm_mmu_notifier_invalidate_page and | ||
765 | * kvm_mmu_notifier_invalidate_range_start see that the mapping is | ||
766 | * no longer used by the guest, skip the flush, and the guest can | ||
767 | * still access the freed pages; kvm->tlbs_dirty defers that flush. | ||
733 | */ | 768 | */ |
734 | static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | 769 | static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) |
735 | bool clear_unsync) | ||
736 | { | 770 | { |
737 | int i, offset, nr_present; | 771 | int i, offset, nr_present; |
738 | bool reset_host_protection; | 772 | bool host_writable; |
739 | gpa_t first_pte_gpa; | 773 | gpa_t first_pte_gpa; |
740 | 774 | ||
741 | offset = nr_present = 0; | 775 | offset = nr_present = 0; |
@@ -764,31 +798,27 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
764 | return -EINVAL; | 798 | return -EINVAL; |
765 | 799 | ||
766 | gfn = gpte_to_gfn(gpte); | 800 | gfn = gpte_to_gfn(gpte); |
767 | if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL) | ||
768 | || gfn != sp->gfns[i] || !is_present_gpte(gpte) | ||
769 | || !(gpte & PT_ACCESSED_MASK)) { | ||
770 | u64 nonpresent; | ||
771 | 801 | ||
772 | if (is_present_gpte(gpte) || !clear_unsync) | 802 | if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) { |
773 | nonpresent = shadow_trap_nonpresent_pte; | 803 | vcpu->kvm->tlbs_dirty++; |
774 | else | 804 | continue; |
775 | nonpresent = shadow_notrap_nonpresent_pte; | 805 | } |
776 | drop_spte(vcpu->kvm, &sp->spt[i], nonpresent); | 806 | |
807 | if (gfn != sp->gfns[i]) { | ||
808 | drop_spte(vcpu->kvm, &sp->spt[i], | ||
809 | shadow_trap_nonpresent_pte); | ||
810 | vcpu->kvm->tlbs_dirty++; | ||
777 | continue; | 811 | continue; |
778 | } | 812 | } |
779 | 813 | ||
780 | nr_present++; | 814 | nr_present++; |
781 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); | 815 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); |
782 | if (!(sp->spt[i] & SPTE_HOST_WRITEABLE)) { | 816 | host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE; |
783 | pte_access &= ~ACC_WRITE_MASK; | 817 | |
784 | reset_host_protection = 0; | ||
785 | } else { | ||
786 | reset_host_protection = 1; | ||
787 | } | ||
788 | set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, | 818 | set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, |
789 | is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn, | 819 | is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn, |
790 | spte_to_pfn(sp->spt[i]), true, false, | 820 | spte_to_pfn(sp->spt[i]), true, false, |
791 | reset_host_protection); | 821 | host_writable); |
792 | } | 822 | } |
793 | 823 | ||
794 | return !nr_present; | 824 | return !nr_present; |
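Per the note at the top of sync_page(), dropping an spte without an immediate flush is accounted in kvm->tlbs_dirty so the flush can be performed later in one batch. A small standalone model of that deferred-flush bookkeeping (names mirror the fields above; this is a sketch, not the kernel code path):

	#include <stdio.h>

	static int tlbs_dirty;		/* models kvm->tlbs_dirty */

	static void drop_spte_deferred(void)
	{
		tlbs_dirty++;		/* flush owed, but not done yet */
	}

	static void flush_point(void)
	{
		if (tlbs_dirty) {
			printf("flushing TLBs (%d deferred drops)\n", tlbs_dirty);
			tlbs_dirty = 0;
		}
	}

	int main(void)
	{
		drop_spte_deferred();
		drop_spte_deferred();
		flush_point();
		return 0;
	}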
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index b81a9b7c2ca4..25bd1bc5aad2 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -31,6 +31,7 @@ | |||
31 | 31 | ||
32 | #include <asm/tlbflush.h> | 32 | #include <asm/tlbflush.h> |
33 | #include <asm/desc.h> | 33 | #include <asm/desc.h> |
34 | #include <asm/kvm_para.h> | ||
34 | 35 | ||
35 | #include <asm/virtext.h> | 36 | #include <asm/virtext.h> |
36 | #include "trace.h" | 37 | #include "trace.h" |
@@ -50,6 +51,10 @@ MODULE_LICENSE("GPL"); | |||
50 | #define SVM_FEATURE_LBRV (1 << 1) | 51 | #define SVM_FEATURE_LBRV (1 << 1) |
51 | #define SVM_FEATURE_SVML (1 << 2) | 52 | #define SVM_FEATURE_SVML (1 << 2) |
52 | #define SVM_FEATURE_NRIP (1 << 3) | 53 | #define SVM_FEATURE_NRIP (1 << 3) |
54 | #define SVM_FEATURE_TSC_RATE (1 << 4) | ||
55 | #define SVM_FEATURE_VMCB_CLEAN (1 << 5) | ||
56 | #define SVM_FEATURE_FLUSH_ASID (1 << 6) | ||
57 | #define SVM_FEATURE_DECODE_ASSIST (1 << 7) | ||
53 | #define SVM_FEATURE_PAUSE_FILTER (1 << 10) | 58 | #define SVM_FEATURE_PAUSE_FILTER (1 << 10) |
54 | 59 | ||
55 | #define NESTED_EXIT_HOST 0 /* Exit handled on host level */ | 60 | #define NESTED_EXIT_HOST 0 /* Exit handled on host level */ |
@@ -97,10 +102,8 @@ struct nested_state { | |||
97 | unsigned long vmexit_rax; | 102 | unsigned long vmexit_rax; |
98 | 103 | ||
99 | /* cache for intercepts of the guest */ | 104 | /* cache for intercepts of the guest */ |
100 | u16 intercept_cr_read; | 105 | u32 intercept_cr; |
101 | u16 intercept_cr_write; | 106 | u32 intercept_dr; |
102 | u16 intercept_dr_read; | ||
103 | u16 intercept_dr_write; | ||
104 | u32 intercept_exceptions; | 107 | u32 intercept_exceptions; |
105 | u64 intercept; | 108 | u64 intercept; |
106 | 109 | ||
@@ -123,7 +126,12 @@ struct vcpu_svm { | |||
123 | u64 next_rip; | 126 | u64 next_rip; |
124 | 127 | ||
125 | u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS]; | 128 | u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS]; |
126 | u64 host_gs_base; | 129 | struct { |
130 | u16 fs; | ||
131 | u16 gs; | ||
132 | u16 ldt; | ||
133 | u64 gs_base; | ||
134 | } host; | ||
127 | 135 | ||
128 | u32 *msrpm; | 136 | u32 *msrpm; |
129 | 137 | ||
@@ -133,6 +141,7 @@ struct vcpu_svm { | |||
133 | 141 | ||
134 | unsigned int3_injected; | 142 | unsigned int3_injected; |
135 | unsigned long int3_rip; | 143 | unsigned long int3_rip; |
144 | u32 apf_reason; | ||
136 | }; | 145 | }; |
137 | 146 | ||
138 | #define MSR_INVALID 0xffffffffU | 147 | #define MSR_INVALID 0xffffffffU |
@@ -180,14 +189,151 @@ static int nested_svm_vmexit(struct vcpu_svm *svm); | |||
180 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, | 189 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, |
181 | bool has_error_code, u32 error_code); | 190 | bool has_error_code, u32 error_code); |
182 | 191 | ||
192 | enum { | ||
193 | VMCB_INTERCEPTS, /* Intercept vectors, TSC offset, | ||
194 | pause filter count */ | ||
195 | VMCB_PERM_MAP, /* IOPM Base and MSRPM Base */ | ||
196 | VMCB_ASID, /* ASID */ | ||
197 | VMCB_INTR, /* int_ctl, int_vector */ | ||
198 | VMCB_NPT, /* npt_en, nCR3, gPAT */ | ||
199 | VMCB_CR, /* CR0, CR3, CR4, EFER */ | ||
200 | VMCB_DR, /* DR6, DR7 */ | ||
201 | VMCB_DT, /* GDT, IDT */ | ||
202 | VMCB_SEG, /* CS, DS, SS, ES, CPL */ | ||
203 | VMCB_CR2, /* CR2 only */ | ||
204 | VMCB_LBR, /* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */ | ||
205 | VMCB_DIRTY_MAX, | ||
206 | }; | ||
207 | |||
208 | /* TPR and CR2 are always written before VMRUN */ | ||
209 | #define VMCB_ALWAYS_DIRTY_MASK ((1U << VMCB_INTR) | (1U << VMCB_CR2)) | ||
210 | |||
211 | static inline void mark_all_dirty(struct vmcb *vmcb) | ||
212 | { | ||
213 | vmcb->control.clean = 0; | ||
214 | } | ||
215 | |||
216 | static inline void mark_all_clean(struct vmcb *vmcb) | ||
217 | { | ||
218 | vmcb->control.clean = ((1 << VMCB_DIRTY_MAX) - 1) | ||
219 | & ~VMCB_ALWAYS_DIRTY_MASK; | ||
220 | } | ||
221 | |||
222 | static inline void mark_dirty(struct vmcb *vmcb, int bit) | ||
223 | { | ||
224 | vmcb->control.clean &= ~(1 << bit); | ||
225 | } | ||
226 | |||
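The clean-bits protocol above: a set bit tells the CPU it may reuse its cached copy of that VMCB area, so every software write to a tracked area must clear the matching bit, and VMCB_INTR/VMCB_CR2 are never marked clean because they are rewritten before each VMRUN. A standalone sketch of the bitmask arithmetic (the enum values are copied from the hunk above; main() is only a demonstration):

	#include <stdio.h>

	enum { VMCB_INTERCEPTS, VMCB_PERM_MAP, VMCB_ASID, VMCB_INTR,
	       VMCB_NPT, VMCB_CR, VMCB_DR, VMCB_DT, VMCB_SEG, VMCB_CR2,
	       VMCB_LBR, VMCB_DIRTY_MAX };

	#define VMCB_ALWAYS_DIRTY_MASK ((1U << VMCB_INTR) | (1U << VMCB_CR2))

	int main(void)
	{
		/* mark_all_clean(): everything cacheable except always-dirty bits */
		unsigned int clean = ((1U << VMCB_DIRTY_MAX) - 1)
				     & ~VMCB_ALWAYS_DIRTY_MASK;

		clean &= ~(1U << VMCB_CR);	/* mark_dirty() after a CR write */
		printf("clean = %#x\n", clean);
		return 0;
	}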
183 | static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) | 227 | static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) |
184 | { | 228 | { |
185 | return container_of(vcpu, struct vcpu_svm, vcpu); | 229 | return container_of(vcpu, struct vcpu_svm, vcpu); |
186 | } | 230 | } |
187 | 231 | ||
188 | static inline bool is_nested(struct vcpu_svm *svm) | 232 | static void recalc_intercepts(struct vcpu_svm *svm) |
233 | { | ||
234 | struct vmcb_control_area *c, *h; | ||
235 | struct nested_state *g; | ||
236 | |||
237 | mark_dirty(svm->vmcb, VMCB_INTERCEPTS); | ||
238 | |||
239 | if (!is_guest_mode(&svm->vcpu)) | ||
240 | return; | ||
241 | |||
242 | c = &svm->vmcb->control; | ||
243 | h = &svm->nested.hsave->control; | ||
244 | g = &svm->nested; | ||
245 | |||
246 | c->intercept_cr = h->intercept_cr | g->intercept_cr; | ||
247 | c->intercept_dr = h->intercept_dr | g->intercept_dr; | ||
248 | c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions; | ||
249 | c->intercept = h->intercept | g->intercept; | ||
250 | } | ||
251 | |||
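recalc_intercepts() keeps a nested guest from being intercepted less than the host requires: while the vcpu is in guest-mode, the effective masks are the OR of the host's (hsave) and L1's cached intercepts. A hedged standalone check of that invariant (the bit values are made up for the demonstration):

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t host_cr = 0x0007;  /* say: host intercepts CR0/CR3/CR4 reads */
		uint32_t l1_cr	 = 0x0100;  /* L1 additionally wants a CR8 intercept */
		uint32_t merged	 = host_cr | l1_cr;

		/* No host-required intercept can be lost by entering guest-mode. */
		assert((merged & host_cr) == host_cr);
		return 0;
	}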
252 | static inline struct vmcb *get_host_vmcb(struct vcpu_svm *svm) | ||
253 | { | ||
254 | if (is_guest_mode(&svm->vcpu)) | ||
255 | return svm->nested.hsave; | ||
256 | else | ||
257 | return svm->vmcb; | ||
258 | } | ||
259 | |||
260 | static inline void set_cr_intercept(struct vcpu_svm *svm, int bit) | ||
261 | { | ||
262 | struct vmcb *vmcb = get_host_vmcb(svm); | ||
263 | |||
264 | vmcb->control.intercept_cr |= (1U << bit); | ||
265 | |||
266 | recalc_intercepts(svm); | ||
267 | } | ||
268 | |||
269 | static inline void clr_cr_intercept(struct vcpu_svm *svm, int bit) | ||
270 | { | ||
271 | struct vmcb *vmcb = get_host_vmcb(svm); | ||
272 | |||
273 | vmcb->control.intercept_cr &= ~(1U << bit); | ||
274 | |||
275 | recalc_intercepts(svm); | ||
276 | } | ||
277 | |||
278 | static inline bool is_cr_intercept(struct vcpu_svm *svm, int bit) | ||
279 | { | ||
280 | struct vmcb *vmcb = get_host_vmcb(svm); | ||
281 | |||
282 | return vmcb->control.intercept_cr & (1U << bit); | ||
283 | } | ||
284 | |||
285 | static inline void set_dr_intercept(struct vcpu_svm *svm, int bit) | ||
286 | { | ||
287 | struct vmcb *vmcb = get_host_vmcb(svm); | ||
288 | |||
289 | vmcb->control.intercept_dr |= (1U << bit); | ||
290 | |||
291 | recalc_intercepts(svm); | ||
292 | } | ||
293 | |||
294 | static inline void clr_dr_intercept(struct vcpu_svm *svm, int bit) | ||
295 | { | ||
296 | struct vmcb *vmcb = get_host_vmcb(svm); | ||
297 | |||
298 | vmcb->control.intercept_dr &= ~(1U << bit); | ||
299 | |||
300 | recalc_intercepts(svm); | ||
301 | } | ||
302 | |||
303 | static inline void set_exception_intercept(struct vcpu_svm *svm, int bit) | ||
304 | { | ||
305 | struct vmcb *vmcb = get_host_vmcb(svm); | ||
306 | |||
307 | vmcb->control.intercept_exceptions |= (1U << bit); | ||
308 | |||
309 | recalc_intercepts(svm); | ||
310 | } | ||
311 | |||
312 | static inline void clr_exception_intercept(struct vcpu_svm *svm, int bit) | ||
189 | { | 313 | { |
190 | return svm->nested.vmcb; | 314 | struct vmcb *vmcb = get_host_vmcb(svm); |
315 | |||
316 | vmcb->control.intercept_exceptions &= ~(1U << bit); | ||
317 | |||
318 | recalc_intercepts(svm); | ||
319 | } | ||
320 | |||
321 | static inline void set_intercept(struct vcpu_svm *svm, int bit) | ||
322 | { | ||
323 | struct vmcb *vmcb = get_host_vmcb(svm); | ||
324 | |||
325 | vmcb->control.intercept |= (1ULL << bit); | ||
326 | |||
327 | recalc_intercepts(svm); | ||
328 | } | ||
329 | |||
330 | static inline void clr_intercept(struct vcpu_svm *svm, int bit) | ||
331 | { | ||
332 | struct vmcb *vmcb = get_host_vmcb(svm); | ||
333 | |||
334 | vmcb->control.intercept &= ~(1ULL << bit); | ||
335 | |||
336 | recalc_intercepts(svm); | ||
191 | } | 337 | } |
192 | 338 | ||
193 | static inline void enable_gif(struct vcpu_svm *svm) | 339 | static inline void enable_gif(struct vcpu_svm *svm) |
@@ -264,11 +410,6 @@ static u32 svm_msrpm_offset(u32 msr) | |||
264 | 410 | ||
265 | #define MAX_INST_SIZE 15 | 411 | #define MAX_INST_SIZE 15 |
266 | 412 | ||
267 | static inline u32 svm_has(u32 feat) | ||
268 | { | ||
269 | return svm_features & feat; | ||
270 | } | ||
271 | |||
272 | static inline void clgi(void) | 413 | static inline void clgi(void) |
273 | { | 414 | { |
274 | asm volatile (__ex(SVM_CLGI)); | 415 | asm volatile (__ex(SVM_CLGI)); |
@@ -284,16 +425,6 @@ static inline void invlpga(unsigned long addr, u32 asid) | |||
284 | asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid)); | 425 | asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid)); |
285 | } | 426 | } |
286 | 427 | ||
287 | static inline void force_new_asid(struct kvm_vcpu *vcpu) | ||
288 | { | ||
289 | to_svm(vcpu)->asid_generation--; | ||
290 | } | ||
291 | |||
292 | static inline void flush_guest_tlb(struct kvm_vcpu *vcpu) | ||
293 | { | ||
294 | force_new_asid(vcpu); | ||
295 | } | ||
296 | |||
297 | static int get_npt_level(void) | 428 | static int get_npt_level(void) |
298 | { | 429 | { |
299 | #ifdef CONFIG_X86_64 | 430 | #ifdef CONFIG_X86_64 |
@@ -310,6 +441,7 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
310 | efer &= ~EFER_LME; | 441 | efer &= ~EFER_LME; |
311 | 442 | ||
312 | to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; | 443 | to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; |
444 | mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); | ||
313 | } | 445 | } |
314 | 446 | ||
315 | static int is_external_interrupt(u32 info) | 447 | static int is_external_interrupt(u32 info) |
@@ -347,7 +479,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
347 | svm->next_rip = svm->vmcb->control.next_rip; | 479 | svm->next_rip = svm->vmcb->control.next_rip; |
348 | 480 | ||
349 | if (!svm->next_rip) { | 481 | if (!svm->next_rip) { |
350 | if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) != | 482 | if (emulate_instruction(vcpu, EMULTYPE_SKIP) != |
351 | EMULATE_DONE) | 483 | EMULATE_DONE) |
352 | printk(KERN_DEBUG "%s: NOP\n", __func__); | 484 | printk(KERN_DEBUG "%s: NOP\n", __func__); |
353 | return; | 485 | return; |
@@ -374,7 +506,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | |||
374 | nested_svm_check_exception(svm, nr, has_error_code, error_code)) | 506 | nested_svm_check_exception(svm, nr, has_error_code, error_code)) |
375 | return; | 507 | return; |
376 | 508 | ||
377 | if (nr == BP_VECTOR && !svm_has(SVM_FEATURE_NRIP)) { | 509 | if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) { |
378 | unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu); | 510 | unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu); |
379 | 511 | ||
380 | /* | 512 | /* |
@@ -670,7 +802,7 @@ static __init int svm_hardware_setup(void) | |||
670 | 802 | ||
671 | svm_features = cpuid_edx(SVM_CPUID_FUNC); | 803 | svm_features = cpuid_edx(SVM_CPUID_FUNC); |
672 | 804 | ||
673 | if (!svm_has(SVM_FEATURE_NPT)) | 805 | if (!boot_cpu_has(X86_FEATURE_NPT)) |
674 | npt_enabled = false; | 806 | npt_enabled = false; |
675 | 807 | ||
676 | if (npt_enabled && !npt) { | 808 | if (npt_enabled && !npt) { |
@@ -725,13 +857,15 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) | |||
725 | struct vcpu_svm *svm = to_svm(vcpu); | 857 | struct vcpu_svm *svm = to_svm(vcpu); |
726 | u64 g_tsc_offset = 0; | 858 | u64 g_tsc_offset = 0; |
727 | 859 | ||
728 | if (is_nested(svm)) { | 860 | if (is_guest_mode(vcpu)) { |
729 | g_tsc_offset = svm->vmcb->control.tsc_offset - | 861 | g_tsc_offset = svm->vmcb->control.tsc_offset - |
730 | svm->nested.hsave->control.tsc_offset; | 862 | svm->nested.hsave->control.tsc_offset; |
731 | svm->nested.hsave->control.tsc_offset = offset; | 863 | svm->nested.hsave->control.tsc_offset = offset; |
732 | } | 864 | } |
733 | 865 | ||
734 | svm->vmcb->control.tsc_offset = offset + g_tsc_offset; | 866 | svm->vmcb->control.tsc_offset = offset + g_tsc_offset; |
867 | |||
868 | mark_dirty(svm->vmcb, VMCB_INTERCEPTS); | ||
735 | } | 869 | } |
736 | 870 | ||
737 | static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) | 871 | static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) |
@@ -739,8 +873,9 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) | |||
739 | struct vcpu_svm *svm = to_svm(vcpu); | 873 | struct vcpu_svm *svm = to_svm(vcpu); |
740 | 874 | ||
741 | svm->vmcb->control.tsc_offset += adjustment; | 875 | svm->vmcb->control.tsc_offset += adjustment; |
742 | if (is_nested(svm)) | 876 | if (is_guest_mode(vcpu)) |
743 | svm->nested.hsave->control.tsc_offset += adjustment; | 877 | svm->nested.hsave->control.tsc_offset += adjustment; |
878 | mark_dirty(svm->vmcb, VMCB_INTERCEPTS); | ||
744 | } | 879 | } |
745 | 880 | ||
746 | static void init_vmcb(struct vcpu_svm *svm) | 881 | static void init_vmcb(struct vcpu_svm *svm) |
@@ -749,62 +884,62 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
749 | struct vmcb_save_area *save = &svm->vmcb->save; | 884 | struct vmcb_save_area *save = &svm->vmcb->save; |
750 | 885 | ||
751 | svm->vcpu.fpu_active = 1; | 886 | svm->vcpu.fpu_active = 1; |
887 | svm->vcpu.arch.hflags = 0; | ||
752 | 888 | ||
753 | control->intercept_cr_read = INTERCEPT_CR0_MASK | | 889 | set_cr_intercept(svm, INTERCEPT_CR0_READ); |
754 | INTERCEPT_CR3_MASK | | 890 | set_cr_intercept(svm, INTERCEPT_CR3_READ); |
755 | INTERCEPT_CR4_MASK; | 891 | set_cr_intercept(svm, INTERCEPT_CR4_READ); |
756 | 892 | set_cr_intercept(svm, INTERCEPT_CR0_WRITE); | |
757 | control->intercept_cr_write = INTERCEPT_CR0_MASK | | 893 | set_cr_intercept(svm, INTERCEPT_CR3_WRITE); |
758 | INTERCEPT_CR3_MASK | | 894 | set_cr_intercept(svm, INTERCEPT_CR4_WRITE); |
759 | INTERCEPT_CR4_MASK | | 895 | set_cr_intercept(svm, INTERCEPT_CR8_WRITE); |
760 | INTERCEPT_CR8_MASK; | 896 | |
761 | 897 | set_dr_intercept(svm, INTERCEPT_DR0_READ); | |
762 | control->intercept_dr_read = INTERCEPT_DR0_MASK | | 898 | set_dr_intercept(svm, INTERCEPT_DR1_READ); |
763 | INTERCEPT_DR1_MASK | | 899 | set_dr_intercept(svm, INTERCEPT_DR2_READ); |
764 | INTERCEPT_DR2_MASK | | 900 | set_dr_intercept(svm, INTERCEPT_DR3_READ); |
765 | INTERCEPT_DR3_MASK | | 901 | set_dr_intercept(svm, INTERCEPT_DR4_READ); |
766 | INTERCEPT_DR4_MASK | | 902 | set_dr_intercept(svm, INTERCEPT_DR5_READ); |
767 | INTERCEPT_DR5_MASK | | 903 | set_dr_intercept(svm, INTERCEPT_DR6_READ); |
768 | INTERCEPT_DR6_MASK | | 904 | set_dr_intercept(svm, INTERCEPT_DR7_READ); |
769 | INTERCEPT_DR7_MASK; | 905 | |
770 | 906 | set_dr_intercept(svm, INTERCEPT_DR0_WRITE); | |
771 | control->intercept_dr_write = INTERCEPT_DR0_MASK | | 907 | set_dr_intercept(svm, INTERCEPT_DR1_WRITE); |
772 | INTERCEPT_DR1_MASK | | 908 | set_dr_intercept(svm, INTERCEPT_DR2_WRITE); |
773 | INTERCEPT_DR2_MASK | | 909 | set_dr_intercept(svm, INTERCEPT_DR3_WRITE); |
774 | INTERCEPT_DR3_MASK | | 910 | set_dr_intercept(svm, INTERCEPT_DR4_WRITE); |
775 | INTERCEPT_DR4_MASK | | 911 | set_dr_intercept(svm, INTERCEPT_DR5_WRITE); |
776 | INTERCEPT_DR5_MASK | | 912 | set_dr_intercept(svm, INTERCEPT_DR6_WRITE); |
777 | INTERCEPT_DR6_MASK | | 913 | set_dr_intercept(svm, INTERCEPT_DR7_WRITE); |
778 | INTERCEPT_DR7_MASK; | 914 | |
779 | 915 | set_exception_intercept(svm, PF_VECTOR); | |
780 | control->intercept_exceptions = (1 << PF_VECTOR) | | 916 | set_exception_intercept(svm, UD_VECTOR); |
781 | (1 << UD_VECTOR) | | 917 | set_exception_intercept(svm, MC_VECTOR); |
782 | (1 << MC_VECTOR); | 918 | |
783 | 919 | set_intercept(svm, INTERCEPT_INTR); | |
784 | 920 | set_intercept(svm, INTERCEPT_NMI); | |
785 | control->intercept = (1ULL << INTERCEPT_INTR) | | 921 | set_intercept(svm, INTERCEPT_SMI); |
786 | (1ULL << INTERCEPT_NMI) | | 922 | set_intercept(svm, INTERCEPT_SELECTIVE_CR0); |
787 | (1ULL << INTERCEPT_SMI) | | 923 | set_intercept(svm, INTERCEPT_CPUID); |
788 | (1ULL << INTERCEPT_SELECTIVE_CR0) | | 924 | set_intercept(svm, INTERCEPT_INVD); |
789 | (1ULL << INTERCEPT_CPUID) | | 925 | set_intercept(svm, INTERCEPT_HLT); |
790 | (1ULL << INTERCEPT_INVD) | | 926 | set_intercept(svm, INTERCEPT_INVLPG); |
791 | (1ULL << INTERCEPT_HLT) | | 927 | set_intercept(svm, INTERCEPT_INVLPGA); |
792 | (1ULL << INTERCEPT_INVLPG) | | 928 | set_intercept(svm, INTERCEPT_IOIO_PROT); |
793 | (1ULL << INTERCEPT_INVLPGA) | | 929 | set_intercept(svm, INTERCEPT_MSR_PROT); |
794 | (1ULL << INTERCEPT_IOIO_PROT) | | 930 | set_intercept(svm, INTERCEPT_TASK_SWITCH); |
795 | (1ULL << INTERCEPT_MSR_PROT) | | 931 | set_intercept(svm, INTERCEPT_SHUTDOWN); |
796 | (1ULL << INTERCEPT_TASK_SWITCH) | | 932 | set_intercept(svm, INTERCEPT_VMRUN); |
797 | (1ULL << INTERCEPT_SHUTDOWN) | | 933 | set_intercept(svm, INTERCEPT_VMMCALL); |
798 | (1ULL << INTERCEPT_VMRUN) | | 934 | set_intercept(svm, INTERCEPT_VMLOAD); |
799 | (1ULL << INTERCEPT_VMMCALL) | | 935 | set_intercept(svm, INTERCEPT_VMSAVE); |
800 | (1ULL << INTERCEPT_VMLOAD) | | 936 | set_intercept(svm, INTERCEPT_STGI); |
801 | (1ULL << INTERCEPT_VMSAVE) | | 937 | set_intercept(svm, INTERCEPT_CLGI); |
802 | (1ULL << INTERCEPT_STGI) | | 938 | set_intercept(svm, INTERCEPT_SKINIT); |
803 | (1ULL << INTERCEPT_CLGI) | | 939 | set_intercept(svm, INTERCEPT_WBINVD); |
804 | (1ULL << INTERCEPT_SKINIT) | | 940 | set_intercept(svm, INTERCEPT_MONITOR); |
805 | (1ULL << INTERCEPT_WBINVD) | | 941 | set_intercept(svm, INTERCEPT_MWAIT); |
806 | (1ULL << INTERCEPT_MONITOR) | | 942 | set_intercept(svm, INTERCEPT_XSETBV); |
807 | (1ULL << INTERCEPT_MWAIT); | ||
808 | 943 | ||
809 | control->iopm_base_pa = iopm_base; | 944 | control->iopm_base_pa = iopm_base; |
810 | control->msrpm_base_pa = __pa(svm->msrpm); | 945 | control->msrpm_base_pa = __pa(svm->msrpm); |
@@ -855,25 +990,27 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
855 | if (npt_enabled) { | 990 | if (npt_enabled) { |
856 | /* Setup VMCB for Nested Paging */ | 991 | /* Setup VMCB for Nested Paging */ |
857 | control->nested_ctl = 1; | 992 | control->nested_ctl = 1; |
858 | control->intercept &= ~((1ULL << INTERCEPT_TASK_SWITCH) | | 993 | clr_intercept(svm, INTERCEPT_TASK_SWITCH); |
859 | (1ULL << INTERCEPT_INVLPG)); | 994 | clr_intercept(svm, INTERCEPT_INVLPG); |
860 | control->intercept_exceptions &= ~(1 << PF_VECTOR); | 995 | clr_exception_intercept(svm, PF_VECTOR); |
861 | control->intercept_cr_read &= ~INTERCEPT_CR3_MASK; | 996 | clr_cr_intercept(svm, INTERCEPT_CR3_READ); |
862 | control->intercept_cr_write &= ~INTERCEPT_CR3_MASK; | 997 | clr_cr_intercept(svm, INTERCEPT_CR3_WRITE); |
863 | save->g_pat = 0x0007040600070406ULL; | 998 | save->g_pat = 0x0007040600070406ULL; |
864 | save->cr3 = 0; | 999 | save->cr3 = 0; |
865 | save->cr4 = 0; | 1000 | save->cr4 = 0; |
866 | } | 1001 | } |
867 | force_new_asid(&svm->vcpu); | 1002 | svm->asid_generation = 0; |
868 | 1003 | ||
869 | svm->nested.vmcb = 0; | 1004 | svm->nested.vmcb = 0; |
870 | svm->vcpu.arch.hflags = 0; | 1005 | svm->vcpu.arch.hflags = 0; |
871 | 1006 | ||
872 | if (svm_has(SVM_FEATURE_PAUSE_FILTER)) { | 1007 | if (boot_cpu_has(X86_FEATURE_PAUSEFILTER)) { |
873 | control->pause_filter_count = 3000; | 1008 | control->pause_filter_count = 3000; |
874 | control->intercept |= (1ULL << INTERCEPT_PAUSE); | 1009 | set_intercept(svm, INTERCEPT_PAUSE); |
875 | } | 1010 | } |
876 | 1011 | ||
1012 | mark_all_dirty(svm->vmcb); | ||
1013 | |||
877 | enable_gif(svm); | 1014 | enable_gif(svm); |
878 | } | 1015 | } |
879 | 1016 | ||
@@ -990,8 +1127,16 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
990 | 1127 | ||
991 | if (unlikely(cpu != vcpu->cpu)) { | 1128 | if (unlikely(cpu != vcpu->cpu)) { |
992 | svm->asid_generation = 0; | 1129 | svm->asid_generation = 0; |
1130 | mark_all_dirty(svm->vmcb); | ||
993 | } | 1131 | } |
994 | 1132 | ||
1133 | #ifdef CONFIG_X86_64 | ||
1134 | rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base); | ||
1135 | #endif | ||
1136 | savesegment(fs, svm->host.fs); | ||
1137 | savesegment(gs, svm->host.gs); | ||
1138 | svm->host.ldt = kvm_read_ldt(); | ||
1139 | |||
995 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) | 1140 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) |
996 | rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); | 1141 | rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); |
997 | } | 1142 | } |
@@ -1002,6 +1147,14 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) | |||
1002 | int i; | 1147 | int i; |
1003 | 1148 | ||
1004 | ++vcpu->stat.host_state_reload; | 1149 | ++vcpu->stat.host_state_reload; |
1150 | kvm_load_ldt(svm->host.ldt); | ||
1151 | #ifdef CONFIG_X86_64 | ||
1152 | loadsegment(fs, svm->host.fs); | ||
1153 | load_gs_index(svm->host.gs); | ||
1154 | wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs); | ||
1155 | #else | ||
1156 | loadsegment(gs, svm->host.gs); | ||
1157 | #endif | ||
1005 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) | 1158 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) |
1006 | wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); | 1159 | wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); |
1007 | } | 1160 | } |
@@ -1021,7 +1174,7 @@ static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) | |||
1021 | switch (reg) { | 1174 | switch (reg) { |
1022 | case VCPU_EXREG_PDPTR: | 1175 | case VCPU_EXREG_PDPTR: |
1023 | BUG_ON(!npt_enabled); | 1176 | BUG_ON(!npt_enabled); |
1024 | load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3); | 1177 | load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)); |
1025 | break; | 1178 | break; |
1026 | default: | 1179 | default: |
1027 | BUG(); | 1180 | BUG(); |
@@ -1030,12 +1183,12 @@ static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) | |||
1030 | 1183 | ||
1031 | static void svm_set_vintr(struct vcpu_svm *svm) | 1184 | static void svm_set_vintr(struct vcpu_svm *svm) |
1032 | { | 1185 | { |
1033 | svm->vmcb->control.intercept |= 1ULL << INTERCEPT_VINTR; | 1186 | set_intercept(svm, INTERCEPT_VINTR); |
1034 | } | 1187 | } |
1035 | 1188 | ||
1036 | static void svm_clear_vintr(struct vcpu_svm *svm) | 1189 | static void svm_clear_vintr(struct vcpu_svm *svm) |
1037 | { | 1190 | { |
1038 | svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR); | 1191 | clr_intercept(svm, INTERCEPT_VINTR); |
1039 | } | 1192 | } |
1040 | 1193 | ||
1041 | static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg) | 1194 | static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg) |
@@ -1150,6 +1303,7 @@ static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) | |||
1150 | 1303 | ||
1151 | svm->vmcb->save.idtr.limit = dt->size; | 1304 | svm->vmcb->save.idtr.limit = dt->size; |
1152 | svm->vmcb->save.idtr.base = dt->address; | 1305 | svm->vmcb->save.idtr.base = dt->address; |
1306 | mark_dirty(svm->vmcb, VMCB_DT); | ||
1153 | } | 1307 | } |
1154 | 1308 | ||
1155 | static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) | 1309 | static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
@@ -1166,19 +1320,23 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) | |||
1166 | 1320 | ||
1167 | svm->vmcb->save.gdtr.limit = dt->size; | 1321 | svm->vmcb->save.gdtr.limit = dt->size; |
1168 | svm->vmcb->save.gdtr.base = dt->address; | 1322 | svm->vmcb->save.gdtr.base = dt->address; |
1323 | mark_dirty(svm->vmcb, VMCB_DT); | ||
1169 | } | 1324 | } |
1170 | 1325 | ||
1171 | static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) | 1326 | static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) |
1172 | { | 1327 | { |
1173 | } | 1328 | } |
1174 | 1329 | ||
1330 | static void svm_decache_cr3(struct kvm_vcpu *vcpu) | ||
1331 | { | ||
1332 | } | ||
1333 | |||
1175 | static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) | 1334 | static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) |
1176 | { | 1335 | { |
1177 | } | 1336 | } |
1178 | 1337 | ||
1179 | static void update_cr0_intercept(struct vcpu_svm *svm) | 1338 | static void update_cr0_intercept(struct vcpu_svm *svm) |
1180 | { | 1339 | { |
1181 | struct vmcb *vmcb = svm->vmcb; | ||
1182 | ulong gcr0 = svm->vcpu.arch.cr0; | 1340 | ulong gcr0 = svm->vcpu.arch.cr0; |
1183 | u64 *hcr0 = &svm->vmcb->save.cr0; | 1341 | u64 *hcr0 = &svm->vmcb->save.cr0; |
1184 | 1342 | ||
@@ -1188,27 +1346,14 @@ static void update_cr0_intercept(struct vcpu_svm *svm) | |||
1188 | *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK) | 1346 | *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK) |
1189 | | (gcr0 & SVM_CR0_SELECTIVE_MASK); | 1347 | | (gcr0 & SVM_CR0_SELECTIVE_MASK); |
1190 | 1348 | ||
1349 | mark_dirty(svm->vmcb, VMCB_CR); | ||
1191 | 1350 | ||
1192 | if (gcr0 == *hcr0 && svm->vcpu.fpu_active) { | 1351 | if (gcr0 == *hcr0 && svm->vcpu.fpu_active) { |
1193 | vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; | 1352 | clr_cr_intercept(svm, INTERCEPT_CR0_READ); |
1194 | vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; | 1353 | clr_cr_intercept(svm, INTERCEPT_CR0_WRITE); |
1195 | if (is_nested(svm)) { | ||
1196 | struct vmcb *hsave = svm->nested.hsave; | ||
1197 | |||
1198 | hsave->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; | ||
1199 | hsave->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; | ||
1200 | vmcb->control.intercept_cr_read |= svm->nested.intercept_cr_read; | ||
1201 | vmcb->control.intercept_cr_write |= svm->nested.intercept_cr_write; | ||
1202 | } | ||
1203 | } else { | 1354 | } else { |
1204 | svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK; | 1355 | set_cr_intercept(svm, INTERCEPT_CR0_READ); |
1205 | svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR0_MASK; | 1356 | set_cr_intercept(svm, INTERCEPT_CR0_WRITE); |
1206 | if (is_nested(svm)) { | ||
1207 | struct vmcb *hsave = svm->nested.hsave; | ||
1208 | |||
1209 | hsave->control.intercept_cr_read |= INTERCEPT_CR0_MASK; | ||
1210 | hsave->control.intercept_cr_write |= INTERCEPT_CR0_MASK; | ||
1211 | } | ||
1212 | } | 1357 | } |
1213 | } | 1358 | } |
1214 | 1359 | ||
@@ -1216,7 +1361,7 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
1216 | { | 1361 | { |
1217 | struct vcpu_svm *svm = to_svm(vcpu); | 1362 | struct vcpu_svm *svm = to_svm(vcpu); |
1218 | 1363 | ||
1219 | if (is_nested(svm)) { | 1364 | if (is_guest_mode(vcpu)) { |
1220 | /* | 1365 | /* |
1221 | * We are here because we run in nested mode, the host kvm | 1366 | * We are here because we run in nested mode, the host kvm |
1222 | * intercepts cr0 writes but the l1 hypervisor does not. | 1367 | * intercepts cr0 writes but the l1 hypervisor does not. |
@@ -1268,6 +1413,7 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
1268 | */ | 1413 | */ |
1269 | cr0 &= ~(X86_CR0_CD | X86_CR0_NW); | 1414 | cr0 &= ~(X86_CR0_CD | X86_CR0_NW); |
1270 | svm->vmcb->save.cr0 = cr0; | 1415 | svm->vmcb->save.cr0 = cr0; |
1416 | mark_dirty(svm->vmcb, VMCB_CR); | ||
1271 | update_cr0_intercept(svm); | 1417 | update_cr0_intercept(svm); |
1272 | } | 1418 | } |
1273 | 1419 | ||
@@ -1277,13 +1423,14 @@ static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
1277 | unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4; | 1423 | unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4; |
1278 | 1424 | ||
1279 | if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE)) | 1425 | if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE)) |
1280 | force_new_asid(vcpu); | 1426 | svm_flush_tlb(vcpu); |
1281 | 1427 | ||
1282 | vcpu->arch.cr4 = cr4; | 1428 | vcpu->arch.cr4 = cr4; |
1283 | if (!npt_enabled) | 1429 | if (!npt_enabled) |
1284 | cr4 |= X86_CR4_PAE; | 1430 | cr4 |= X86_CR4_PAE; |
1285 | cr4 |= host_cr4_mce; | 1431 | cr4 |= host_cr4_mce; |
1286 | to_svm(vcpu)->vmcb->save.cr4 = cr4; | 1432 | to_svm(vcpu)->vmcb->save.cr4 = cr4; |
1433 | mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); | ||
1287 | } | 1434 | } |
1288 | 1435 | ||
1289 | static void svm_set_segment(struct kvm_vcpu *vcpu, | 1436 | static void svm_set_segment(struct kvm_vcpu *vcpu, |
@@ -1312,26 +1459,25 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, | |||
1312 | = (svm->vmcb->save.cs.attrib | 1459 | = (svm->vmcb->save.cs.attrib |
1313 | >> SVM_SELECTOR_DPL_SHIFT) & 3; | 1460 | >> SVM_SELECTOR_DPL_SHIFT) & 3; |
1314 | 1461 | ||
1462 | mark_dirty(svm->vmcb, VMCB_SEG); | ||
1315 | } | 1463 | } |
1316 | 1464 | ||
1317 | static void update_db_intercept(struct kvm_vcpu *vcpu) | 1465 | static void update_db_intercept(struct kvm_vcpu *vcpu) |
1318 | { | 1466 | { |
1319 | struct vcpu_svm *svm = to_svm(vcpu); | 1467 | struct vcpu_svm *svm = to_svm(vcpu); |
1320 | 1468 | ||
1321 | svm->vmcb->control.intercept_exceptions &= | 1469 | clr_exception_intercept(svm, DB_VECTOR); |
1322 | ~((1 << DB_VECTOR) | (1 << BP_VECTOR)); | 1470 | clr_exception_intercept(svm, BP_VECTOR); |
1323 | 1471 | ||
1324 | if (svm->nmi_singlestep) | 1472 | if (svm->nmi_singlestep) |
1325 | svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR); | 1473 | set_exception_intercept(svm, DB_VECTOR); |
1326 | 1474 | ||
1327 | if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { | 1475 | if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { |
1328 | if (vcpu->guest_debug & | 1476 | if (vcpu->guest_debug & |
1329 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) | 1477 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) |
1330 | svm->vmcb->control.intercept_exceptions |= | 1478 | set_exception_intercept(svm, DB_VECTOR); |
1331 | 1 << DB_VECTOR; | ||
1332 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) | 1479 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) |
1333 | svm->vmcb->control.intercept_exceptions |= | 1480 | set_exception_intercept(svm, BP_VECTOR); |
1334 | 1 << BP_VECTOR; | ||
1335 | } else | 1481 | } else |
1336 | vcpu->guest_debug = 0; | 1482 | vcpu->guest_debug = 0; |
1337 | } | 1483 | } |
@@ -1345,21 +1491,9 @@ static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) | |||
1345 | else | 1491 | else |
1346 | svm->vmcb->save.dr7 = vcpu->arch.dr7; | 1492 | svm->vmcb->save.dr7 = vcpu->arch.dr7; |
1347 | 1493 | ||
1348 | update_db_intercept(vcpu); | 1494 | mark_dirty(svm->vmcb, VMCB_DR); |
1349 | } | ||
1350 | |||
1351 | static void load_host_msrs(struct kvm_vcpu *vcpu) | ||
1352 | { | ||
1353 | #ifdef CONFIG_X86_64 | ||
1354 | wrmsrl(MSR_GS_BASE, to_svm(vcpu)->host_gs_base); | ||
1355 | #endif | ||
1356 | } | ||
1357 | 1495 | ||
1358 | static void save_host_msrs(struct kvm_vcpu *vcpu) | 1496 | update_db_intercept(vcpu); |
1359 | { | ||
1360 | #ifdef CONFIG_X86_64 | ||
1361 | rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host_gs_base); | ||
1362 | #endif | ||
1363 | } | 1497 | } |
1364 | 1498 | ||
1365 | static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) | 1499 | static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) |
@@ -1372,6 +1506,8 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) | |||
1372 | 1506 | ||
1373 | svm->asid_generation = sd->asid_generation; | 1507 | svm->asid_generation = sd->asid_generation; |
1374 | svm->vmcb->control.asid = sd->next_asid++; | 1508 | svm->vmcb->control.asid = sd->next_asid++; |
1509 | |||
1510 | mark_dirty(svm->vmcb, VMCB_ASID); | ||
1375 | } | 1511 | } |
1376 | 1512 | ||
1377 | static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) | 1513 | static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) |
@@ -1379,20 +1515,40 @@ static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) | |||
1379 | struct vcpu_svm *svm = to_svm(vcpu); | 1515 | struct vcpu_svm *svm = to_svm(vcpu); |
1380 | 1516 | ||
1381 | svm->vmcb->save.dr7 = value; | 1517 | svm->vmcb->save.dr7 = value; |
1518 | mark_dirty(svm->vmcb, VMCB_DR); | ||
1382 | } | 1519 | } |
1383 | 1520 | ||
1384 | static int pf_interception(struct vcpu_svm *svm) | 1521 | static int pf_interception(struct vcpu_svm *svm) |
1385 | { | 1522 | { |
1386 | u64 fault_address; | 1523 | u64 fault_address = svm->vmcb->control.exit_info_2; |
1387 | u32 error_code; | 1524 | u32 error_code; |
1525 | int r = 1; | ||
1388 | 1526 | ||
1389 | fault_address = svm->vmcb->control.exit_info_2; | 1527 | switch (svm->apf_reason) { |
1390 | error_code = svm->vmcb->control.exit_info_1; | 1528 | default: |
1391 | 1529 | error_code = svm->vmcb->control.exit_info_1; | |
1392 | trace_kvm_page_fault(fault_address, error_code); | 1530 | |
1393 | if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu)) | 1531 | trace_kvm_page_fault(fault_address, error_code); |
1394 | kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address); | 1532 | if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu)) |
1395 | return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); | 1533 | kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address); |
1534 | r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code, | ||
1535 | svm->vmcb->control.insn_bytes, | ||
1536 | svm->vmcb->control.insn_len); | ||
1537 | break; | ||
1538 | case KVM_PV_REASON_PAGE_NOT_PRESENT: | ||
1539 | svm->apf_reason = 0; | ||
1540 | local_irq_disable(); | ||
1541 | kvm_async_pf_task_wait(fault_address); | ||
1542 | local_irq_enable(); | ||
1543 | break; | ||
1544 | case KVM_PV_REASON_PAGE_READY: | ||
1545 | svm->apf_reason = 0; | ||
1546 | local_irq_disable(); | ||
1547 | kvm_async_pf_task_wake(fault_address); | ||
1548 | local_irq_enable(); | ||
1549 | break; | ||
1550 | } | ||
1551 | return r; | ||
1396 | } | 1552 | } |
1397 | 1553 | ||
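pf_interception() now demultiplexes on apf_reason: zero means an ordinary #PF to be handled by the MMU, while the two paravirtual reason codes park or wake the faulting task. A standalone model of that dispatch (enum values are illustrative placeholders, not the KVM_PV_REASON_* ABI constants):

	#include <stdio.h>

	enum { APF_NONE, APF_PAGE_NOT_PRESENT, APF_PAGE_READY }; /* illustrative */

	static void handle_pf(int apf_reason, unsigned long fault_address)
	{
		switch (apf_reason) {
		default:
			printf("normal #PF at %#lx -> MMU\n", fault_address);
			break;
		case APF_PAGE_NOT_PRESENT:
			printf("async PF: park task waiting on %#lx\n", fault_address);
			break;
		case APF_PAGE_READY:
			printf("async PF: wake task waiting on %#lx\n", fault_address);
			break;
		}
	}

	int main(void)
	{
		handle_pf(APF_NONE, 0x1000);
		handle_pf(APF_PAGE_READY, 0x1000);
		return 0;
	}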
1398 | static int db_interception(struct vcpu_svm *svm) | 1554 | static int db_interception(struct vcpu_svm *svm) |
@@ -1440,7 +1596,7 @@ static int ud_interception(struct vcpu_svm *svm) | |||
1440 | { | 1596 | { |
1441 | int er; | 1597 | int er; |
1442 | 1598 | ||
1443 | er = emulate_instruction(&svm->vcpu, 0, 0, EMULTYPE_TRAP_UD); | 1599 | er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD); |
1444 | if (er != EMULATE_DONE) | 1600 | if (er != EMULATE_DONE) |
1445 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); | 1601 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); |
1446 | return 1; | 1602 | return 1; |
@@ -1449,21 +1605,8 @@ static int ud_interception(struct vcpu_svm *svm) | |||
1449 | static void svm_fpu_activate(struct kvm_vcpu *vcpu) | 1605 | static void svm_fpu_activate(struct kvm_vcpu *vcpu) |
1450 | { | 1606 | { |
1451 | struct vcpu_svm *svm = to_svm(vcpu); | 1607 | struct vcpu_svm *svm = to_svm(vcpu); |
1452 | u32 excp; | ||
1453 | |||
1454 | if (is_nested(svm)) { | ||
1455 | u32 h_excp, n_excp; | ||
1456 | |||
1457 | h_excp = svm->nested.hsave->control.intercept_exceptions; | ||
1458 | n_excp = svm->nested.intercept_exceptions; | ||
1459 | h_excp &= ~(1 << NM_VECTOR); | ||
1460 | excp = h_excp | n_excp; | ||
1461 | } else { | ||
1462 | excp = svm->vmcb->control.intercept_exceptions; | ||
1463 | excp &= ~(1 << NM_VECTOR); | ||
1464 | } | ||
1465 | 1608 | ||
1466 | svm->vmcb->control.intercept_exceptions = excp; | 1609 | clr_exception_intercept(svm, NM_VECTOR); |
1467 | 1610 | ||
1468 | svm->vcpu.fpu_active = 1; | 1611 | svm->vcpu.fpu_active = 1; |
1469 | update_cr0_intercept(svm); | 1612 | update_cr0_intercept(svm); |
@@ -1570,7 +1713,7 @@ static int io_interception(struct vcpu_svm *svm) | |||
1570 | string = (io_info & SVM_IOIO_STR_MASK) != 0; | 1713 | string = (io_info & SVM_IOIO_STR_MASK) != 0; |
1571 | in = (io_info & SVM_IOIO_TYPE_MASK) != 0; | 1714 | in = (io_info & SVM_IOIO_TYPE_MASK) != 0; |
1572 | if (string || in) | 1715 | if (string || in) |
1573 | return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE; | 1716 | return emulate_instruction(vcpu, 0) == EMULATE_DONE; |
1574 | 1717 | ||
1575 | port = io_info >> 16; | 1718 | port = io_info >> 16; |
1576 | size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; | 1719 | size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; |
@@ -1624,17 +1767,19 @@ static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu, | |||
1624 | struct vcpu_svm *svm = to_svm(vcpu); | 1767 | struct vcpu_svm *svm = to_svm(vcpu); |
1625 | 1768 | ||
1626 | svm->vmcb->control.nested_cr3 = root; | 1769 | svm->vmcb->control.nested_cr3 = root; |
1627 | force_new_asid(vcpu); | 1770 | mark_dirty(svm->vmcb, VMCB_NPT); |
1771 | svm_flush_tlb(vcpu); | ||
1628 | } | 1772 | } |
1629 | 1773 | ||
1630 | static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu) | 1774 | static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, |
1775 | struct x86_exception *fault) | ||
1631 | { | 1776 | { |
1632 | struct vcpu_svm *svm = to_svm(vcpu); | 1777 | struct vcpu_svm *svm = to_svm(vcpu); |
1633 | 1778 | ||
1634 | svm->vmcb->control.exit_code = SVM_EXIT_NPF; | 1779 | svm->vmcb->control.exit_code = SVM_EXIT_NPF; |
1635 | svm->vmcb->control.exit_code_hi = 0; | 1780 | svm->vmcb->control.exit_code_hi = 0; |
1636 | svm->vmcb->control.exit_info_1 = vcpu->arch.fault.error_code; | 1781 | svm->vmcb->control.exit_info_1 = fault->error_code; |
1637 | svm->vmcb->control.exit_info_2 = vcpu->arch.fault.address; | 1782 | svm->vmcb->control.exit_info_2 = fault->address; |
1638 | 1783 | ||
1639 | nested_svm_vmexit(svm); | 1784 | nested_svm_vmexit(svm); |
1640 | } | 1785 | } |
@@ -1680,7 +1825,7 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, | |||
1680 | { | 1825 | { |
1681 | int vmexit; | 1826 | int vmexit; |
1682 | 1827 | ||
1683 | if (!is_nested(svm)) | 1828 | if (!is_guest_mode(&svm->vcpu)) |
1684 | return 0; | 1829 | return 0; |
1685 | 1830 | ||
1686 | svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr; | 1831 | svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr; |
@@ -1698,7 +1843,7 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, | |||
1698 | /* This function returns true if it is safe to enable the irq window */ | 1843 | /* This function returns true if it is safe to enable the irq window */ |
1699 | static inline bool nested_svm_intr(struct vcpu_svm *svm) | 1844 | static inline bool nested_svm_intr(struct vcpu_svm *svm) |
1700 | { | 1845 | { |
1701 | if (!is_nested(svm)) | 1846 | if (!is_guest_mode(&svm->vcpu)) |
1702 | return true; | 1847 | return true; |
1703 | 1848 | ||
1704 | if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) | 1849 | if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) |
@@ -1737,7 +1882,7 @@ static inline bool nested_svm_intr(struct vcpu_svm *svm) | |||
1737 | /* This function returns true if it is save to enable the nmi window */ | 1882 | /* This function returns true if it is save to enable the nmi window */ |
1738 | static inline bool nested_svm_nmi(struct vcpu_svm *svm) | 1883 | static inline bool nested_svm_nmi(struct vcpu_svm *svm) |
1739 | { | 1884 | { |
1740 | if (!is_nested(svm)) | 1885 | if (!is_guest_mode(&svm->vcpu)) |
1741 | return true; | 1886 | return true; |
1742 | 1887 | ||
1743 | if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI))) | 1888 | if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI))) |
@@ -1836,8 +1981,8 @@ static int nested_svm_exit_special(struct vcpu_svm *svm) | |||
1836 | return NESTED_EXIT_HOST; | 1981 | return NESTED_EXIT_HOST; |
1837 | break; | 1982 | break; |
1838 | case SVM_EXIT_EXCP_BASE + PF_VECTOR: | 1983 | case SVM_EXIT_EXCP_BASE + PF_VECTOR: |
1839 | /* When we're shadowing, trap PFs */ | 1984 | /* When we're shadowing, trap PFs, but not async PF */ |
1840 | if (!npt_enabled) | 1985 | if (!npt_enabled && svm->apf_reason == 0) |
1841 | return NESTED_EXIT_HOST; | 1986 | return NESTED_EXIT_HOST; |
1842 | break; | 1987 | break; |
1843 | case SVM_EXIT_EXCP_BASE + NM_VECTOR: | 1988 | case SVM_EXIT_EXCP_BASE + NM_VECTOR: |
@@ -1865,27 +2010,15 @@ static int nested_svm_intercept(struct vcpu_svm *svm) | |||
1865 | case SVM_EXIT_IOIO: | 2010 | case SVM_EXIT_IOIO: |
1866 | vmexit = nested_svm_intercept_ioio(svm); | 2011 | vmexit = nested_svm_intercept_ioio(svm); |
1867 | break; | 2012 | break; |
1868 | case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: { | 2013 | case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: { |
1869 | u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0); | 2014 | u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0); |
1870 | if (svm->nested.intercept_cr_read & cr_bits) | 2015 | if (svm->nested.intercept_cr & bit) |
1871 | vmexit = NESTED_EXIT_DONE; | 2016 | vmexit = NESTED_EXIT_DONE; |
1872 | break; | 2017 | break; |
1873 | } | 2018 | } |
1874 | case SVM_EXIT_WRITE_CR0 ... SVM_EXIT_WRITE_CR8: { | 2019 | case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: { |
1875 | u32 cr_bits = 1 << (exit_code - SVM_EXIT_WRITE_CR0); | 2020 | u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0); |
1876 | if (svm->nested.intercept_cr_write & cr_bits) | 2021 | if (svm->nested.intercept_dr & bit) |
1877 | vmexit = NESTED_EXIT_DONE; | ||
1878 | break; | ||
1879 | } | ||
1880 | case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR7: { | ||
1881 | u32 dr_bits = 1 << (exit_code - SVM_EXIT_READ_DR0); | ||
1882 | if (svm->nested.intercept_dr_read & dr_bits) | ||
1883 | vmexit = NESTED_EXIT_DONE; | ||
1884 | break; | ||
1885 | } | ||
1886 | case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR7: { | ||
1887 | u32 dr_bits = 1 << (exit_code - SVM_EXIT_WRITE_DR0); | ||
1888 | if (svm->nested.intercept_dr_write & dr_bits) | ||
1889 | vmexit = NESTED_EXIT_DONE; | 2022 | vmexit = NESTED_EXIT_DONE; |
1890 | break; | 2023 | break; |
1891 | } | 2024 | } |
@@ -1893,6 +2026,10 @@ static int nested_svm_intercept(struct vcpu_svm *svm) | |||
1893 | u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE); | 2026 | u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE); |
1894 | if (svm->nested.intercept_exceptions & excp_bits) | 2027 | if (svm->nested.intercept_exceptions & excp_bits) |
1895 | vmexit = NESTED_EXIT_DONE; | 2028 | vmexit = NESTED_EXIT_DONE; |
2029 | /* an async page fault always causes a vmexit */ | ||
2030 | else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) && | ||
2031 | svm->apf_reason != 0) | ||
2032 | vmexit = NESTED_EXIT_DONE; | ||
1896 | break; | 2033 | break; |
1897 | } | 2034 | } |
1898 | case SVM_EXIT_ERR: { | 2035 | case SVM_EXIT_ERR: { |
@@ -1926,10 +2063,8 @@ static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *fr | |||
1926 | struct vmcb_control_area *dst = &dst_vmcb->control; | 2063 | struct vmcb_control_area *dst = &dst_vmcb->control; |
1927 | struct vmcb_control_area *from = &from_vmcb->control; | 2064 | struct vmcb_control_area *from = &from_vmcb->control; |
1928 | 2065 | ||
1929 | dst->intercept_cr_read = from->intercept_cr_read; | 2066 | dst->intercept_cr = from->intercept_cr; |
1930 | dst->intercept_cr_write = from->intercept_cr_write; | 2067 | dst->intercept_dr = from->intercept_dr; |
1931 | dst->intercept_dr_read = from->intercept_dr_read; | ||
1932 | dst->intercept_dr_write = from->intercept_dr_write; | ||
1933 | dst->intercept_exceptions = from->intercept_exceptions; | 2068 | dst->intercept_exceptions = from->intercept_exceptions; |
1934 | dst->intercept = from->intercept; | 2069 | dst->intercept = from->intercept; |
1935 | dst->iopm_base_pa = from->iopm_base_pa; | 2070 | dst->iopm_base_pa = from->iopm_base_pa; |
@@ -1970,7 +2105,8 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
1970 | if (!nested_vmcb) | 2105 | if (!nested_vmcb) |
1971 | return 1; | 2106 | return 1; |
1972 | 2107 | ||
1973 | /* Exit nested SVM mode */ | 2108 | /* Exit Guest-Mode */ |
2109 | leave_guest_mode(&svm->vcpu); | ||
1974 | svm->nested.vmcb = 0; | 2110 | svm->nested.vmcb = 0; |
1975 | 2111 | ||
1976 | /* Give the current vmcb to the guest */ | 2112 | /* Give the current vmcb to the guest */ |
@@ -1984,7 +2120,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
1984 | nested_vmcb->save.idtr = vmcb->save.idtr; | 2120 | nested_vmcb->save.idtr = vmcb->save.idtr; |
1985 | nested_vmcb->save.efer = svm->vcpu.arch.efer; | 2121 | nested_vmcb->save.efer = svm->vcpu.arch.efer; |
1986 | nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu); | 2122 | nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu); |
1987 | nested_vmcb->save.cr3 = svm->vcpu.arch.cr3; | 2123 | nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu); |
1988 | nested_vmcb->save.cr2 = vmcb->save.cr2; | 2124 | nested_vmcb->save.cr2 = vmcb->save.cr2; |
1989 | nested_vmcb->save.cr4 = svm->vcpu.arch.cr4; | 2125 | nested_vmcb->save.cr4 = svm->vcpu.arch.cr4; |
1990 | nested_vmcb->save.rflags = vmcb->save.rflags; | 2126 | nested_vmcb->save.rflags = vmcb->save.rflags; |
@@ -2061,6 +2197,8 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
2061 | svm->vmcb->save.cpl = 0; | 2197 | svm->vmcb->save.cpl = 0; |
2062 | svm->vmcb->control.exit_int_info = 0; | 2198 | svm->vmcb->control.exit_int_info = 0; |
2063 | 2199 | ||
2200 | mark_all_dirty(svm->vmcb); | ||
2201 | |||
2064 | nested_svm_unmap(page); | 2202 | nested_svm_unmap(page); |
2065 | 2203 | ||
2066 | nested_svm_uninit_mmu_context(&svm->vcpu); | 2204 | nested_svm_uninit_mmu_context(&svm->vcpu); |
@@ -2148,8 +2286,8 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
2148 | nested_vmcb->control.event_inj, | 2286 | nested_vmcb->control.event_inj, |
2149 | nested_vmcb->control.nested_ctl); | 2287 | nested_vmcb->control.nested_ctl); |
2150 | 2288 | ||
2151 | trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr_read, | 2289 | trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr & 0xffff, |
2152 | nested_vmcb->control.intercept_cr_write, | 2290 | nested_vmcb->control.intercept_cr >> 16, |
2153 | nested_vmcb->control.intercept_exceptions, | 2291 | nested_vmcb->control.intercept_exceptions, |
2154 | nested_vmcb->control.intercept); | 2292 | nested_vmcb->control.intercept); |
2155 | 2293 | ||
@@ -2177,7 +2315,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
2177 | if (npt_enabled) | 2315 | if (npt_enabled) |
2178 | hsave->save.cr3 = vmcb->save.cr3; | 2316 | hsave->save.cr3 = vmcb->save.cr3; |
2179 | else | 2317 | else |
2180 | hsave->save.cr3 = svm->vcpu.arch.cr3; | 2318 | hsave->save.cr3 = kvm_read_cr3(&svm->vcpu); |
2181 | 2319 | ||
2182 | copy_vmcb_control_area(hsave, vmcb); | 2320 | copy_vmcb_control_area(hsave, vmcb); |
2183 | 2321 | ||
@@ -2229,14 +2367,12 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
2229 | svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL; | 2367 | svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL; |
2230 | 2368 | ||
2231 | /* cache intercepts */ | 2369 | /* cache intercepts */ |
2232 | svm->nested.intercept_cr_read = nested_vmcb->control.intercept_cr_read; | 2370 | svm->nested.intercept_cr = nested_vmcb->control.intercept_cr; |
2233 | svm->nested.intercept_cr_write = nested_vmcb->control.intercept_cr_write; | 2371 | svm->nested.intercept_dr = nested_vmcb->control.intercept_dr; |
2234 | svm->nested.intercept_dr_read = nested_vmcb->control.intercept_dr_read; | ||
2235 | svm->nested.intercept_dr_write = nested_vmcb->control.intercept_dr_write; | ||
2236 | svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions; | 2372 | svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions; |
2237 | svm->nested.intercept = nested_vmcb->control.intercept; | 2373 | svm->nested.intercept = nested_vmcb->control.intercept; |
2238 | 2374 | ||
2239 | force_new_asid(&svm->vcpu); | 2375 | svm_flush_tlb(&svm->vcpu); |
2240 | svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK; | 2376 | svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK; |
2241 | if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK) | 2377 | if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK) |
2242 | svm->vcpu.arch.hflags |= HF_VINTR_MASK; | 2378 | svm->vcpu.arch.hflags |= HF_VINTR_MASK; |
@@ -2245,29 +2381,12 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
2245 | 2381 | ||
2246 | if (svm->vcpu.arch.hflags & HF_VINTR_MASK) { | 2382 | if (svm->vcpu.arch.hflags & HF_VINTR_MASK) { |
2247 | /* We only want the cr8 intercept bits of the guest */ | 2383 | /* We only want the cr8 intercept bits of the guest */ |
2248 | svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR8_MASK; | 2384 | clr_cr_intercept(svm, INTERCEPT_CR8_READ); |
2249 | svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK; | 2385 | clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); |
2250 | } | 2386 | } |
2251 | 2387 | ||
2252 | /* We don't want to see VMMCALLs from a nested guest */ | 2388 | /* We don't want to see VMMCALLs from a nested guest */ |
2253 | svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMMCALL); | 2389 | clr_intercept(svm, INTERCEPT_VMMCALL); |
2254 | |||
2255 | /* | ||
2256 | * We don't want a nested guest to be more powerful than the guest, so | ||
2257 | * all intercepts are ORed | ||
2258 | */ | ||
2259 | svm->vmcb->control.intercept_cr_read |= | ||
2260 | nested_vmcb->control.intercept_cr_read; | ||
2261 | svm->vmcb->control.intercept_cr_write |= | ||
2262 | nested_vmcb->control.intercept_cr_write; | ||
2263 | svm->vmcb->control.intercept_dr_read |= | ||
2264 | nested_vmcb->control.intercept_dr_read; | ||
2265 | svm->vmcb->control.intercept_dr_write |= | ||
2266 | nested_vmcb->control.intercept_dr_write; | ||
2267 | svm->vmcb->control.intercept_exceptions |= | ||
2268 | nested_vmcb->control.intercept_exceptions; | ||
2269 | |||
2270 | svm->vmcb->control.intercept |= nested_vmcb->control.intercept; | ||
2271 | 2390 | ||
2272 | svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl; | 2391 | svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl; |
2273 | svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; | 2392 | svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; |
@@ -2278,11 +2397,21 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
2278 | 2397 | ||
2279 | nested_svm_unmap(page); | 2398 | nested_svm_unmap(page); |
2280 | 2399 | ||
2281 | /* nested_vmcb is our indicator if nested SVM is activated */ | 2400 | /* Enter Guest-Mode */ |
2401 | enter_guest_mode(&svm->vcpu); | ||
2402 | |||
2403 | /* | ||
2404 | * Merge guest and host intercepts - must be called with vcpu in | ||
2405 | * guest-mode to take effect here | ||
2406 | */ | ||
2407 | recalc_intercepts(svm); | ||
2408 | |||
2282 | svm->nested.vmcb = vmcb_gpa; | 2409 | svm->nested.vmcb = vmcb_gpa; |
2283 | 2410 | ||
2284 | enable_gif(svm); | 2411 | enable_gif(svm); |
2285 | 2412 | ||
2413 | mark_all_dirty(svm->vmcb); | ||
2414 | |||
2286 | return true; | 2415 | return true; |
2287 | } | 2416 | } |
2288 | 2417 | ||
@@ -2400,6 +2529,8 @@ static int clgi_interception(struct vcpu_svm *svm) | |||
2400 | svm_clear_vintr(svm); | 2529 | svm_clear_vintr(svm); |
2401 | svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; | 2530 | svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; |
2402 | 2531 | ||
2532 | mark_dirty(svm->vmcb, VMCB_INTR); | ||
2533 | |||
2403 | return 1; | 2534 | return 1; |
2404 | } | 2535 | } |
2405 | 2536 | ||
@@ -2426,6 +2557,19 @@ static int skinit_interception(struct vcpu_svm *svm) | |||
2426 | return 1; | 2557 | return 1; |
2427 | } | 2558 | } |
2428 | 2559 | ||
2560 | static int xsetbv_interception(struct vcpu_svm *svm) | ||
2561 | { | ||
2562 | u64 new_bv = kvm_read_edx_eax(&svm->vcpu); | ||
2563 | u32 index = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX); | ||
2564 | |||
2565 | if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) { | ||
2566 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; | ||
2567 | skip_emulated_instruction(&svm->vcpu); | ||
2568 | } | ||
2569 | |||
2570 | return 1; | ||
2571 | } | ||
2572 | |||
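XSETBV takes its operands implicitly: ECX selects the XCR index and EDX:EAX carry the 64-bit value, and the instruction encodes as the three bytes 0f 01 d1, which is where the next_rip + 3 comes from. kvm_read_edx_eax() plausibly assembles the value along these lines (a sketch, not necessarily the exact helper):

	static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu)
	{
		/* low half from EAX, high half from EDX */
		return (u64)(u32)kvm_register_read(vcpu, VCPU_REGS_RAX) |
		       ((u64)(u32)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32);
	}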
2429 | static int invalid_op_interception(struct vcpu_svm *svm) | 2573 | static int invalid_op_interception(struct vcpu_svm *svm) |
2430 | { | 2574 | { |
2431 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); | 2575 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); |
@@ -2507,19 +2651,92 @@ static int cpuid_interception(struct vcpu_svm *svm) | |||
2507 | static int iret_interception(struct vcpu_svm *svm) | 2651 | static int iret_interception(struct vcpu_svm *svm) |
2508 | { | 2652 | { |
2509 | ++svm->vcpu.stat.nmi_window_exits; | 2653 | ++svm->vcpu.stat.nmi_window_exits; |
2510 | svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_IRET); | 2654 | clr_intercept(svm, INTERCEPT_IRET); |
2511 | svm->vcpu.arch.hflags |= HF_IRET_MASK; | 2655 | svm->vcpu.arch.hflags |= HF_IRET_MASK; |
2512 | return 1; | 2656 | return 1; |
2513 | } | 2657 | } |
2514 | 2658 | ||
2515 | static int invlpg_interception(struct vcpu_svm *svm) | 2659 | static int invlpg_interception(struct vcpu_svm *svm) |
2516 | { | 2660 | { |
2517 | return emulate_instruction(&svm->vcpu, 0, 0, 0) == EMULATE_DONE; | 2661 | if (!static_cpu_has(X86_FEATURE_DECODEASSISTS)) |
2662 | return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; | ||
2663 | |||
2664 | kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1); | ||
2665 | skip_emulated_instruction(&svm->vcpu); | ||
2666 | return 1; | ||
2518 | } | 2667 | } |
2519 | 2668 | ||
2520 | static int emulate_on_interception(struct vcpu_svm *svm) | 2669 | static int emulate_on_interception(struct vcpu_svm *svm) |
2521 | { | 2670 | { |
2522 | return emulate_instruction(&svm->vcpu, 0, 0, 0) == EMULATE_DONE; | 2671 | return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; |
2672 | } | ||
2673 | |||
2674 | #define CR_VALID (1ULL << 63) | ||
2675 | |||
2676 | static int cr_interception(struct vcpu_svm *svm) | ||
2677 | { | ||
2678 | int reg, cr; | ||
2679 | unsigned long val; | ||
2680 | int err; | ||
2681 | |||
2682 | if (!static_cpu_has(X86_FEATURE_DECODEASSISTS)) | ||
2683 | return emulate_on_interception(svm); | ||
2684 | |||
2685 | if (unlikely((svm->vmcb->control.exit_info_1 & CR_VALID) == 0)) | ||
2686 | return emulate_on_interception(svm); | ||
2687 | |||
2688 | reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK; | ||
2689 | cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0; | ||
2690 | |||
2691 | err = 0; | ||
2692 | if (cr >= 16) { /* mov to cr */ | ||
2693 | cr -= 16; | ||
2694 | val = kvm_register_read(&svm->vcpu, reg); | ||
2695 | switch (cr) { | ||
2696 | case 0: | ||
2697 | err = kvm_set_cr0(&svm->vcpu, val); | ||
2698 | break; | ||
2699 | case 3: | ||
2700 | err = kvm_set_cr3(&svm->vcpu, val); | ||
2701 | break; | ||
2702 | case 4: | ||
2703 | err = kvm_set_cr4(&svm->vcpu, val); | ||
2704 | break; | ||
2705 | case 8: | ||
2706 | err = kvm_set_cr8(&svm->vcpu, val); | ||
2707 | break; | ||
2708 | default: | ||
2709 | WARN(1, "unhandled write to CR%d", cr); | ||
2710 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); | ||
2711 | return 1; | ||
2712 | } | ||
2713 | } else { /* mov from cr */ | ||
2714 | switch (cr) { | ||
2715 | case 0: | ||
2716 | val = kvm_read_cr0(&svm->vcpu); | ||
2717 | break; | ||
2718 | case 2: | ||
2719 | val = svm->vcpu.arch.cr2; | ||
2720 | break; | ||
2721 | case 3: | ||
2722 | val = kvm_read_cr3(&svm->vcpu); | ||
2723 | break; | ||
2724 | case 4: | ||
2725 | val = kvm_read_cr4(&svm->vcpu); | ||
2726 | break; | ||
2727 | case 8: | ||
2728 | val = kvm_get_cr8(&svm->vcpu); | ||
2729 | break; | ||
2730 | default: | ||
2731 | WARN(1, "unhandled read from CR%d", cr); | ||
2732 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); | ||
2733 | return 1; | ||
2734 | } | ||
2735 | kvm_register_write(&svm->vcpu, reg, val); | ||
2736 | } | ||
2737 | kvm_complete_insn_gp(&svm->vcpu, err); | ||
2738 | |||
2739 | return 1; | ||
2523 | } | 2740 | } |
2524 | 2741 | ||
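With decode assists the hardware pre-decodes MOV-CR operands, so no emulation round trip is needed: bit 63 of exit_info_1 (CR_VALID above) marks the decode as valid, the low bits name the GPR, and the exit code encodes both direction and CR number. A worked example under that encoding:

	/*
	 * Guest executes "mov %rsi, %cr3":
	 *   exit_code = SVM_EXIT_WRITE_CR3
	 *   cr  = SVM_EXIT_WRITE_CR3 - SVM_EXIT_READ_CR0 = 16 + 3
	 *   cr >= 16, so it is a write; cr -= 16 leaves 3
	 *   reg = exit_info_1 & SVM_EXITINFO_REG_MASK = VCPU_REGS_RSI
	 * and the handler reads RSI and calls kvm_set_cr3().
	 */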
2525 | static int cr0_write_interception(struct vcpu_svm *svm) | 2742 | static int cr0_write_interception(struct vcpu_svm *svm) |
@@ -2527,7 +2744,7 @@ static int cr0_write_interception(struct vcpu_svm *svm) | |||
2527 | struct kvm_vcpu *vcpu = &svm->vcpu; | 2744 | struct kvm_vcpu *vcpu = &svm->vcpu; |
2528 | int r; | 2745 | int r; |
2529 | 2746 | ||
2530 | r = emulate_instruction(&svm->vcpu, 0, 0, 0); | 2747 | r = cr_interception(svm); |
2531 | 2748 | ||
2532 | if (svm->nested.vmexit_rip) { | 2749 | if (svm->nested.vmexit_rip) { |
2533 | kvm_register_write(vcpu, VCPU_REGS_RIP, svm->nested.vmexit_rip); | 2750 | kvm_register_write(vcpu, VCPU_REGS_RIP, svm->nested.vmexit_rip); |
@@ -2536,22 +2753,47 @@ static int cr0_write_interception(struct vcpu_svm *svm) | |||
2536 | svm->nested.vmexit_rip = 0; | 2753 | svm->nested.vmexit_rip = 0; |
2537 | } | 2754 | } |
2538 | 2755 | ||
2539 | return r == EMULATE_DONE; | 2756 | return r; |
2757 | } | ||
2758 | |||
2759 | static int dr_interception(struct vcpu_svm *svm) | ||
2760 | { | ||
2761 | int reg, dr; | ||
2762 | unsigned long val; | ||
2763 | int err; | ||
2764 | |||
2765 | if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS)) | ||
2766 | return emulate_on_interception(svm); | ||
2767 | |||
2768 | reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK; | ||
2769 | dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0; | ||
2770 | |||
2771 | if (dr >= 16) { /* mov to DRn */ | ||
2772 | val = kvm_register_read(&svm->vcpu, reg); | ||
2773 | kvm_set_dr(&svm->vcpu, dr - 16, val); | ||
2774 | } else { | ||
2775 | err = kvm_get_dr(&svm->vcpu, dr, &val); | ||
2776 | if (!err) | ||
2777 | kvm_register_write(&svm->vcpu, reg, val); | ||
2778 | } | ||
2779 | |||
2780 | return 1; | ||
2540 | } | 2781 | } |
2541 | 2782 | ||
2542 | static int cr8_write_interception(struct vcpu_svm *svm) | 2783 | static int cr8_write_interception(struct vcpu_svm *svm) |
2543 | { | 2784 | { |
2544 | struct kvm_run *kvm_run = svm->vcpu.run; | 2785 | struct kvm_run *kvm_run = svm->vcpu.run; |
2786 | int r; | ||
2545 | 2787 | ||
2546 | u8 cr8_prev = kvm_get_cr8(&svm->vcpu); | 2788 | u8 cr8_prev = kvm_get_cr8(&svm->vcpu); |
2547 | /* instruction emulation calls kvm_set_cr8() */ | 2789 | /* instruction emulation calls kvm_set_cr8() */ |
2548 | emulate_instruction(&svm->vcpu, 0, 0, 0); | 2790 | r = cr_interception(svm); |
2549 | if (irqchip_in_kernel(svm->vcpu.kvm)) { | 2791 | if (irqchip_in_kernel(svm->vcpu.kvm)) { |
2550 | svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK; | 2792 | clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); |
2551 | return 1; | 2793 | return r; |
2552 | } | 2794 | } |
2553 | if (cr8_prev <= kvm_get_cr8(&svm->vcpu)) | 2795 | if (cr8_prev <= kvm_get_cr8(&svm->vcpu)) |
2554 | return 1; | 2796 | return r; |
2555 | kvm_run->exit_reason = KVM_EXIT_SET_TPR; | 2797 | kvm_run->exit_reason = KVM_EXIT_SET_TPR; |
2556 | return 0; | 2798 | return 0; |
2557 | } | 2799 | } |
@@ -2562,14 +2804,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) | |||
2562 | 2804 | ||
2563 | switch (ecx) { | 2805 | switch (ecx) { |
2564 | case MSR_IA32_TSC: { | 2806 | case MSR_IA32_TSC: { |
2565 | u64 tsc_offset; | 2807 | struct vmcb *vmcb = get_host_vmcb(svm); |
2566 | 2808 | ||
2567 | if (is_nested(svm)) | 2809 | *data = vmcb->control.tsc_offset + native_read_tsc(); |
2568 | tsc_offset = svm->nested.hsave->control.tsc_offset; | ||
2569 | else | ||
2570 | tsc_offset = svm->vmcb->control.tsc_offset; | ||
2571 | |||
2572 | *data = tsc_offset + native_read_tsc(); | ||
2573 | break; | 2810 | break; |
2574 | } | 2811 | } |
2575 | case MSR_STAR: | 2812 | case MSR_STAR: |
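The removed is_nested() branching is folded into get_host_vmcb(), which returns the VMCB holding L1's view of the control state: the host save area while a nested guest runs, the active VMCB otherwise. A sketch consistent with the logic it replaces:

	static inline struct vmcb *get_host_vmcb(struct vcpu_svm *svm)
	{
		if (is_guest_mode(&svm->vcpu))
			return svm->nested.hsave;
		else
			return svm->vmcb;
	}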
@@ -2714,7 +2951,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) | |||
2714 | svm->vmcb->save.sysenter_esp = data; | 2951 | svm->vmcb->save.sysenter_esp = data; |
2715 | break; | 2952 | break; |
2716 | case MSR_IA32_DEBUGCTLMSR: | 2953 | case MSR_IA32_DEBUGCTLMSR: |
2717 | if (!svm_has(SVM_FEATURE_LBRV)) { | 2954 | if (!boot_cpu_has(X86_FEATURE_LBRV)) { |
2718 | pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n", | 2955 | pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n", |
2719 | __func__, data); | 2956 | __func__, data); |
2720 | break; | 2957 | break; |
@@ -2723,6 +2960,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) | |||
2723 | return 1; | 2960 | return 1; |
2724 | 2961 | ||
2725 | svm->vmcb->save.dbgctl = data; | 2962 | svm->vmcb->save.dbgctl = data; |
2963 | mark_dirty(svm->vmcb, VMCB_LBR); | ||
2726 | if (data & (1ULL<<0)) | 2964 | if (data & (1ULL<<0)) |
2727 | svm_enable_lbrv(svm); | 2965 | svm_enable_lbrv(svm); |
2728 | else | 2966 | else |
@@ -2775,6 +3013,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm) | |||
2775 | kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); | 3013 | kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); |
2776 | svm_clear_vintr(svm); | 3014 | svm_clear_vintr(svm); |
2777 | svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; | 3015 | svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; |
3016 | mark_dirty(svm->vmcb, VMCB_INTR); | ||
2778 | /* | 3017 | /* |
2779 | * If the user space waits to inject interrupts, exit as soon as | 3018 | * If the user space waits to inject interrupts, exit as soon as |
2780 | * possible | 3019 | * possible |
@@ -2797,31 +3036,31 @@ static int pause_interception(struct vcpu_svm *svm) | |||
2797 | } | 3036 | } |
2798 | 3037 | ||
2799 | static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | 3038 | static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { |
2800 | [SVM_EXIT_READ_CR0] = emulate_on_interception, | 3039 | [SVM_EXIT_READ_CR0] = cr_interception, |
2801 | [SVM_EXIT_READ_CR3] = emulate_on_interception, | 3040 | [SVM_EXIT_READ_CR3] = cr_interception, |
2802 | [SVM_EXIT_READ_CR4] = emulate_on_interception, | 3041 | [SVM_EXIT_READ_CR4] = cr_interception, |
2803 | [SVM_EXIT_READ_CR8] = emulate_on_interception, | 3042 | [SVM_EXIT_READ_CR8] = cr_interception, |
2804 | [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, | 3043 | [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, |
2805 | [SVM_EXIT_WRITE_CR0] = cr0_write_interception, | 3044 | [SVM_EXIT_WRITE_CR0] = cr0_write_interception, |
2806 | [SVM_EXIT_WRITE_CR3] = emulate_on_interception, | 3045 | [SVM_EXIT_WRITE_CR3] = cr_interception, |
2807 | [SVM_EXIT_WRITE_CR4] = emulate_on_interception, | 3046 | [SVM_EXIT_WRITE_CR4] = cr_interception, |
2808 | [SVM_EXIT_WRITE_CR8] = cr8_write_interception, | 3047 | [SVM_EXIT_WRITE_CR8] = cr8_write_interception, |
2809 | [SVM_EXIT_READ_DR0] = emulate_on_interception, | 3048 | [SVM_EXIT_READ_DR0] = dr_interception, |
2810 | [SVM_EXIT_READ_DR1] = emulate_on_interception, | 3049 | [SVM_EXIT_READ_DR1] = dr_interception, |
2811 | [SVM_EXIT_READ_DR2] = emulate_on_interception, | 3050 | [SVM_EXIT_READ_DR2] = dr_interception, |
2812 | [SVM_EXIT_READ_DR3] = emulate_on_interception, | 3051 | [SVM_EXIT_READ_DR3] = dr_interception, |
2813 | [SVM_EXIT_READ_DR4] = emulate_on_interception, | 3052 | [SVM_EXIT_READ_DR4] = dr_interception, |
2814 | [SVM_EXIT_READ_DR5] = emulate_on_interception, | 3053 | [SVM_EXIT_READ_DR5] = dr_interception, |
2815 | [SVM_EXIT_READ_DR6] = emulate_on_interception, | 3054 | [SVM_EXIT_READ_DR6] = dr_interception, |
2816 | [SVM_EXIT_READ_DR7] = emulate_on_interception, | 3055 | [SVM_EXIT_READ_DR7] = dr_interception, |
2817 | [SVM_EXIT_WRITE_DR0] = emulate_on_interception, | 3056 | [SVM_EXIT_WRITE_DR0] = dr_interception, |
2818 | [SVM_EXIT_WRITE_DR1] = emulate_on_interception, | 3057 | [SVM_EXIT_WRITE_DR1] = dr_interception, |
2819 | [SVM_EXIT_WRITE_DR2] = emulate_on_interception, | 3058 | [SVM_EXIT_WRITE_DR2] = dr_interception, |
2820 | [SVM_EXIT_WRITE_DR3] = emulate_on_interception, | 3059 | [SVM_EXIT_WRITE_DR3] = dr_interception, |
2821 | [SVM_EXIT_WRITE_DR4] = emulate_on_interception, | 3060 | [SVM_EXIT_WRITE_DR4] = dr_interception, |
2822 | [SVM_EXIT_WRITE_DR5] = emulate_on_interception, | 3061 | [SVM_EXIT_WRITE_DR5] = dr_interception, |
2823 | [SVM_EXIT_WRITE_DR6] = emulate_on_interception, | 3062 | [SVM_EXIT_WRITE_DR6] = dr_interception, |
2824 | [SVM_EXIT_WRITE_DR7] = emulate_on_interception, | 3063 | [SVM_EXIT_WRITE_DR7] = dr_interception, |
2825 | [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, | 3064 | [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, |
2826 | [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, | 3065 | [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, |
2827 | [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, | 3066 | [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, |
@@ -2854,6 +3093,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
2854 | [SVM_EXIT_WBINVD] = emulate_on_interception, | 3093 | [SVM_EXIT_WBINVD] = emulate_on_interception, |
2855 | [SVM_EXIT_MONITOR] = invalid_op_interception, | 3094 | [SVM_EXIT_MONITOR] = invalid_op_interception, |
2856 | [SVM_EXIT_MWAIT] = invalid_op_interception, | 3095 | [SVM_EXIT_MWAIT] = invalid_op_interception, |
3096 | [SVM_EXIT_XSETBV] = xsetbv_interception, | ||
2857 | [SVM_EXIT_NPF] = pf_interception, | 3097 | [SVM_EXIT_NPF] = pf_interception, |
2858 | }; | 3098 | }; |
2859 | 3099 | ||
@@ -2864,10 +3104,10 @@ void dump_vmcb(struct kvm_vcpu *vcpu) | |||
2864 | struct vmcb_save_area *save = &svm->vmcb->save; | 3104 | struct vmcb_save_area *save = &svm->vmcb->save; |
2865 | 3105 | ||
2866 | pr_err("VMCB Control Area:\n"); | 3106 | pr_err("VMCB Control Area:\n"); |
2867 | pr_err("cr_read: %04x\n", control->intercept_cr_read); | 3107 | pr_err("cr_read: %04x\n", control->intercept_cr & 0xffff); |
2868 | pr_err("cr_write: %04x\n", control->intercept_cr_write); | 3108 | pr_err("cr_write: %04x\n", control->intercept_cr >> 16); |
2869 | pr_err("dr_read: %04x\n", control->intercept_dr_read); | 3109 | pr_err("dr_read: %04x\n", control->intercept_dr & 0xffff); |
2870 | pr_err("dr_write: %04x\n", control->intercept_dr_write); | 3110 | pr_err("dr_write: %04x\n", control->intercept_dr >> 16); |
2871 | pr_err("exceptions: %08x\n", control->intercept_exceptions); | 3111 | pr_err("exceptions: %08x\n", control->intercept_exceptions); |
2872 | pr_err("intercepts: %016llx\n", control->intercept); | 3112 | pr_err("intercepts: %016llx\n", control->intercept); |
2873 | pr_err("pause filter count: %d\n", control->pause_filter_count); | 3113 | pr_err("pause filter count: %d\n", control->pause_filter_count); |
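The separate 16-bit intercept_cr_read/intercept_cr_write words are merged into one 32-bit intercept_cr (and likewise for DR), matching the VMCB layout: reads sit in bits 0-15 and writes in bits 16-31, which is exactly how the dump above splits them back apart. Under that encoding the set/clr helpers used throughout this patch would reduce to something like (bit numbering assumed, e.g. INTERCEPT_CR8_WRITE = 16 + 8):

	static inline void set_cr_intercept(struct vcpu_svm *svm, int bit)
	{
		struct vmcb *vmcb = get_host_vmcb(svm);

		vmcb->control.intercept_cr |= (1U << bit);
		recalc_intercepts(svm);	/* keep the merged view current */
	}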
@@ -2950,15 +3190,23 @@ void dump_vmcb(struct kvm_vcpu *vcpu) | |||
2950 | 3190 | ||
2951 | } | 3191 | } |
2952 | 3192 | ||
3193 | static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) | ||
3194 | { | ||
3195 | struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control; | ||
3196 | |||
3197 | *info1 = control->exit_info_1; | ||
3198 | *info2 = control->exit_info_2; | ||
3199 | } | ||
3200 | |||
2953 | static int handle_exit(struct kvm_vcpu *vcpu) | 3201 | static int handle_exit(struct kvm_vcpu *vcpu) |
2954 | { | 3202 | { |
2955 | struct vcpu_svm *svm = to_svm(vcpu); | 3203 | struct vcpu_svm *svm = to_svm(vcpu); |
2956 | struct kvm_run *kvm_run = vcpu->run; | 3204 | struct kvm_run *kvm_run = vcpu->run; |
2957 | u32 exit_code = svm->vmcb->control.exit_code; | 3205 | u32 exit_code = svm->vmcb->control.exit_code; |
2958 | 3206 | ||
2959 | trace_kvm_exit(exit_code, vcpu); | 3207 | trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM); |
2960 | 3208 | ||
2961 | if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR0_MASK)) | 3209 | if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE)) |
2962 | vcpu->arch.cr0 = svm->vmcb->save.cr0; | 3210 | vcpu->arch.cr0 = svm->vmcb->save.cr0; |
2963 | if (npt_enabled) | 3211 | if (npt_enabled) |
2964 | vcpu->arch.cr3 = svm->vmcb->save.cr3; | 3212 | vcpu->arch.cr3 = svm->vmcb->save.cr3; |
@@ -2970,7 +3218,7 @@ static int handle_exit(struct kvm_vcpu *vcpu) | |||
2970 | return 1; | 3218 | return 1; |
2971 | } | 3219 | } |
2972 | 3220 | ||
2973 | if (is_nested(svm)) { | 3221 | if (is_guest_mode(vcpu)) { |
2974 | int vmexit; | 3222 | int vmexit; |
2975 | 3223 | ||
2976 | trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code, | 3224 | trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code, |
@@ -3033,7 +3281,6 @@ static void pre_svm_run(struct vcpu_svm *svm) | |||
3033 | 3281 | ||
3034 | struct svm_cpu_data *sd = per_cpu(svm_data, cpu); | 3282 | struct svm_cpu_data *sd = per_cpu(svm_data, cpu); |
3035 | 3283 | ||
3036 | svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; | ||
3037 | /* FIXME: handle wraparound of asid_generation */ | 3284 | /* FIXME: handle wraparound of asid_generation */ |
3038 | if (svm->asid_generation != sd->asid_generation) | 3285 | if (svm->asid_generation != sd->asid_generation) |
3039 | new_asid(svm, sd); | 3286 | new_asid(svm, sd); |
@@ -3045,7 +3292,7 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu) | |||
3045 | 3292 | ||
3046 | svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; | 3293 | svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; |
3047 | vcpu->arch.hflags |= HF_NMI_MASK; | 3294 | vcpu->arch.hflags |= HF_NMI_MASK; |
3048 | svm->vmcb->control.intercept |= (1ULL << INTERCEPT_IRET); | 3295 | set_intercept(svm, INTERCEPT_IRET); |
3049 | ++vcpu->stat.nmi_injections; | 3296 | ++vcpu->stat.nmi_injections; |
3050 | } | 3297 | } |
3051 | 3298 | ||
@@ -3058,6 +3305,7 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq) | |||
3058 | control->int_ctl &= ~V_INTR_PRIO_MASK; | 3305 | control->int_ctl &= ~V_INTR_PRIO_MASK; |
3059 | control->int_ctl |= V_IRQ_MASK | | 3306 | control->int_ctl |= V_IRQ_MASK | |
3060 | ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT); | 3307 | ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT); |
3308 | mark_dirty(svm->vmcb, VMCB_INTR); | ||
3061 | } | 3309 | } |
3062 | 3310 | ||
3063 | static void svm_set_irq(struct kvm_vcpu *vcpu) | 3311 | static void svm_set_irq(struct kvm_vcpu *vcpu) |
@@ -3077,14 +3325,14 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) | |||
3077 | { | 3325 | { |
3078 | struct vcpu_svm *svm = to_svm(vcpu); | 3326 | struct vcpu_svm *svm = to_svm(vcpu); |
3079 | 3327 | ||
3080 | if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK)) | 3328 | if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK)) |
3081 | return; | 3329 | return; |
3082 | 3330 | ||
3083 | if (irr == -1) | 3331 | if (irr == -1) |
3084 | return; | 3332 | return; |
3085 | 3333 | ||
3086 | if (tpr >= irr) | 3334 | if (tpr >= irr) |
3087 | svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK; | 3335 | set_cr_intercept(svm, INTERCEPT_CR8_WRITE); |
3088 | } | 3336 | } |
3089 | 3337 | ||
3090 | static int svm_nmi_allowed(struct kvm_vcpu *vcpu) | 3338 | static int svm_nmi_allowed(struct kvm_vcpu *vcpu) |
@@ -3112,10 +3360,10 @@ static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) | |||
3112 | 3360 | ||
3113 | if (masked) { | 3361 | if (masked) { |
3114 | svm->vcpu.arch.hflags |= HF_NMI_MASK; | 3362 | svm->vcpu.arch.hflags |= HF_NMI_MASK; |
3115 | svm->vmcb->control.intercept |= (1ULL << INTERCEPT_IRET); | 3363 | set_intercept(svm, INTERCEPT_IRET); |
3116 | } else { | 3364 | } else { |
3117 | svm->vcpu.arch.hflags &= ~HF_NMI_MASK; | 3365 | svm->vcpu.arch.hflags &= ~HF_NMI_MASK; |
3118 | svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_IRET); | 3366 | clr_intercept(svm, INTERCEPT_IRET); |
3119 | } | 3367 | } |
3120 | } | 3368 | } |
3121 | 3369 | ||
@@ -3131,7 +3379,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) | |||
3131 | 3379 | ||
3132 | ret = !!(vmcb->save.rflags & X86_EFLAGS_IF); | 3380 | ret = !!(vmcb->save.rflags & X86_EFLAGS_IF); |
3133 | 3381 | ||
3134 | if (is_nested(svm)) | 3382 | if (is_guest_mode(vcpu)) |
3135 | return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK); | 3383 | return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK); |
3136 | 3384 | ||
3137 | return ret; | 3385 | return ret; |
@@ -3177,7 +3425,12 @@ static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) | |||
3177 | 3425 | ||
3178 | static void svm_flush_tlb(struct kvm_vcpu *vcpu) | 3426 | static void svm_flush_tlb(struct kvm_vcpu *vcpu) |
3179 | { | 3427 | { |
3180 | force_new_asid(vcpu); | 3428 | struct vcpu_svm *svm = to_svm(vcpu); |
3429 | |||
3430 | if (static_cpu_has(X86_FEATURE_FLUSHBYASID)) | ||
3431 | svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID; | ||
3432 | else | ||
3433 | svm->asid_generation--; | ||
3181 | } | 3434 | } |
3182 | 3435 | ||
3183 | static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu) | 3436 | static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu) |
@@ -3188,10 +3441,10 @@ static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu) | |||
3188 | { | 3441 | { |
3189 | struct vcpu_svm *svm = to_svm(vcpu); | 3442 | struct vcpu_svm *svm = to_svm(vcpu); |
3190 | 3443 | ||
3191 | if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK)) | 3444 | if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK)) |
3192 | return; | 3445 | return; |
3193 | 3446 | ||
3194 | if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) { | 3447 | if (!is_cr_intercept(svm, INTERCEPT_CR8_WRITE)) { |
3195 | int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; | 3448 | int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; |
3196 | kvm_set_cr8(vcpu, cr8); | 3449 | kvm_set_cr8(vcpu, cr8); |
3197 | } | 3450 | } |
@@ -3202,7 +3455,7 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu) | |||
3202 | struct vcpu_svm *svm = to_svm(vcpu); | 3455 | struct vcpu_svm *svm = to_svm(vcpu); |
3203 | u64 cr8; | 3456 | u64 cr8; |
3204 | 3457 | ||
3205 | if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK)) | 3458 | if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK)) |
3206 | return; | 3459 | return; |
3207 | 3460 | ||
3208 | cr8 = kvm_get_cr8(vcpu); | 3461 | cr8 = kvm_get_cr8(vcpu); |
@@ -3289,9 +3542,6 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu) | |||
3289 | static void svm_vcpu_run(struct kvm_vcpu *vcpu) | 3542 | static void svm_vcpu_run(struct kvm_vcpu *vcpu) |
3290 | { | 3543 | { |
3291 | struct vcpu_svm *svm = to_svm(vcpu); | 3544 | struct vcpu_svm *svm = to_svm(vcpu); |
3292 | u16 fs_selector; | ||
3293 | u16 gs_selector; | ||
3294 | u16 ldt_selector; | ||
3295 | 3545 | ||
3296 | svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; | 3546 | svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; |
3297 | svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; | 3547 | svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; |
@@ -3308,10 +3558,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
3308 | 3558 | ||
3309 | sync_lapic_to_cr8(vcpu); | 3559 | sync_lapic_to_cr8(vcpu); |
3310 | 3560 | ||
3311 | save_host_msrs(vcpu); | ||
3312 | savesegment(fs, fs_selector); | ||
3313 | savesegment(gs, gs_selector); | ||
3314 | ldt_selector = kvm_read_ldt(); | ||
3315 | svm->vmcb->save.cr2 = vcpu->arch.cr2; | 3561 | svm->vmcb->save.cr2 = vcpu->arch.cr2; |
3316 | 3562 | ||
3317 | clgi(); | 3563 | clgi(); |
@@ -3389,19 +3635,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
3389 | #endif | 3635 | #endif |
3390 | ); | 3636 | ); |
3391 | 3637 | ||
3392 | vcpu->arch.cr2 = svm->vmcb->save.cr2; | ||
3393 | vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; | ||
3394 | vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; | ||
3395 | vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; | ||
3396 | |||
3397 | load_host_msrs(vcpu); | ||
3398 | kvm_load_ldt(ldt_selector); | ||
3399 | loadsegment(fs, fs_selector); | ||
3400 | #ifdef CONFIG_X86_64 | 3638 | #ifdef CONFIG_X86_64 |
3401 | load_gs_index(gs_selector); | 3639 | wrmsrl(MSR_GS_BASE, svm->host.gs_base); |
3402 | wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs); | ||
3403 | #else | 3640 | #else |
3404 | loadsegment(gs, gs_selector); | 3641 | loadsegment(fs, svm->host.fs); |
3405 | #endif | 3642 | #endif |
3406 | 3643 | ||
3407 | reload_tss(vcpu); | 3644 | reload_tss(vcpu); |
@@ -3410,10 +3647,21 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
3410 | 3647 | ||
3411 | stgi(); | 3648 | stgi(); |
3412 | 3649 | ||
3650 | vcpu->arch.cr2 = svm->vmcb->save.cr2; | ||
3651 | vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; | ||
3652 | vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; | ||
3653 | vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; | ||
3654 | |||
3413 | sync_cr8_to_lapic(vcpu); | 3655 | sync_cr8_to_lapic(vcpu); |
3414 | 3656 | ||
3415 | svm->next_rip = 0; | 3657 | svm->next_rip = 0; |
3416 | 3658 | ||
3659 | svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; | ||
3660 | |||
3661 | /* if exit due to PF check for async PF */ | ||
3662 | if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) | ||
3663 | svm->apf_reason = kvm_read_and_reset_pf_reason(); | ||
3664 | |||
3417 | if (npt_enabled) { | 3665 | if (npt_enabled) { |
3418 | vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR); | 3666 | vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR); |
3419 | vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR); | 3667 | vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR); |
@@ -3426,6 +3674,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
3426 | if (unlikely(svm->vmcb->control.exit_code == | 3674 | if (unlikely(svm->vmcb->control.exit_code == |
3427 | SVM_EXIT_EXCP_BASE + MC_VECTOR)) | 3675 | SVM_EXIT_EXCP_BASE + MC_VECTOR)) |
3428 | svm_handle_mce(svm); | 3676 | svm_handle_mce(svm); |
3677 | |||
3678 | mark_all_clean(svm->vmcb); | ||
3429 | } | 3679 | } |
3430 | 3680 | ||
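mark_all_clean() at the end of the run loop closes the VMCB clean-bits protocol introduced by this patch: every write to a VMCB field is paired with mark_dirty() on the matching bit so the CPU knows which of its cached fields to reload on the next VMRUN. A plausible shape for the helpers, assuming a clean bitmask in the control area (VMCB_DIRTY_MAX and VMCB_ALWAYS_DIRTY_MASK are assumed names; VMCB_INTR, VMCB_LBR, VMCB_CR and VMCB_NPT appear above):

	static inline void mark_all_dirty(struct vmcb *vmcb)
	{
		vmcb->control.clean = 0;	/* reload everything */
	}

	static inline void mark_all_clean(struct vmcb *vmcb)
	{
		vmcb->control.clean = ((1 << VMCB_DIRTY_MAX) - 1)
				       & ~VMCB_ALWAYS_DIRTY_MASK;
	}

	static inline void mark_dirty(struct vmcb *vmcb, int bit)
	{
		vmcb->control.clean &= ~(1 << bit);
	}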
3431 | #undef R | 3681 | #undef R |
@@ -3435,7 +3685,8 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) | |||
3435 | struct vcpu_svm *svm = to_svm(vcpu); | 3685 | struct vcpu_svm *svm = to_svm(vcpu); |
3436 | 3686 | ||
3437 | svm->vmcb->save.cr3 = root; | 3687 | svm->vmcb->save.cr3 = root; |
3438 | force_new_asid(vcpu); | 3688 | mark_dirty(svm->vmcb, VMCB_CR); |
3689 | svm_flush_tlb(vcpu); | ||
3439 | } | 3690 | } |
3440 | 3691 | ||
3441 | static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root) | 3692 | static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root) |
@@ -3443,11 +3694,13 @@ static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root) | |||
3443 | struct vcpu_svm *svm = to_svm(vcpu); | 3694 | struct vcpu_svm *svm = to_svm(vcpu); |
3444 | 3695 | ||
3445 | svm->vmcb->control.nested_cr3 = root; | 3696 | svm->vmcb->control.nested_cr3 = root; |
3697 | mark_dirty(svm->vmcb, VMCB_NPT); | ||
3446 | 3698 | ||
3447 | /* Also sync guest cr3 here in case we live migrate */ | 3699 | /* Also sync guest cr3 here in case we live migrate */ |
3448 | svm->vmcb->save.cr3 = vcpu->arch.cr3; | 3700 | svm->vmcb->save.cr3 = kvm_read_cr3(vcpu); |
3701 | mark_dirty(svm->vmcb, VMCB_CR); | ||
3449 | 3702 | ||
3450 | force_new_asid(vcpu); | 3703 | svm_flush_tlb(vcpu); |
3451 | } | 3704 | } |
3452 | 3705 | ||
3453 | static int is_disabled(void) | 3706 | static int is_disabled(void) |
@@ -3494,10 +3747,6 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu) | |||
3494 | static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) | 3747 | static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) |
3495 | { | 3748 | { |
3496 | switch (func) { | 3749 | switch (func) { |
3497 | case 0x00000001: | ||
3498 | /* Mask out xsave bit as long as it is not supported by SVM */ | ||
3499 | entry->ecx &= ~(bit(X86_FEATURE_XSAVE)); | ||
3500 | break; | ||
3501 | case 0x80000001: | 3750 | case 0x80000001: |
3502 | if (nested) | 3751 | if (nested) |
3503 | entry->ecx |= (1 << 2); /* Set SVM bit */ | 3752 | entry->ecx |= (1 << 2); /* Set SVM bit */ |
@@ -3511,7 +3760,7 @@ static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) | |||
3511 | additional features */ | 3760 | additional features */ |
3512 | 3761 | ||
3513 | /* Support next_rip if host supports it */ | 3762 | /* Support next_rip if host supports it */ |
3514 | if (svm_has(SVM_FEATURE_NRIP)) | 3763 | if (boot_cpu_has(X86_FEATURE_NRIPS)) |
3515 | entry->edx |= SVM_FEATURE_NRIP; | 3764 | entry->edx |= SVM_FEATURE_NRIP; |
3516 | 3765 | ||
3517 | /* Support NPT for the guest if enabled */ | 3766 | /* Support NPT for the guest if enabled */ |
@@ -3571,6 +3820,7 @@ static const struct trace_print_flags svm_exit_reasons_str[] = { | |||
3571 | { SVM_EXIT_WBINVD, "wbinvd" }, | 3820 | { SVM_EXIT_WBINVD, "wbinvd" }, |
3572 | { SVM_EXIT_MONITOR, "monitor" }, | 3821 | { SVM_EXIT_MONITOR, "monitor" }, |
3573 | { SVM_EXIT_MWAIT, "mwait" }, | 3822 | { SVM_EXIT_MWAIT, "mwait" }, |
3823 | { SVM_EXIT_XSETBV, "xsetbv" }, | ||
3574 | { SVM_EXIT_NPF, "npf" }, | 3824 | { SVM_EXIT_NPF, "npf" }, |
3575 | { -1, NULL } | 3825 | { -1, NULL } |
3576 | }; | 3826 | }; |
@@ -3594,9 +3844,7 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) | |||
3594 | { | 3844 | { |
3595 | struct vcpu_svm *svm = to_svm(vcpu); | 3845 | struct vcpu_svm *svm = to_svm(vcpu); |
3596 | 3846 | ||
3597 | svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR; | 3847 | set_exception_intercept(svm, NM_VECTOR); |
3598 | if (is_nested(svm)) | ||
3599 | svm->nested.hsave->control.intercept_exceptions |= 1 << NM_VECTOR; | ||
3600 | update_cr0_intercept(svm); | 3848 | update_cr0_intercept(svm); |
3601 | } | 3849 | } |
3602 | 3850 | ||
@@ -3627,6 +3875,7 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
3627 | .get_cpl = svm_get_cpl, | 3875 | .get_cpl = svm_get_cpl, |
3628 | .get_cs_db_l_bits = kvm_get_cs_db_l_bits, | 3876 | .get_cs_db_l_bits = kvm_get_cs_db_l_bits, |
3629 | .decache_cr0_guest_bits = svm_decache_cr0_guest_bits, | 3877 | .decache_cr0_guest_bits = svm_decache_cr0_guest_bits, |
3878 | .decache_cr3 = svm_decache_cr3, | ||
3630 | .decache_cr4_guest_bits = svm_decache_cr4_guest_bits, | 3879 | .decache_cr4_guest_bits = svm_decache_cr4_guest_bits, |
3631 | .set_cr0 = svm_set_cr0, | 3880 | .set_cr0 = svm_set_cr0, |
3632 | .set_cr3 = svm_set_cr3, | 3881 | .set_cr3 = svm_set_cr3, |
@@ -3667,7 +3916,9 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
3667 | .get_tdp_level = get_npt_level, | 3916 | .get_tdp_level = get_npt_level, |
3668 | .get_mt_mask = svm_get_mt_mask, | 3917 | .get_mt_mask = svm_get_mt_mask, |
3669 | 3918 | ||
3919 | .get_exit_info = svm_get_exit_info, | ||
3670 | .exit_reasons_str = svm_exit_reasons_str, | 3920 | .exit_reasons_str = svm_exit_reasons_str, |
3921 | |||
3671 | .get_lpage_level = svm_get_lpage_level, | 3922 | .get_lpage_level = svm_get_lpage_level, |
3672 | 3923 | ||
3673 | .cpuid_update = svm_cpuid_update, | 3924 | .cpuid_update = svm_cpuid_update, |
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index a6544b8e7c0f..1357d7cf4ec8 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h | |||
@@ -178,27 +178,36 @@ TRACE_EVENT(kvm_apic, | |||
178 | #define trace_kvm_apic_read(reg, val) trace_kvm_apic(0, reg, val) | 178 | #define trace_kvm_apic_read(reg, val) trace_kvm_apic(0, reg, val) |
179 | #define trace_kvm_apic_write(reg, val) trace_kvm_apic(1, reg, val) | 179 | #define trace_kvm_apic_write(reg, val) trace_kvm_apic(1, reg, val) |
180 | 180 | ||
181 | #define KVM_ISA_VMX 1 | ||
182 | #define KVM_ISA_SVM 2 | ||
183 | |||
181 | /* | 184 | /* |
182 | * Tracepoint for kvm guest exit: | 185 | * Tracepoint for kvm guest exit: |
183 | */ | 186 | */ |
184 | TRACE_EVENT(kvm_exit, | 187 | TRACE_EVENT(kvm_exit, |
185 | TP_PROTO(unsigned int exit_reason, struct kvm_vcpu *vcpu), | 188 | TP_PROTO(unsigned int exit_reason, struct kvm_vcpu *vcpu, u32 isa), |
186 | TP_ARGS(exit_reason, vcpu), | 189 | TP_ARGS(exit_reason, vcpu, isa), |
187 | 190 | ||
188 | TP_STRUCT__entry( | 191 | TP_STRUCT__entry( |
189 | __field( unsigned int, exit_reason ) | 192 | __field( unsigned int, exit_reason ) |
190 | __field( unsigned long, guest_rip ) | 193 | __field( unsigned long, guest_rip ) |
194 | __field( u32, isa ) | ||
195 | __field( u64, info1 ) | ||
196 | __field( u64, info2 ) | ||
191 | ), | 197 | ), |
192 | 198 | ||
193 | TP_fast_assign( | 199 | TP_fast_assign( |
194 | __entry->exit_reason = exit_reason; | 200 | __entry->exit_reason = exit_reason; |
195 | __entry->guest_rip = kvm_rip_read(vcpu); | 201 | __entry->guest_rip = kvm_rip_read(vcpu); |
202 | __entry->isa = isa; | ||
203 | kvm_x86_ops->get_exit_info(vcpu, &__entry->info1, | ||
204 | &__entry->info2); | ||
196 | ), | 205 | ), |
197 | 206 | ||
198 | TP_printk("reason %s rip 0x%lx", | 207 | TP_printk("reason %s rip 0x%lx info %llx %llx", |
199 | ftrace_print_symbols_seq(p, __entry->exit_reason, | 208 | ftrace_print_symbols_seq(p, __entry->exit_reason, |
200 | kvm_x86_ops->exit_reasons_str), | 209 | kvm_x86_ops->exit_reasons_str), |
201 | __entry->guest_rip) | 210 | __entry->guest_rip, __entry->info1, __entry->info2) |
202 | ); | 211 | ); |
203 | 212 | ||
204 | /* | 213 | /* |
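VMX and SVM number their exit reasons from overlapping ranges, so the tracepoint now records which ISA produced the exit plus two ISA-specific info words filled in through the new get_exit_info callback: exit_info_1/exit_info_2 on SVM, EXIT_QUALIFICATION and VM_EXIT_INTR_INFO on VMX, as the two implementations in this patch show. Consumers see only the common contract:

	u64 info1, info2;
	kvm_x86_ops->get_exit_info(vcpu, &info1, &info2);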
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 81fcbe9515c5..bf89ec2cfb82 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -69,6 +69,9 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO); | |||
69 | static int __read_mostly vmm_exclusive = 1; | 69 | static int __read_mostly vmm_exclusive = 1; |
70 | module_param(vmm_exclusive, bool, S_IRUGO); | 70 | module_param(vmm_exclusive, bool, S_IRUGO); |
71 | 71 | ||
72 | static int __read_mostly yield_on_hlt = 1; | ||
73 | module_param(yield_on_hlt, bool, S_IRUGO); | ||
74 | |||
72 | #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ | 75 | #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ |
73 | (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD) | 76 | (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD) |
74 | #define KVM_GUEST_CR0_MASK \ | 77 | #define KVM_GUEST_CR0_MASK \ |
@@ -177,6 +180,7 @@ static int init_rmode(struct kvm *kvm); | |||
177 | static u64 construct_eptp(unsigned long root_hpa); | 180 | static u64 construct_eptp(unsigned long root_hpa); |
178 | static void kvm_cpu_vmxon(u64 addr); | 181 | static void kvm_cpu_vmxon(u64 addr); |
179 | static void kvm_cpu_vmxoff(void); | 182 | static void kvm_cpu_vmxoff(void); |
183 | static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); | ||
180 | 184 | ||
181 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); | 185 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); |
182 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); | 186 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); |
@@ -188,6 +192,8 @@ static unsigned long *vmx_io_bitmap_b; | |||
188 | static unsigned long *vmx_msr_bitmap_legacy; | 192 | static unsigned long *vmx_msr_bitmap_legacy; |
189 | static unsigned long *vmx_msr_bitmap_longmode; | 193 | static unsigned long *vmx_msr_bitmap_longmode; |
190 | 194 | ||
195 | static bool cpu_has_load_ia32_efer; | ||
196 | |||
191 | static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS); | 197 | static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS); |
192 | static DEFINE_SPINLOCK(vmx_vpid_lock); | 198 | static DEFINE_SPINLOCK(vmx_vpid_lock); |
193 | 199 | ||
@@ -472,7 +478,7 @@ static void vmcs_clear(struct vmcs *vmcs) | |||
472 | u8 error; | 478 | u8 error; |
473 | 479 | ||
474 | asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) "; setna %0" | 480 | asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) "; setna %0" |
475 | : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) | 481 | : "=qm"(error) : "a"(&phys_addr), "m"(phys_addr) |
476 | : "cc", "memory"); | 482 | : "cc", "memory"); |
477 | if (error) | 483 | if (error) |
478 | printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n", | 484 | printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n", |
@@ -485,7 +491,7 @@ static void vmcs_load(struct vmcs *vmcs) | |||
485 | u8 error; | 491 | u8 error; |
486 | 492 | ||
487 | asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0" | 493 | asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0" |
488 | : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) | 494 | : "=qm"(error) : "a"(&phys_addr), "m"(phys_addr) |
489 | : "cc", "memory"); | 495 | : "cc", "memory"); |
490 | if (error) | 496 | if (error) |
491 | printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n", | 497 | printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n", |
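Both setna fixes address the same latent bug: "=g" lets the compiler pick any general register for the flag byte, but on 32-bit x86 %esi and %edi have no 8-bit subregister, so setna can be handed an operand it cannot encode. "=qm" restricts the operand to a byte-addressable register or memory:

	/* i386: "=g" may choose %esi, but "setna %si" does not exist;
	 * "=qm" limits the operand to %al/%bl/%cl/%dl or a memory slot. */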
@@ -565,10 +571,10 @@ static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa) | |||
565 | 571 | ||
566 | static unsigned long vmcs_readl(unsigned long field) | 572 | static unsigned long vmcs_readl(unsigned long field) |
567 | { | 573 | { |
568 | unsigned long value; | 574 | unsigned long value = 0; |
569 | 575 | ||
570 | asm volatile (__ex(ASM_VMX_VMREAD_RDX_RAX) | 576 | asm volatile (__ex(ASM_VMX_VMREAD_RDX_RAX) |
571 | : "=a"(value) : "d"(field) : "cc"); | 577 | : "+a"(value) : "d"(field) : "cc"); |
572 | return value; | 578 | return value; |
573 | } | 579 | } |
574 | 580 | ||
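VMREAD leaves its destination unmodified when it fails, while the old "=a" constraint promised the compiler that value is always written, so a failed read handed back whatever happened to be in the register. Initializing value to 0 and using the read-write "+a" constraint makes failures return a well-defined 0 instead.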
@@ -661,6 +667,12 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) | |||
661 | unsigned i; | 667 | unsigned i; |
662 | struct msr_autoload *m = &vmx->msr_autoload; | 668 | struct msr_autoload *m = &vmx->msr_autoload; |
663 | 669 | ||
670 | if (msr == MSR_EFER && cpu_has_load_ia32_efer) { | ||
671 | vmcs_clear_bits(VM_ENTRY_CONTROLS, VM_ENTRY_LOAD_IA32_EFER); | ||
672 | vmcs_clear_bits(VM_EXIT_CONTROLS, VM_EXIT_LOAD_IA32_EFER); | ||
673 | return; | ||
674 | } | ||
675 | |||
664 | for (i = 0; i < m->nr; ++i) | 676 | for (i = 0; i < m->nr; ++i) |
665 | if (m->guest[i].index == msr) | 677 | if (m->guest[i].index == msr) |
666 | break; | 678 | break; |
@@ -680,6 +692,14 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, | |||
680 | unsigned i; | 692 | unsigned i; |
681 | struct msr_autoload *m = &vmx->msr_autoload; | 693 | struct msr_autoload *m = &vmx->msr_autoload; |
682 | 694 | ||
695 | if (msr == MSR_EFER && cpu_has_load_ia32_efer) { | ||
696 | vmcs_write64(GUEST_IA32_EFER, guest_val); | ||
697 | vmcs_write64(HOST_IA32_EFER, host_val); | ||
698 | vmcs_set_bits(VM_ENTRY_CONTROLS, VM_ENTRY_LOAD_IA32_EFER); | ||
699 | vmcs_set_bits(VM_EXIT_CONTROLS, VM_EXIT_LOAD_IA32_EFER); | ||
700 | return; | ||
701 | } | ||
702 | |||
683 | for (i = 0; i < m->nr; ++i) | 703 | for (i = 0; i < m->nr; ++i) |
684 | if (m->guest[i].index == msr) | 704 | if (m->guest[i].index == msr) |
685 | break; | 705 | break; |
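When the CPU offers the dedicated load-IA32_EFER VM-entry/VM-exit controls, EFER switching moves out of the MSR autoload array and into the VMCS itself, which is both atomic with the transition and cheaper. Callers are unchanged; a hypothetical call site (guest_efer/host_efer are illustrative names):

	/* picks the VMCS path when cpu_has_load_ia32_efer, else autoload */
	add_atomic_switch_msr(vmx, MSR_EFER, guest_efer, host_efer);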
@@ -1009,6 +1029,17 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
1009 | vmx_set_interrupt_shadow(vcpu, 0); | 1029 | vmx_set_interrupt_shadow(vcpu, 0); |
1010 | } | 1030 | } |
1011 | 1031 | ||
1032 | static void vmx_clear_hlt(struct kvm_vcpu *vcpu) | ||
1033 | { | ||
1034 | /* Ensure that we clear the HLT state in the VMCS. We don't need to | ||
1035 | * explicitly skip the instruction because if the HLT state is set, then | ||
1036 | * the instruction is already executing and RIP has already been | ||
1037 | * advanced. */ | ||
1038 | if (!yield_on_hlt && | ||
1039 | vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT) | ||
1040 | vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); | ||
1041 | } | ||
1042 | |||
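The new yield_on_hlt parameter (default on, preserving current behaviour) makes HLT exits optional: with yield_on_hlt=0 the CPU_BASED_HLT_EXITING control is left clear in setup_vmcs_config() below and a halting vcpu parks in the guest's HLT activity state instead of exiting. Injecting an event into a halted guest then requires moving the activity state back to ACTIVE first, which is why vmx_queue_exception(), vmx_inject_irq() and vmx_inject_nmi() all gain a vmx_clear_hlt() call in the following hunks.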
1012 | static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | 1043 | static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, |
1013 | bool has_error_code, u32 error_code, | 1044 | bool has_error_code, u32 error_code, |
1014 | bool reinject) | 1045 | bool reinject) |
@@ -1035,6 +1066,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | |||
1035 | intr_info |= INTR_TYPE_HARD_EXCEPTION; | 1066 | intr_info |= INTR_TYPE_HARD_EXCEPTION; |
1036 | 1067 | ||
1037 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); | 1068 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); |
1069 | vmx_clear_hlt(vcpu); | ||
1038 | } | 1070 | } |
1039 | 1071 | ||
1040 | static bool vmx_rdtscp_supported(void) | 1072 | static bool vmx_rdtscp_supported(void) |
@@ -1305,8 +1337,11 @@ static __init int vmx_disabled_by_bios(void) | |||
1305 | && tboot_enabled()) | 1337 | && tboot_enabled()) |
1306 | return 1; | 1338 | return 1; |
1307 | if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) | 1339 | if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) |
1308 | && !tboot_enabled()) | 1340 | && !tboot_enabled()) { |
1341 | printk(KERN_WARNING "kvm: disable TXT in the BIOS or " | ||
1342 | " activate TXT before enabling KVM\n"); | ||
1309 | return 1; | 1343 | return 1; |
1344 | } | ||
1310 | } | 1345 | } |
1311 | 1346 | ||
1312 | return 0; | 1347 | return 0; |
@@ -1400,6 +1435,14 @@ static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, | |||
1400 | return 0; | 1435 | return 0; |
1401 | } | 1436 | } |
1402 | 1437 | ||
1438 | static __init bool allow_1_setting(u32 msr, u32 ctl) | ||
1439 | { | ||
1440 | u32 vmx_msr_low, vmx_msr_high; | ||
1441 | |||
1442 | rdmsr(msr, vmx_msr_low, vmx_msr_high); | ||
1443 | return vmx_msr_high & ctl; | ||
1444 | } | ||
1445 | |||
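The VMX capability MSRs encode allowed-0 settings in their low 32 bits and allowed-1 settings in the high 32 bits; a control bit may be raised only if it is set in the high half, which is all allow_1_setting() checks. An illustrative use (MSR name from msr-index.h):

	/* can this CPU run with HLT exiting enabled? */
	bool can_hlt_exit = allow_1_setting(MSR_IA32_VMX_PROCBASED_CTLS,
					    CPU_BASED_HLT_EXITING);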
1403 | static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | 1446 | static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) |
1404 | { | 1447 | { |
1405 | u32 vmx_msr_low, vmx_msr_high; | 1448 | u32 vmx_msr_low, vmx_msr_high; |
@@ -1416,7 +1459,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
1416 | &_pin_based_exec_control) < 0) | 1459 | &_pin_based_exec_control) < 0) |
1417 | return -EIO; | 1460 | return -EIO; |
1418 | 1461 | ||
1419 | min = CPU_BASED_HLT_EXITING | | 1462 | min = |
1420 | #ifdef CONFIG_X86_64 | 1463 | #ifdef CONFIG_X86_64 |
1421 | CPU_BASED_CR8_LOAD_EXITING | | 1464 | CPU_BASED_CR8_LOAD_EXITING | |
1422 | CPU_BASED_CR8_STORE_EXITING | | 1465 | CPU_BASED_CR8_STORE_EXITING | |
@@ -1429,6 +1472,10 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
1429 | CPU_BASED_MWAIT_EXITING | | 1472 | CPU_BASED_MWAIT_EXITING | |
1430 | CPU_BASED_MONITOR_EXITING | | 1473 | CPU_BASED_MONITOR_EXITING | |
1431 | CPU_BASED_INVLPG_EXITING; | 1474 | CPU_BASED_INVLPG_EXITING; |
1475 | |||
1476 | if (yield_on_hlt) | ||
1477 | min |= CPU_BASED_HLT_EXITING; | ||
1478 | |||
1432 | opt = CPU_BASED_TPR_SHADOW | | 1479 | opt = CPU_BASED_TPR_SHADOW | |
1433 | CPU_BASED_USE_MSR_BITMAPS | | 1480 | CPU_BASED_USE_MSR_BITMAPS | |
1434 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; | 1481 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; |
@@ -1510,6 +1557,12 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
1510 | vmcs_conf->vmexit_ctrl = _vmexit_control; | 1557 | vmcs_conf->vmexit_ctrl = _vmexit_control; |
1511 | vmcs_conf->vmentry_ctrl = _vmentry_control; | 1558 | vmcs_conf->vmentry_ctrl = _vmentry_control; |
1512 | 1559 | ||
1560 | cpu_has_load_ia32_efer = | ||
1561 | allow_1_setting(MSR_IA32_VMX_ENTRY_CTLS, | ||
1562 | VM_ENTRY_LOAD_IA32_EFER) | ||
1563 | && allow_1_setting(MSR_IA32_VMX_EXIT_CTLS, | ||
1564 | VM_EXIT_LOAD_IA32_EFER); | ||
1565 | |||
1513 | return 0; | 1566 | return 0; |
1514 | } | 1567 | } |
1515 | 1568 | ||
@@ -1683,9 +1736,13 @@ static void fix_rmode_seg(int seg, struct kvm_save_segment *save) | |||
1683 | save->limit = vmcs_read32(sf->limit); | 1736 | save->limit = vmcs_read32(sf->limit); |
1684 | save->ar = vmcs_read32(sf->ar_bytes); | 1737 | save->ar = vmcs_read32(sf->ar_bytes); |
1685 | vmcs_write16(sf->selector, save->base >> 4); | 1738 | vmcs_write16(sf->selector, save->base >> 4); |
1686 | vmcs_write32(sf->base, save->base & 0xfffff); | 1739 | vmcs_write32(sf->base, save->base & 0xffff0); |
1687 | vmcs_write32(sf->limit, 0xffff); | 1740 | vmcs_write32(sf->limit, 0xffff); |
1688 | vmcs_write32(sf->ar_bytes, 0xf3); | 1741 | vmcs_write32(sf->ar_bytes, 0xf3); |
1742 | if (save->base & 0xf) | ||
1743 | printk_once(KERN_WARNING "kvm: segment base is not paragraph" | ||
1744 | " aligned when entering protected mode (seg=%d)", | ||
1745 | seg); | ||
1689 | } | 1746 | } |
1690 | 1747 | ||
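A real-mode segment base must equal selector << 4. The old 0xfffff mask kept the low nibble, so an unaligned protected-mode base produced a selector/base pair that disagreed; 0xffff0 keeps them consistent, and the new printk_once flags that the low 4 bits were dropped. Worked through:

	/* save->base = 0x12345 (not 16-byte aligned):
	 *   selector   = 0x12345 >> 4      = 0x1234
	 *   old base   = 0x12345 & 0xfffff = 0x12345   (!= 0x1234 << 4)
	 *   new base   = 0x12345 & 0xffff0 = 0x12340   (== 0x1234 << 4)
	 */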
1691 | static void enter_rmode(struct kvm_vcpu *vcpu) | 1748 | static void enter_rmode(struct kvm_vcpu *vcpu) |
@@ -1814,6 +1871,13 @@ static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) | |||
1814 | vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits; | 1871 | vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits; |
1815 | } | 1872 | } |
1816 | 1873 | ||
1874 | static void vmx_decache_cr3(struct kvm_vcpu *vcpu) | ||
1875 | { | ||
1876 | if (enable_ept && is_paging(vcpu)) | ||
1877 | vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); | ||
1878 | __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); | ||
1879 | } | ||
1880 | |||
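CR3 joins the lazily-decached registers: with EPT the guest rewrites CR3 without exiting, so vcpu->arch.cr3 may be stale until somebody asks for it. Readers now go through kvm_read_cr3(), which presumably decaches on first use, something like this sketch:

	static inline unsigned long kvm_read_cr3(struct kvm_vcpu *vcpu)
	{
		if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
			kvm_x86_ops->decache_cr3(vcpu);
		return vcpu->arch.cr3;
	}

vmx_vcpu_run (below) clears VCPU_EXREG_CR3 from regs_avail after every exit, forcing a fresh VMCS read the next time the value is wanted.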
1817 | static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) | 1881 | static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) |
1818 | { | 1882 | { |
1819 | ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits; | 1883 | ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits; |
@@ -1857,6 +1921,7 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, | |||
1857 | unsigned long cr0, | 1921 | unsigned long cr0, |
1858 | struct kvm_vcpu *vcpu) | 1922 | struct kvm_vcpu *vcpu) |
1859 | { | 1923 | { |
1924 | vmx_decache_cr3(vcpu); | ||
1860 | if (!(cr0 & X86_CR0_PG)) { | 1925 | if (!(cr0 & X86_CR0_PG)) { |
1861 | /* From paging/starting to nonpaging */ | 1926 | /* From paging/starting to nonpaging */ |
1862 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, | 1927 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, |
@@ -1937,7 +2002,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
1937 | if (enable_ept) { | 2002 | if (enable_ept) { |
1938 | eptp = construct_eptp(cr3); | 2003 | eptp = construct_eptp(cr3); |
1939 | vmcs_write64(EPT_POINTER, eptp); | 2004 | vmcs_write64(EPT_POINTER, eptp); |
1940 | guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 : | 2005 | guest_cr3 = is_paging(vcpu) ? kvm_read_cr3(vcpu) : |
1941 | vcpu->kvm->arch.ept_identity_map_addr; | 2006 | vcpu->kvm->arch.ept_identity_map_addr; |
1942 | ept_load_pdptrs(vcpu); | 2007 | ept_load_pdptrs(vcpu); |
1943 | } | 2008 | } |
@@ -2725,7 +2790,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
2725 | vmcs_writel(GUEST_IDTR_BASE, 0); | 2790 | vmcs_writel(GUEST_IDTR_BASE, 0); |
2726 | vmcs_write32(GUEST_IDTR_LIMIT, 0xffff); | 2791 | vmcs_write32(GUEST_IDTR_LIMIT, 0xffff); |
2727 | 2792 | ||
2728 | vmcs_write32(GUEST_ACTIVITY_STATE, 0); | 2793 | vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); |
2729 | vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0); | 2794 | vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0); |
2730 | vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0); | 2795 | vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0); |
2731 | 2796 | ||
@@ -2787,6 +2852,10 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) | |||
2787 | return; | 2852 | return; |
2788 | } | 2853 | } |
2789 | 2854 | ||
2855 | if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { | ||
2856 | enable_irq_window(vcpu); | ||
2857 | return; | ||
2858 | } | ||
2790 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | 2859 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); |
2791 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; | 2860 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; |
2792 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | 2861 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); |
@@ -2814,6 +2883,7 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu) | |||
2814 | } else | 2883 | } else |
2815 | intr |= INTR_TYPE_EXT_INTR; | 2884 | intr |= INTR_TYPE_EXT_INTR; |
2816 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr); | 2885 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr); |
2886 | vmx_clear_hlt(vcpu); | ||
2817 | } | 2887 | } |
2818 | 2888 | ||
2819 | static void vmx_inject_nmi(struct kvm_vcpu *vcpu) | 2889 | static void vmx_inject_nmi(struct kvm_vcpu *vcpu) |
@@ -2841,6 +2911,7 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) | |||
2841 | } | 2911 | } |
2842 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, | 2912 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, |
2843 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); | 2913 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); |
2914 | vmx_clear_hlt(vcpu); | ||
2844 | } | 2915 | } |
2845 | 2916 | ||
2846 | static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) | 2917 | static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) |
@@ -2849,7 +2920,8 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) | |||
2849 | return 0; | 2920 | return 0; |
2850 | 2921 | ||
2851 | return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & | 2922 | return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & |
2852 | (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_NMI)); | 2923 | (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI |
2924 | | GUEST_INTR_STATE_NMI)); | ||
2853 | } | 2925 | } |
2854 | 2926 | ||
2855 | static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) | 2927 | static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) |
@@ -2910,7 +2982,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, | |||
2910 | * Cause the #SS fault with 0 error code in VM86 mode. | 2982 | * Cause the #SS fault with 0 error code in VM86 mode. |
2911 | */ | 2983 | */ |
2912 | if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) | 2984 | if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) |
2913 | if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE) | 2985 | if (emulate_instruction(vcpu, 0) == EMULATE_DONE) |
2914 | return 1; | 2986 | return 1; |
2915 | /* | 2987 | /* |
2916 | * Forward all other exceptions that are valid in real mode. | 2988 | * Forward all other exceptions that are valid in real mode. |
@@ -3007,7 +3079,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) | |||
3007 | } | 3079 | } |
3008 | 3080 | ||
3009 | if (is_invalid_opcode(intr_info)) { | 3081 | if (is_invalid_opcode(intr_info)) { |
3010 | er = emulate_instruction(vcpu, 0, 0, EMULTYPE_TRAP_UD); | 3082 | er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD); |
3011 | if (er != EMULATE_DONE) | 3083 | if (er != EMULATE_DONE) |
3012 | kvm_queue_exception(vcpu, UD_VECTOR); | 3084 | kvm_queue_exception(vcpu, UD_VECTOR); |
3013 | return 1; | 3085 | return 1; |
@@ -3026,7 +3098,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) | |||
3026 | 3098 | ||
3027 | if (kvm_event_needs_reinjection(vcpu)) | 3099 | if (kvm_event_needs_reinjection(vcpu)) |
3028 | kvm_mmu_unprotect_page_virt(vcpu, cr2); | 3100 | kvm_mmu_unprotect_page_virt(vcpu, cr2); |
3029 | return kvm_mmu_page_fault(vcpu, cr2, error_code); | 3101 | return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0); |
3030 | } | 3102 | } |
3031 | 3103 | ||
3032 | if (vmx->rmode.vm86_active && | 3104 | if (vmx->rmode.vm86_active && |
@@ -3098,7 +3170,7 @@ static int handle_io(struct kvm_vcpu *vcpu) | |||
3098 | ++vcpu->stat.io_exits; | 3170 | ++vcpu->stat.io_exits; |
3099 | 3171 | ||
3100 | if (string || in) | 3172 | if (string || in) |
3101 | return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE; | 3173 | return emulate_instruction(vcpu, 0) == EMULATE_DONE; |
3102 | 3174 | ||
3103 | port = exit_qualification >> 16; | 3175 | port = exit_qualification >> 16; |
3104 | size = (exit_qualification & 7) + 1; | 3176 | size = (exit_qualification & 7) + 1; |
@@ -3118,14 +3190,6 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) | |||
3118 | hypercall[2] = 0xc1; | 3190 | hypercall[2] = 0xc1; |
3119 | } | 3191 | } |
3120 | 3192 | ||
3121 | static void complete_insn_gp(struct kvm_vcpu *vcpu, int err) | ||
3122 | { | ||
3123 | if (err) | ||
3124 | kvm_inject_gp(vcpu, 0); | ||
3125 | else | ||
3126 | skip_emulated_instruction(vcpu); | ||
3127 | } | ||
3128 | |||
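The VMX-local complete_insn_gp() removed here is promoted to a common kvm_complete_insn_gp() so the new SVM cr_interception()/dr_interception() handlers can share it. Its body is presumably the one deleted above, with the skip routed through kvm_x86_ops since common code cannot call the vendor-static helper directly (an assumption):

	void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
	{
		if (err)
			kvm_inject_gp(vcpu, 0);
		else
			kvm_x86_ops->skip_emulated_instruction(vcpu);
	}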
3129 | static int handle_cr(struct kvm_vcpu *vcpu) | 3193 | static int handle_cr(struct kvm_vcpu *vcpu) |
3130 | { | 3194 | { |
3131 | unsigned long exit_qualification, val; | 3195 | unsigned long exit_qualification, val; |
@@ -3143,21 +3207,21 @@ static int handle_cr(struct kvm_vcpu *vcpu) | |||
3143 | switch (cr) { | 3207 | switch (cr) { |
3144 | case 0: | 3208 | case 0: |
3145 | err = kvm_set_cr0(vcpu, val); | 3209 | err = kvm_set_cr0(vcpu, val); |
3146 | complete_insn_gp(vcpu, err); | 3210 | kvm_complete_insn_gp(vcpu, err); |
3147 | return 1; | 3211 | return 1; |
3148 | case 3: | 3212 | case 3: |
3149 | err = kvm_set_cr3(vcpu, val); | 3213 | err = kvm_set_cr3(vcpu, val); |
3150 | complete_insn_gp(vcpu, err); | 3214 | kvm_complete_insn_gp(vcpu, err); |
3151 | return 1; | 3215 | return 1; |
3152 | case 4: | 3216 | case 4: |
3153 | err = kvm_set_cr4(vcpu, val); | 3217 | err = kvm_set_cr4(vcpu, val); |
3154 | complete_insn_gp(vcpu, err); | 3218 | kvm_complete_insn_gp(vcpu, err); |
3155 | return 1; | 3219 | return 1; |
3156 | case 8: { | 3220 | case 8: { |
3157 | u8 cr8_prev = kvm_get_cr8(vcpu); | 3221 | u8 cr8_prev = kvm_get_cr8(vcpu); |
3158 | u8 cr8 = kvm_register_read(vcpu, reg); | 3222 | u8 cr8 = kvm_register_read(vcpu, reg); |
3159 | kvm_set_cr8(vcpu, cr8); | 3223 | err = kvm_set_cr8(vcpu, cr8); |
3160 | skip_emulated_instruction(vcpu); | 3224 | kvm_complete_insn_gp(vcpu, err); |
3161 | if (irqchip_in_kernel(vcpu->kvm)) | 3225 | if (irqchip_in_kernel(vcpu->kvm)) |
3162 | return 1; | 3226 | return 1; |
3163 | if (cr8_prev <= cr8) | 3227 | if (cr8_prev <= cr8) |
@@ -3176,8 +3240,9 @@ static int handle_cr(struct kvm_vcpu *vcpu) | |||
3176 | case 1: /*mov from cr*/ | 3240 | case 1: /*mov from cr*/ |
3177 | switch (cr) { | 3241 | switch (cr) { |
3178 | case 3: | 3242 | case 3: |
3179 | kvm_register_write(vcpu, reg, vcpu->arch.cr3); | 3243 | val = kvm_read_cr3(vcpu); |
3180 | trace_kvm_cr_read(cr, vcpu->arch.cr3); | 3244 | kvm_register_write(vcpu, reg, val); |
3245 | trace_kvm_cr_read(cr, val); | ||
3181 | skip_emulated_instruction(vcpu); | 3246 | skip_emulated_instruction(vcpu); |
3182 | return 1; | 3247 | return 1; |
3183 | case 8: | 3248 | case 8: |
@@ -3349,6 +3414,11 @@ static int handle_vmx_insn(struct kvm_vcpu *vcpu) | |||
3349 | return 1; | 3414 | return 1; |
3350 | } | 3415 | } |
3351 | 3416 | ||
3417 | static int handle_invd(struct kvm_vcpu *vcpu) | ||
3418 | { | ||
3419 | return emulate_instruction(vcpu, 0) == EMULATE_DONE; | ||
3420 | } | ||
3421 | |||
3352 | static int handle_invlpg(struct kvm_vcpu *vcpu) | 3422 | static int handle_invlpg(struct kvm_vcpu *vcpu) |
3353 | { | 3423 | { |
3354 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 3424 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
@@ -3377,7 +3447,7 @@ static int handle_xsetbv(struct kvm_vcpu *vcpu) | |||
3377 | 3447 | ||
3378 | static int handle_apic_access(struct kvm_vcpu *vcpu) | 3448 | static int handle_apic_access(struct kvm_vcpu *vcpu) |
3379 | { | 3449 | { |
3380 | return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE; | 3450 | return emulate_instruction(vcpu, 0) == EMULATE_DONE; |
3381 | } | 3451 | } |
3382 | 3452 | ||
3383 | static int handle_task_switch(struct kvm_vcpu *vcpu) | 3453 | static int handle_task_switch(struct kvm_vcpu *vcpu) |
@@ -3476,7 +3546,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) | |||
3476 | 3546 | ||
3477 | gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); | 3547 | gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); |
3478 | trace_kvm_page_fault(gpa, exit_qualification); | 3548 | trace_kvm_page_fault(gpa, exit_qualification); |
3479 | return kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0); | 3549 | return kvm_mmu_page_fault(vcpu, gpa, exit_qualification & 0x3, NULL, 0); |
3480 | } | 3550 | } |
3481 | 3551 | ||
3482 | static u64 ept_rsvd_mask(u64 spte, int level) | 3552 | static u64 ept_rsvd_mask(u64 spte, int level) |
@@ -3592,7 +3662,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) | |||
3592 | && (kvm_get_rflags(&vmx->vcpu) & X86_EFLAGS_IF)) | 3662 | && (kvm_get_rflags(&vmx->vcpu) & X86_EFLAGS_IF)) |
3593 | return handle_interrupt_window(&vmx->vcpu); | 3663 | return handle_interrupt_window(&vmx->vcpu); |
3594 | 3664 | ||
3595 | err = emulate_instruction(vcpu, 0, 0, 0); | 3665 | err = emulate_instruction(vcpu, 0); |
3596 | 3666 | ||
3597 | if (err == EMULATE_DO_MMIO) { | 3667 | if (err == EMULATE_DO_MMIO) { |
3598 | ret = 0; | 3668 | ret = 0; |
@@ -3649,6 +3719,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { | |||
3649 | [EXIT_REASON_MSR_WRITE] = handle_wrmsr, | 3719 | [EXIT_REASON_MSR_WRITE] = handle_wrmsr, |
3650 | [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window, | 3720 | [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window, |
3651 | [EXIT_REASON_HLT] = handle_halt, | 3721 | [EXIT_REASON_HLT] = handle_halt, |
3722 | [EXIT_REASON_INVD] = handle_invd, | ||
3652 | [EXIT_REASON_INVLPG] = handle_invlpg, | 3723 | [EXIT_REASON_INVLPG] = handle_invlpg, |
3653 | [EXIT_REASON_VMCALL] = handle_vmcall, | 3724 | [EXIT_REASON_VMCALL] = handle_vmcall, |
3654 | [EXIT_REASON_VMCLEAR] = handle_vmx_insn, | 3725 | [EXIT_REASON_VMCLEAR] = handle_vmx_insn, |
@@ -3676,6 +3747,12 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { | |||
3676 | static const int kvm_vmx_max_exit_handlers = | 3747 | static const int kvm_vmx_max_exit_handlers = |
3677 | ARRAY_SIZE(kvm_vmx_exit_handlers); | 3748 | ARRAY_SIZE(kvm_vmx_exit_handlers); |
3678 | 3749 | ||
3750 | static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) | ||
3751 | { | ||
3752 | *info1 = vmcs_readl(EXIT_QUALIFICATION); | ||
3753 | *info2 = vmcs_read32(VM_EXIT_INTR_INFO); | ||
3754 | } | ||
3755 | |||
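vmx_get_exit_info() exposes the raw exit qualification and interrupt info through a new kvm_x86_ops hook, so arch-neutral code (tracepoints in particular) can report them without reading VMCS fields directly. A sketch of a hypothetical consumer:

    /* Sketch: hypothetical caller of the new hook. */
    u64 info1 = 0, info2 = 0;

    if (kvm_x86_ops->get_exit_info)
            kvm_x86_ops->get_exit_info(vcpu, &info1, &info2);
    pr_debug("exit info %llx/%llx\n",
             (unsigned long long)info1, (unsigned long long)info2);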
3679 | /* | 3756 | /* |
3680 | * The guest has exited. See if we can fix it or if we need userspace | 3757 | * The guest has exited. See if we can fix it or if we need userspace |
3681 | * assistance. | 3758 | * assistance. |
@@ -3686,17 +3763,12 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) | |||
3686 | u32 exit_reason = vmx->exit_reason; | 3763 | u32 exit_reason = vmx->exit_reason; |
3687 | u32 vectoring_info = vmx->idt_vectoring_info; | 3764 | u32 vectoring_info = vmx->idt_vectoring_info; |
3688 | 3765 | ||
3689 | trace_kvm_exit(exit_reason, vcpu); | 3766 | trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX); |
3690 | 3767 | ||
3691 | /* If guest state is invalid, start emulating */ | 3768 | /* If guest state is invalid, start emulating */ |
3692 | if (vmx->emulation_required && emulate_invalid_guest_state) | 3769 | if (vmx->emulation_required && emulate_invalid_guest_state) |
3693 | return handle_invalid_guest_state(vcpu); | 3770 | return handle_invalid_guest_state(vcpu); |
3694 | 3771 | ||
3695 | /* Access CR3 don't cause VMExit in paging mode, so we need | ||
3696 | * to sync with guest real CR3. */ | ||
3697 | if (enable_ept && is_paging(vcpu)) | ||
3698 | vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); | ||
3699 | |||
3700 | if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { | 3772 | if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { |
3701 | vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; | 3773 | vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; |
3702 | vcpu->run->fail_entry.hardware_entry_failure_reason | 3774 | vcpu->run->fail_entry.hardware_entry_failure_reason |
@@ -4013,7 +4085,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
4013 | ); | 4085 | ); |
4014 | 4086 | ||
4015 | vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) | 4087 | vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) |
4016 | | (1 << VCPU_EXREG_PDPTR)); | 4088 | | (1 << VCPU_EXREG_PDPTR) |
4089 | | (1 << VCPU_EXREG_CR3)); | ||
4017 | vcpu->arch.regs_dirty = 0; | 4090 | vcpu->arch.regs_dirty = 0; |
4018 | 4091 | ||
4019 | vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); | 4092 | vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); |
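With VCPU_EXREG_CR3 cleared from regs_avail after every VM entry, CR3 joins the lazily-decached registers: the unconditional per-exit GUEST_CR3 read removed in the vmx_handle_exit hunk above is replaced by an on-demand read the first time anyone asks. The kvm_read_cr3() accessor used all over this patch plausibly looks like:

    /* Sketch, assuming the usual kvm_cache_regs.h accessor pattern. */
    static inline unsigned long kvm_read_cr3(struct kvm_vcpu *vcpu)
    {
            if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
                    kvm_x86_ops->decache_cr3(vcpu); /* e.g. read GUEST_CR3 */
            return vcpu->arch.cr3;
    }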
@@ -4280,6 +4353,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
4280 | .get_cpl = vmx_get_cpl, | 4353 | .get_cpl = vmx_get_cpl, |
4281 | .get_cs_db_l_bits = vmx_get_cs_db_l_bits, | 4354 | .get_cs_db_l_bits = vmx_get_cs_db_l_bits, |
4282 | .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits, | 4355 | .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits, |
4356 | .decache_cr3 = vmx_decache_cr3, | ||
4283 | .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, | 4357 | .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, |
4284 | .set_cr0 = vmx_set_cr0, | 4358 | .set_cr0 = vmx_set_cr0, |
4285 | .set_cr3 = vmx_set_cr3, | 4359 | .set_cr3 = vmx_set_cr3, |
@@ -4320,7 +4394,9 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
4320 | .get_tdp_level = get_ept_level, | 4394 | .get_tdp_level = get_ept_level, |
4321 | .get_mt_mask = vmx_get_mt_mask, | 4395 | .get_mt_mask = vmx_get_mt_mask, |
4322 | 4396 | ||
4397 | .get_exit_info = vmx_get_exit_info, | ||
4323 | .exit_reasons_str = vmx_exit_reasons_str, | 4398 | .exit_reasons_str = vmx_exit_reasons_str, |
4399 | |||
4324 | .get_lpage_level = vmx_get_lpage_level, | 4400 | .get_lpage_level = vmx_get_lpage_level, |
4325 | 4401 | ||
4326 | .cpuid_update = vmx_cpuid_update, | 4402 | .cpuid_update = vmx_cpuid_update, |
@@ -4396,8 +4472,6 @@ static int __init vmx_init(void) | |||
4396 | 4472 | ||
4397 | if (enable_ept) { | 4473 | if (enable_ept) { |
4398 | bypass_guest_pf = 0; | 4474 | bypass_guest_pf = 0; |
4399 | kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | | ||
4400 | VMX_EPT_WRITABLE_MASK); | ||
4401 | kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, | 4475 | kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, |
4402 | VMX_EPT_EXECUTABLE_MASK); | 4476 | VMX_EPT_EXECUTABLE_MASK); |
4403 | kvm_enable_tdp(); | 4477 | kvm_enable_tdp(); |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b989e1f1e5d3..bcc0efce85bf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -43,6 +43,7 @@ | |||
43 | #include <linux/slab.h> | 43 | #include <linux/slab.h> |
44 | #include <linux/perf_event.h> | 44 | #include <linux/perf_event.h> |
45 | #include <linux/uaccess.h> | 45 | #include <linux/uaccess.h> |
46 | #include <linux/hash.h> | ||
46 | #include <trace/events/kvm.h> | 47 | #include <trace/events/kvm.h> |
47 | 48 | ||
48 | #define CREATE_TRACE_POINTS | 49 | #define CREATE_TRACE_POINTS |
@@ -155,6 +156,13 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
155 | 156 | ||
156 | u64 __read_mostly host_xcr0; | 157 | u64 __read_mostly host_xcr0; |
157 | 158 | ||
159 | static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) | ||
160 | { | ||
161 | int i; | ||
162 | for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU); i++) | ||
163 | vcpu->arch.apf.gfns[i] = ~0; | ||
164 | } | ||
165 | |||
158 | static void kvm_on_user_return(struct user_return_notifier *urn) | 166 | static void kvm_on_user_return(struct user_return_notifier *urn) |
159 | { | 167 | { |
160 | unsigned slot; | 168 | unsigned slot; |
@@ -326,23 +334,28 @@ void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr) | |||
326 | } | 334 | } |
327 | EXPORT_SYMBOL_GPL(kvm_requeue_exception); | 335 | EXPORT_SYMBOL_GPL(kvm_requeue_exception); |
328 | 336 | ||
329 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu) | 337 | void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err) |
330 | { | 338 | { |
331 | unsigned error_code = vcpu->arch.fault.error_code; | 339 | if (err) |
340 | kvm_inject_gp(vcpu, 0); | ||
341 | else | ||
342 | kvm_x86_ops->skip_emulated_instruction(vcpu); | ||
343 | } | ||
344 | EXPORT_SYMBOL_GPL(kvm_complete_insn_gp); | ||
332 | 345 | ||
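kvm_complete_insn_gp() factors out the common tail of privileged-register exit handlers, now that setters such as kvm_set_cr8() return non-zero for bad values instead of injecting faults themselves: on error inject #GP(0), otherwise advance past the emulated instruction. The resulting handler shape (hypothetical handler name, mirroring the handle_cr change above):

    /* Sketch: hypothetical exit handler built on the new helper. */
    static int handle_cr8_write(struct kvm_vcpu *vcpu, unsigned long val)
    {
            int err = kvm_set_cr8(vcpu, val); /* non-zero: reserved bits set */

            kvm_complete_insn_gp(vcpu, err);  /* #GP(0) or skip instruction */
            return 1;
    }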
346 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) | ||
347 | { | ||
333 | ++vcpu->stat.pf_guest; | 348 | ++vcpu->stat.pf_guest; |
334 | vcpu->arch.cr2 = vcpu->arch.fault.address; | 349 | vcpu->arch.cr2 = fault->address; |
335 | kvm_queue_exception_e(vcpu, PF_VECTOR, error_code); | 350 | kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code); |
336 | } | 351 | } |
337 | 352 | ||
338 | void kvm_propagate_fault(struct kvm_vcpu *vcpu) | 353 | void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) |
339 | { | 354 | { |
340 | if (mmu_is_nested(vcpu) && !vcpu->arch.fault.nested) | 355 | if (mmu_is_nested(vcpu) && !fault->nested_page_fault) |
341 | vcpu->arch.nested_mmu.inject_page_fault(vcpu); | 356 | vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault); |
342 | else | 357 | else |
343 | vcpu->arch.mmu.inject_page_fault(vcpu); | 358 | vcpu->arch.mmu.inject_page_fault(vcpu, fault); |
344 | |||
345 | vcpu->arch.fault.nested = false; | ||
346 | } | 359 | } |
347 | 360 | ||
348 | void kvm_inject_nmi(struct kvm_vcpu *vcpu) | 361 | void kvm_inject_nmi(struct kvm_vcpu *vcpu) |
@@ -460,8 +473,8 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu) | |||
460 | (unsigned long *)&vcpu->arch.regs_avail)) | 473 | (unsigned long *)&vcpu->arch.regs_avail)) |
461 | return true; | 474 | return true; |
462 | 475 | ||
463 | gfn = (vcpu->arch.cr3 & ~31u) >> PAGE_SHIFT; | 476 | gfn = (kvm_read_cr3(vcpu) & ~31u) >> PAGE_SHIFT; |
464 | offset = (vcpu->arch.cr3 & ~31u) & (PAGE_SIZE - 1); | 477 | offset = (kvm_read_cr3(vcpu) & ~31u) & (PAGE_SIZE - 1); |
465 | r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte), | 478 | r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte), |
466 | PFERR_USER_MASK | PFERR_WRITE_MASK); | 479 | PFERR_USER_MASK | PFERR_WRITE_MASK); |
467 | if (r < 0) | 480 | if (r < 0) |
@@ -506,12 +519,15 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
506 | } else | 519 | } else |
507 | #endif | 520 | #endif |
508 | if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, | 521 | if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, |
509 | vcpu->arch.cr3)) | 522 | kvm_read_cr3(vcpu))) |
510 | return 1; | 523 | return 1; |
511 | } | 524 | } |
512 | 525 | ||
513 | kvm_x86_ops->set_cr0(vcpu, cr0); | 526 | kvm_x86_ops->set_cr0(vcpu, cr0); |
514 | 527 | ||
528 | if ((cr0 ^ old_cr0) & X86_CR0_PG) | ||
529 | kvm_clear_async_pf_completion_queue(vcpu); | ||
530 | |||
515 | if ((cr0 ^ old_cr0) & update_bits) | 531 | if ((cr0 ^ old_cr0) & update_bits) |
516 | kvm_mmu_reset_context(vcpu); | 532 | kvm_mmu_reset_context(vcpu); |
517 | return 0; | 533 | return 0; |
@@ -595,7 +611,8 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
595 | return 1; | 611 | return 1; |
596 | } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) | 612 | } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) |
597 | && ((cr4 ^ old_cr4) & pdptr_bits) | 613 | && ((cr4 ^ old_cr4) & pdptr_bits) |
598 | && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3)) | 614 | && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, |
615 | kvm_read_cr3(vcpu))) | ||
599 | return 1; | 616 | return 1; |
600 | 617 | ||
601 | if (cr4 & X86_CR4_VMXE) | 618 | if (cr4 & X86_CR4_VMXE) |
@@ -615,7 +632,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cr4); | |||
615 | 632 | ||
616 | int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | 633 | int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) |
617 | { | 634 | { |
618 | if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) { | 635 | if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) { |
619 | kvm_mmu_sync_roots(vcpu); | 636 | kvm_mmu_sync_roots(vcpu); |
620 | kvm_mmu_flush_tlb(vcpu); | 637 | kvm_mmu_flush_tlb(vcpu); |
621 | return 0; | 638 | return 0; |
@@ -650,12 +667,13 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
650 | if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT))) | 667 | if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT))) |
651 | return 1; | 668 | return 1; |
652 | vcpu->arch.cr3 = cr3; | 669 | vcpu->arch.cr3 = cr3; |
670 | __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); | ||
653 | vcpu->arch.mmu.new_cr3(vcpu); | 671 | vcpu->arch.mmu.new_cr3(vcpu); |
654 | return 0; | 672 | return 0; |
655 | } | 673 | } |
656 | EXPORT_SYMBOL_GPL(kvm_set_cr3); | 674 | EXPORT_SYMBOL_GPL(kvm_set_cr3); |
657 | 675 | ||
658 | int __kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) | 676 | int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) |
659 | { | 677 | { |
660 | if (cr8 & CR8_RESERVED_BITS) | 678 | if (cr8 & CR8_RESERVED_BITS) |
661 | return 1; | 679 | return 1; |
@@ -665,12 +683,6 @@ int __kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) | |||
665 | vcpu->arch.cr8 = cr8; | 683 | vcpu->arch.cr8 = cr8; |
666 | return 0; | 684 | return 0; |
667 | } | 685 | } |
668 | |||
669 | void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) | ||
670 | { | ||
671 | if (__kvm_set_cr8(vcpu, cr8)) | ||
672 | kvm_inject_gp(vcpu, 0); | ||
673 | } | ||
674 | EXPORT_SYMBOL_GPL(kvm_set_cr8); | 686 | EXPORT_SYMBOL_GPL(kvm_set_cr8); |
675 | 687 | ||
676 | unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) | 688 | unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) |
@@ -775,12 +787,12 @@ EXPORT_SYMBOL_GPL(kvm_get_dr); | |||
775 | * kvm-specific. Those are put in the beginning of the list. | 787 | * kvm-specific. Those are put in the beginning of the list. |
776 | */ | 788 | */ |
777 | 789 | ||
778 | #define KVM_SAVE_MSRS_BEGIN 7 | 790 | #define KVM_SAVE_MSRS_BEGIN 8 |
779 | static u32 msrs_to_save[] = { | 791 | static u32 msrs_to_save[] = { |
780 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, | 792 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, |
781 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, | 793 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, |
782 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, | 794 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, |
783 | HV_X64_MSR_APIC_ASSIST_PAGE, | 795 | HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, |
784 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, | 796 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, |
785 | MSR_STAR, | 797 | MSR_STAR, |
786 | #ifdef CONFIG_X86_64 | 798 | #ifdef CONFIG_X86_64 |
@@ -830,7 +842,6 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
830 | kvm_x86_ops->set_efer(vcpu, efer); | 842 | kvm_x86_ops->set_efer(vcpu, efer); |
831 | 843 | ||
832 | vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; | 844 | vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; |
833 | kvm_mmu_reset_context(vcpu); | ||
834 | 845 | ||
835 | /* Update reserved bits */ | 846 | /* Update reserved bits */ |
836 | if ((efer ^ old_efer) & EFER_NX) | 847 | if ((efer ^ old_efer) & EFER_NX) |
@@ -976,7 +987,7 @@ static inline u64 nsec_to_cycles(u64 nsec) | |||
976 | if (kvm_tsc_changes_freq()) | 987 | if (kvm_tsc_changes_freq()) |
977 | printk_once(KERN_WARNING | 988 | printk_once(KERN_WARNING |
978 | "kvm: unreliable cycle conversion on adjustable rate TSC\n"); | 989 | "kvm: unreliable cycle conversion on adjustable rate TSC\n"); |
979 | ret = nsec * __get_cpu_var(cpu_tsc_khz); | 990 | ret = nsec * __this_cpu_read(cpu_tsc_khz); |
980 | do_div(ret, USEC_PER_SEC); | 991 | do_div(ret, USEC_PER_SEC); |
981 | return ret; | 992 | return ret; |
982 | } | 993 | } |
@@ -1061,7 +1072,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
1061 | local_irq_save(flags); | 1072 | local_irq_save(flags); |
1062 | kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp); | 1073 | kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp); |
1063 | kernel_ns = get_kernel_ns(); | 1074 | kernel_ns = get_kernel_ns(); |
1064 | this_tsc_khz = __get_cpu_var(cpu_tsc_khz); | 1075 | this_tsc_khz = __this_cpu_read(cpu_tsc_khz); |
1065 | 1076 | ||
1066 | if (unlikely(this_tsc_khz == 0)) { | 1077 | if (unlikely(this_tsc_khz == 0)) { |
1067 | local_irq_restore(flags); | 1078 | local_irq_restore(flags); |
@@ -1418,6 +1429,30 @@ static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1418 | return 0; | 1429 | return 0; |
1419 | } | 1430 | } |
1420 | 1431 | ||
1432 | static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) | ||
1433 | { | ||
1434 | gpa_t gpa = data & ~0x3f; | ||
1435 | |||
1436 | /* Bits 2:5 are reserved, should be zero */ ||
1437 | if (data & 0x3c) | ||
1438 | return 1; | ||
1439 | |||
1440 | vcpu->arch.apf.msr_val = data; | ||
1441 | |||
1442 | if (!(data & KVM_ASYNC_PF_ENABLED)) { | ||
1443 | kvm_clear_async_pf_completion_queue(vcpu); | ||
1444 | kvm_async_pf_hash_reset(vcpu); | ||
1445 | return 0; | ||
1446 | } | ||
1447 | |||
1448 | if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa)) | ||
1449 | return 1; | ||
1450 | |||
1451 | vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS); | ||
1452 | kvm_async_pf_wakeup_all(vcpu); | ||
1453 | return 0; | ||
1454 | } | ||
1455 | |||
1421 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | 1456 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) |
1422 | { | 1457 | { |
1423 | switch (msr) { | 1458 | switch (msr) { |
@@ -1499,6 +1534,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1499 | } | 1534 | } |
1500 | break; | 1535 | break; |
1501 | } | 1536 | } |
1537 | case MSR_KVM_ASYNC_PF_EN: | ||
1538 | if (kvm_pv_enable_async_pf(vcpu, data)) | ||
1539 | return 1; | ||
1540 | break; | ||
1502 | case MSR_IA32_MCG_CTL: | 1541 | case MSR_IA32_MCG_CTL: |
1503 | case MSR_IA32_MCG_STATUS: | 1542 | case MSR_IA32_MCG_STATUS: |
1504 | case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: | 1543 | case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: |
@@ -1775,6 +1814,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1775 | case MSR_KVM_SYSTEM_TIME_NEW: | 1814 | case MSR_KVM_SYSTEM_TIME_NEW: |
1776 | data = vcpu->arch.time; | 1815 | data = vcpu->arch.time; |
1777 | break; | 1816 | break; |
1817 | case MSR_KVM_ASYNC_PF_EN: | ||
1818 | data = vcpu->arch.apf.msr_val; | ||
1819 | break; | ||
1778 | case MSR_IA32_P5_MC_ADDR: | 1820 | case MSR_IA32_P5_MC_ADDR: |
1779 | case MSR_IA32_P5_MC_TYPE: | 1821 | case MSR_IA32_P5_MC_TYPE: |
1780 | case MSR_IA32_MCG_CAP: | 1822 | case MSR_IA32_MCG_CAP: |
@@ -1904,6 +1946,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
1904 | case KVM_CAP_NOP_IO_DELAY: | 1946 | case KVM_CAP_NOP_IO_DELAY: |
1905 | case KVM_CAP_MP_STATE: | 1947 | case KVM_CAP_MP_STATE: |
1906 | case KVM_CAP_SYNC_MMU: | 1948 | case KVM_CAP_SYNC_MMU: |
1949 | case KVM_CAP_USER_NMI: | ||
1907 | case KVM_CAP_REINJECT_CONTROL: | 1950 | case KVM_CAP_REINJECT_CONTROL: |
1908 | case KVM_CAP_IRQ_INJECT_STATUS: | 1951 | case KVM_CAP_IRQ_INJECT_STATUS: |
1909 | case KVM_CAP_ASSIGN_DEV_IRQ: | 1952 | case KVM_CAP_ASSIGN_DEV_IRQ: |
@@ -1922,6 +1965,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
1922 | case KVM_CAP_DEBUGREGS: | 1965 | case KVM_CAP_DEBUGREGS: |
1923 | case KVM_CAP_X86_ROBUST_SINGLESTEP: | 1966 | case KVM_CAP_X86_ROBUST_SINGLESTEP: |
1924 | case KVM_CAP_XSAVE: | 1967 | case KVM_CAP_XSAVE: |
1968 | case KVM_CAP_ASYNC_PF: | ||
1925 | r = 1; | 1969 | r = 1; |
1926 | break; | 1970 | break; |
1927 | case KVM_CAP_COALESCED_MMIO: | 1971 | case KVM_CAP_COALESCED_MMIO: |
@@ -2185,6 +2229,11 @@ out: | |||
2185 | return r; | 2229 | return r; |
2186 | } | 2230 | } |
2187 | 2231 | ||
2232 | static void cpuid_mask(u32 *word, int wordnum) | ||
2233 | { | ||
2234 | *word &= boot_cpu_data.x86_capability[wordnum]; | ||
2235 | } | ||
2236 | |||
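cpuid_mask() clamps a guest-visible feature word against the corresponding host x86_capability[] word, so KVM no longer advertises bits the host CPU itself lacks; features KVM deliberately emulates, such as X2APIC below, are OR'd back in afterwards. The word numbers follow the kernel's cpufeature layout, so the pairings used in this patch come out as:

    /* word 0 -> CPUID.1:EDX,          word 4 -> CPUID.1:ECX,
     * word 1 -> CPUID.0x80000001:EDX, word 6 -> CPUID.0x80000001:ECX */
    entry->edx &= kvm_supported_word0_x86_features;
    cpuid_mask(&entry->edx, 0); /* drop what the host CPU doesn't have */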
2188 | static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function, | 2237 | static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function, |
2189 | u32 index) | 2238 | u32 index) |
2190 | { | 2239 | { |
@@ -2259,7 +2308,9 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
2259 | break; | 2308 | break; |
2260 | case 1: | 2309 | case 1: |
2261 | entry->edx &= kvm_supported_word0_x86_features; | 2310 | entry->edx &= kvm_supported_word0_x86_features; |
2311 | cpuid_mask(&entry->edx, 0); | ||
2262 | entry->ecx &= kvm_supported_word4_x86_features; | 2312 | entry->ecx &= kvm_supported_word4_x86_features; |
2313 | cpuid_mask(&entry->ecx, 4); | ||
2263 | /* we support x2apic emulation even if host does not support | 2314 | /* we support x2apic emulation even if host does not support |
2264 | * it since we emulate x2apic in software */ | 2315 | * it since we emulate x2apic in software */ |
2265 | entry->ecx |= F(X2APIC); | 2316 | entry->ecx |= F(X2APIC); |
@@ -2350,7 +2401,9 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
2350 | break; | 2401 | break; |
2351 | case 0x80000001: | 2402 | case 0x80000001: |
2352 | entry->edx &= kvm_supported_word1_x86_features; | 2403 | entry->edx &= kvm_supported_word1_x86_features; |
2404 | cpuid_mask(&entry->edx, 1); | ||
2353 | entry->ecx &= kvm_supported_word6_x86_features; | 2405 | entry->ecx &= kvm_supported_word6_x86_features; |
2406 | cpuid_mask(&entry->ecx, 6); | ||
2354 | break; | 2407 | break; |
2355 | } | 2408 | } |
2356 | 2409 | ||
@@ -3169,20 +3222,18 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
3169 | struct kvm_memslots *slots, *old_slots; | 3222 | struct kvm_memslots *slots, *old_slots; |
3170 | unsigned long *dirty_bitmap; | 3223 | unsigned long *dirty_bitmap; |
3171 | 3224 | ||
3172 | r = -ENOMEM; | 3225 | dirty_bitmap = memslot->dirty_bitmap_head; |
3173 | dirty_bitmap = vmalloc(n); | 3226 | if (memslot->dirty_bitmap == dirty_bitmap) |
3174 | if (!dirty_bitmap) | 3227 | dirty_bitmap += n / sizeof(long); |
3175 | goto out; | ||
3176 | memset(dirty_bitmap, 0, n); | 3228 | memset(dirty_bitmap, 0, n); |
3177 | 3229 | ||
3178 | r = -ENOMEM; | 3230 | r = -ENOMEM; |
3179 | slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); | 3231 | slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); |
3180 | if (!slots) { | 3232 | if (!slots) |
3181 | vfree(dirty_bitmap); | ||
3182 | goto out; | 3233 | goto out; |
3183 | } | ||
3184 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); | 3234 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); |
3185 | slots->memslots[log->slot].dirty_bitmap = dirty_bitmap; | 3235 | slots->memslots[log->slot].dirty_bitmap = dirty_bitmap; |
3236 | slots->generation++; | ||
3186 | 3237 | ||
3187 | old_slots = kvm->memslots; | 3238 | old_slots = kvm->memslots; |
3188 | rcu_assign_pointer(kvm->memslots, slots); | 3239 | rcu_assign_pointer(kvm->memslots, slots); |
@@ -3195,11 +3246,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
3195 | spin_unlock(&kvm->mmu_lock); | 3246 | spin_unlock(&kvm->mmu_lock); |
3196 | 3247 | ||
3197 | r = -EFAULT; | 3248 | r = -EFAULT; |
3198 | if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) { | 3249 | if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) |
3199 | vfree(dirty_bitmap); | ||
3200 | goto out; | 3250 | goto out; |
3201 | } | ||
3202 | vfree(dirty_bitmap); | ||
3203 | } else { | 3251 | } else { |
3204 | r = -EFAULT; | 3252 | r = -EFAULT; |
3205 | if (clear_user(log->dirty_bitmap, n)) | 3253 | if (clear_user(log->dirty_bitmap, n)) |
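The dirty-log rework drops the per-call vmalloc(): each memslot is now assumed to keep a buffer of twice the bitmap size, with dirty_bitmap_head pointing at its start, and KVM_GET_DIRTY_LOG flips which half is live while the retired half is copied out. The idea, restated (the field layout is an assumption based on the code above):

    /* dirty_bitmap_head: 2*n bytes, allocated once with the slot;
     * dirty_bitmap:      whichever half the VM is currently logging into. */
    unsigned long *fresh = memslot->dirty_bitmap_head;

    if (memslot->dirty_bitmap == fresh) /* first half live... */
            fresh += n / sizeof(long);  /* ...so recycle the second */
    memset(fresh, 0, n);                /* clean half for the next period */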
@@ -3266,8 +3314,10 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
3266 | if (vpic) { | 3314 | if (vpic) { |
3267 | r = kvm_ioapic_init(kvm); | 3315 | r = kvm_ioapic_init(kvm); |
3268 | if (r) { | 3316 | if (r) { |
3317 | mutex_lock(&kvm->slots_lock); | ||
3269 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, | 3318 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, |
3270 | &vpic->dev); | 3319 | &vpic->dev); |
3320 | mutex_unlock(&kvm->slots_lock); | ||
3271 | kfree(vpic); | 3321 | kfree(vpic); |
3272 | goto create_irqchip_unlock; | 3322 | goto create_irqchip_unlock; |
3273 | } | 3323 | } |
@@ -3278,10 +3328,12 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
3278 | smp_wmb(); | 3328 | smp_wmb(); |
3279 | r = kvm_setup_default_irq_routing(kvm); | 3329 | r = kvm_setup_default_irq_routing(kvm); |
3280 | if (r) { | 3330 | if (r) { |
3331 | mutex_lock(&kvm->slots_lock); | ||
3281 | mutex_lock(&kvm->irq_lock); | 3332 | mutex_lock(&kvm->irq_lock); |
3282 | kvm_ioapic_destroy(kvm); | 3333 | kvm_ioapic_destroy(kvm); |
3283 | kvm_destroy_pic(kvm); | 3334 | kvm_destroy_pic(kvm); |
3284 | mutex_unlock(&kvm->irq_lock); | 3335 | mutex_unlock(&kvm->irq_lock); |
3336 | mutex_unlock(&kvm->slots_lock); | ||
3285 | } | 3337 | } |
3286 | create_irqchip_unlock: | 3338 | create_irqchip_unlock: |
3287 | mutex_unlock(&kvm->lock); | 3339 | mutex_unlock(&kvm->lock); |
@@ -3557,63 +3609,63 @@ static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) | |||
3557 | static gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) | 3609 | static gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) |
3558 | { | 3610 | { |
3559 | gpa_t t_gpa; | 3611 | gpa_t t_gpa; |
3560 | u32 error; | 3612 | struct x86_exception exception; |
3561 | 3613 | ||
3562 | BUG_ON(!mmu_is_nested(vcpu)); | 3614 | BUG_ON(!mmu_is_nested(vcpu)); |
3563 | 3615 | ||
3564 | /* NPT walks are always user-walks */ | 3616 | /* NPT walks are always user-walks */ |
3565 | access |= PFERR_USER_MASK; | 3617 | access |= PFERR_USER_MASK; |
3566 | t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, &error); | 3618 | t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, &exception); |
3567 | if (t_gpa == UNMAPPED_GVA) | ||
3568 | vcpu->arch.fault.nested = true; | ||
3569 | 3619 | ||
3570 | return t_gpa; | 3620 | return t_gpa; |
3571 | } | 3621 | } |
3572 | 3622 | ||
3573 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) | 3623 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, |
3624 | struct x86_exception *exception) | ||
3574 | { | 3625 | { |
3575 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | 3626 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; |
3576 | return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, error); | 3627 | return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception); |
3577 | } | 3628 | } |
3578 | 3629 | ||
3579 | gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) | 3630 | gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, |
3631 | struct x86_exception *exception) | ||
3580 | { | 3632 | { |
3581 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | 3633 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; |
3582 | access |= PFERR_FETCH_MASK; | 3634 | access |= PFERR_FETCH_MASK; |
3583 | return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, error); | 3635 | return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception); |
3584 | } | 3636 | } |
3585 | 3637 | ||
3586 | gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) | 3638 | gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, |
3639 | struct x86_exception *exception) | ||
3587 | { | 3640 | { |
3588 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | 3641 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; |
3589 | access |= PFERR_WRITE_MASK; | 3642 | access |= PFERR_WRITE_MASK; |
3590 | return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, error); | 3643 | return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception); |
3591 | } | 3644 | } |
3592 | 3645 | ||
3593 | /* used to access any guest's mapped memory without checking CPL */ | 3646 | /* used to access any guest's mapped memory without checking CPL */ |
3594 | gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) | 3647 | gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, |
3648 | struct x86_exception *exception) | ||
3595 | { | 3649 | { |
3596 | return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, error); | 3650 | return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, exception); |
3597 | } | 3651 | } |
3598 | 3652 | ||
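From here on, every gva-to-gpa walker reports failures through a struct x86_exception out-parameter instead of a bare u32 error code, which lets the fault address and the nested-walk flag travel together with the vector. Judging from the fields touched in this patch, the structure looks roughly like:

    /* Sketch: fields inferred from the call sites in this patch. */
    struct x86_exception {
            u8 vector;              /* e.g. PF_VECTOR */
            bool error_code_valid;
            u16 error_code;
            bool nested_page_fault; /* fault taken during a nested walk */
            u64 address;            /* cr2, or nested-walk gpa */
    };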
3599 | static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes, | 3653 | static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes, |
3600 | struct kvm_vcpu *vcpu, u32 access, | 3654 | struct kvm_vcpu *vcpu, u32 access, |
3601 | u32 *error) | 3655 | struct x86_exception *exception) |
3602 | { | 3656 | { |
3603 | void *data = val; | 3657 | void *data = val; |
3604 | int r = X86EMUL_CONTINUE; | 3658 | int r = X86EMUL_CONTINUE; |
3605 | 3659 | ||
3606 | while (bytes) { | 3660 | while (bytes) { |
3607 | gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access, | 3661 | gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access, |
3608 | error); | 3662 | exception); |
3609 | unsigned offset = addr & (PAGE_SIZE-1); | 3663 | unsigned offset = addr & (PAGE_SIZE-1); |
3610 | unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset); | 3664 | unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset); |
3611 | int ret; | 3665 | int ret; |
3612 | 3666 | ||
3613 | if (gpa == UNMAPPED_GVA) { | 3667 | if (gpa == UNMAPPED_GVA) |
3614 | r = X86EMUL_PROPAGATE_FAULT; | 3668 | return X86EMUL_PROPAGATE_FAULT; |
3615 | goto out; | ||
3616 | } | ||
3617 | ret = kvm_read_guest(vcpu->kvm, gpa, data, toread); | 3669 | ret = kvm_read_guest(vcpu->kvm, gpa, data, toread); |
3618 | if (ret < 0) { | 3670 | if (ret < 0) { |
3619 | r = X86EMUL_IO_NEEDED; | 3671 | r = X86EMUL_IO_NEEDED; |
@@ -3630,31 +3682,35 @@ out: | |||
3630 | 3682 | ||
3631 | /* used for instruction fetching */ | 3683 | /* used for instruction fetching */ |
3632 | static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes, | 3684 | static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes, |
3633 | struct kvm_vcpu *vcpu, u32 *error) | 3685 | struct kvm_vcpu *vcpu, |
3686 | struct x86_exception *exception) | ||
3634 | { | 3687 | { |
3635 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | 3688 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; |
3636 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, | 3689 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, |
3637 | access | PFERR_FETCH_MASK, error); | 3690 | access | PFERR_FETCH_MASK, |
3691 | exception); | ||
3638 | } | 3692 | } |
3639 | 3693 | ||
3640 | static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, | 3694 | static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, |
3641 | struct kvm_vcpu *vcpu, u32 *error) | 3695 | struct kvm_vcpu *vcpu, |
3696 | struct x86_exception *exception) | ||
3642 | { | 3697 | { |
3643 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | 3698 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; |
3644 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, | 3699 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, |
3645 | error); | 3700 | exception); |
3646 | } | 3701 | } |
3647 | 3702 | ||
3648 | static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes, | 3703 | static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes, |
3649 | struct kvm_vcpu *vcpu, u32 *error) | 3704 | struct kvm_vcpu *vcpu, |
3705 | struct x86_exception *exception) | ||
3650 | { | 3706 | { |
3651 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error); | 3707 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception); |
3652 | } | 3708 | } |
3653 | 3709 | ||
3654 | static int kvm_write_guest_virt_system(gva_t addr, void *val, | 3710 | static int kvm_write_guest_virt_system(gva_t addr, void *val, |
3655 | unsigned int bytes, | 3711 | unsigned int bytes, |
3656 | struct kvm_vcpu *vcpu, | 3712 | struct kvm_vcpu *vcpu, |
3657 | u32 *error) | 3713 | struct x86_exception *exception) |
3658 | { | 3714 | { |
3659 | void *data = val; | 3715 | void *data = val; |
3660 | int r = X86EMUL_CONTINUE; | 3716 | int r = X86EMUL_CONTINUE; |
@@ -3662,15 +3718,13 @@ static int kvm_write_guest_virt_system(gva_t addr, void *val, | |||
3662 | while (bytes) { | 3718 | while (bytes) { |
3663 | gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, | 3719 | gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, |
3664 | PFERR_WRITE_MASK, | 3720 | PFERR_WRITE_MASK, |
3665 | error); | 3721 | exception); |
3666 | unsigned offset = addr & (PAGE_SIZE-1); | 3722 | unsigned offset = addr & (PAGE_SIZE-1); |
3667 | unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); | 3723 | unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); |
3668 | int ret; | 3724 | int ret; |
3669 | 3725 | ||
3670 | if (gpa == UNMAPPED_GVA) { | 3726 | if (gpa == UNMAPPED_GVA) |
3671 | r = X86EMUL_PROPAGATE_FAULT; | 3727 | return X86EMUL_PROPAGATE_FAULT; |
3672 | goto out; | ||
3673 | } | ||
3674 | ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite); | 3728 | ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite); |
3675 | if (ret < 0) { | 3729 | if (ret < 0) { |
3676 | r = X86EMUL_IO_NEEDED; | 3730 | r = X86EMUL_IO_NEEDED; |
@@ -3688,7 +3742,7 @@ out: | |||
3688 | static int emulator_read_emulated(unsigned long addr, | 3742 | static int emulator_read_emulated(unsigned long addr, |
3689 | void *val, | 3743 | void *val, |
3690 | unsigned int bytes, | 3744 | unsigned int bytes, |
3691 | unsigned int *error_code, | 3745 | struct x86_exception *exception, |
3692 | struct kvm_vcpu *vcpu) | 3746 | struct kvm_vcpu *vcpu) |
3693 | { | 3747 | { |
3694 | gpa_t gpa; | 3748 | gpa_t gpa; |
@@ -3701,7 +3755,7 @@ static int emulator_read_emulated(unsigned long addr, | |||
3701 | return X86EMUL_CONTINUE; | 3755 | return X86EMUL_CONTINUE; |
3702 | } | 3756 | } |
3703 | 3757 | ||
3704 | gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, error_code); | 3758 | gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, exception); |
3705 | 3759 | ||
3706 | if (gpa == UNMAPPED_GVA) | 3760 | if (gpa == UNMAPPED_GVA) |
3707 | return X86EMUL_PROPAGATE_FAULT; | 3761 | return X86EMUL_PROPAGATE_FAULT; |
@@ -3710,8 +3764,8 @@ static int emulator_read_emulated(unsigned long addr, | |||
3710 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) | 3764 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) |
3711 | goto mmio; | 3765 | goto mmio; |
3712 | 3766 | ||
3713 | if (kvm_read_guest_virt(addr, val, bytes, vcpu, NULL) | 3767 | if (kvm_read_guest_virt(addr, val, bytes, vcpu, exception) |
3714 | == X86EMUL_CONTINUE) | 3768 | == X86EMUL_CONTINUE) |
3715 | return X86EMUL_CONTINUE; | 3769 | return X86EMUL_CONTINUE; |
3716 | 3770 | ||
3717 | mmio: | 3771 | mmio: |
@@ -3735,7 +3789,7 @@ mmio: | |||
3735 | } | 3789 | } |
3736 | 3790 | ||
3737 | int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | 3791 | int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, |
3738 | const void *val, int bytes) | 3792 | const void *val, int bytes) |
3739 | { | 3793 | { |
3740 | int ret; | 3794 | int ret; |
3741 | 3795 | ||
@@ -3749,12 +3803,12 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3749 | static int emulator_write_emulated_onepage(unsigned long addr, | 3803 | static int emulator_write_emulated_onepage(unsigned long addr, |
3750 | const void *val, | 3804 | const void *val, |
3751 | unsigned int bytes, | 3805 | unsigned int bytes, |
3752 | unsigned int *error_code, | 3806 | struct x86_exception *exception, |
3753 | struct kvm_vcpu *vcpu) | 3807 | struct kvm_vcpu *vcpu) |
3754 | { | 3808 | { |
3755 | gpa_t gpa; | 3809 | gpa_t gpa; |
3756 | 3810 | ||
3757 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error_code); | 3811 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception); |
3758 | 3812 | ||
3759 | if (gpa == UNMAPPED_GVA) | 3813 | if (gpa == UNMAPPED_GVA) |
3760 | return X86EMUL_PROPAGATE_FAULT; | 3814 | return X86EMUL_PROPAGATE_FAULT; |
@@ -3787,7 +3841,7 @@ mmio: | |||
3787 | int emulator_write_emulated(unsigned long addr, | 3841 | int emulator_write_emulated(unsigned long addr, |
3788 | const void *val, | 3842 | const void *val, |
3789 | unsigned int bytes, | 3843 | unsigned int bytes, |
3790 | unsigned int *error_code, | 3844 | struct x86_exception *exception, |
3791 | struct kvm_vcpu *vcpu) | 3845 | struct kvm_vcpu *vcpu) |
3792 | { | 3846 | { |
3793 | /* Crossing a page boundary? */ | 3847 | /* Crossing a page boundary? */ |
@@ -3795,7 +3849,7 @@ int emulator_write_emulated(unsigned long addr, | |||
3795 | int rc, now; | 3849 | int rc, now; |
3796 | 3850 | ||
3797 | now = -addr & ~PAGE_MASK; | 3851 | now = -addr & ~PAGE_MASK; |
3798 | rc = emulator_write_emulated_onepage(addr, val, now, error_code, | 3852 | rc = emulator_write_emulated_onepage(addr, val, now, exception, |
3799 | vcpu); | 3853 | vcpu); |
3800 | if (rc != X86EMUL_CONTINUE) | 3854 | if (rc != X86EMUL_CONTINUE) |
3801 | return rc; | 3855 | return rc; |
@@ -3803,7 +3857,7 @@ int emulator_write_emulated(unsigned long addr, | |||
3803 | val += now; | 3857 | val += now; |
3804 | bytes -= now; | 3858 | bytes -= now; |
3805 | } | 3859 | } |
3806 | return emulator_write_emulated_onepage(addr, val, bytes, error_code, | 3860 | return emulator_write_emulated_onepage(addr, val, bytes, exception, |
3807 | vcpu); | 3861 | vcpu); |
3808 | } | 3862 | } |
3809 | 3863 | ||
@@ -3821,7 +3875,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr, | |||
3821 | const void *old, | 3875 | const void *old, |
3822 | const void *new, | 3876 | const void *new, |
3823 | unsigned int bytes, | 3877 | unsigned int bytes, |
3824 | unsigned int *error_code, | 3878 | struct x86_exception *exception, |
3825 | struct kvm_vcpu *vcpu) | 3879 | struct kvm_vcpu *vcpu) |
3826 | { | 3880 | { |
3827 | gpa_t gpa; | 3881 | gpa_t gpa; |
@@ -3879,7 +3933,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr, | |||
3879 | emul_write: | 3933 | emul_write: |
3880 | printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); | 3934 | printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); |
3881 | 3935 | ||
3882 | return emulator_write_emulated(addr, new, bytes, error_code, vcpu); | 3936 | return emulator_write_emulated(addr, new, bytes, exception, vcpu); |
3883 | } | 3937 | } |
3884 | 3938 | ||
3885 | static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) | 3939 | static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) |
@@ -3904,7 +3958,7 @@ static int emulator_pio_in_emulated(int size, unsigned short port, void *val, | |||
3904 | if (vcpu->arch.pio.count) | 3958 | if (vcpu->arch.pio.count) |
3905 | goto data_avail; | 3959 | goto data_avail; |
3906 | 3960 | ||
3907 | trace_kvm_pio(0, port, size, 1); | 3961 | trace_kvm_pio(0, port, size, count); |
3908 | 3962 | ||
3909 | vcpu->arch.pio.port = port; | 3963 | vcpu->arch.pio.port = port; |
3910 | vcpu->arch.pio.in = 1; | 3964 | vcpu->arch.pio.in = 1; |
@@ -3932,7 +3986,7 @@ static int emulator_pio_out_emulated(int size, unsigned short port, | |||
3932 | const void *val, unsigned int count, | 3986 | const void *val, unsigned int count, |
3933 | struct kvm_vcpu *vcpu) | 3987 | struct kvm_vcpu *vcpu) |
3934 | { | 3988 | { |
3935 | trace_kvm_pio(1, port, size, 1); | 3989 | trace_kvm_pio(1, port, size, count); |
3936 | 3990 | ||
3937 | vcpu->arch.pio.port = port; | 3991 | vcpu->arch.pio.port = port; |
3938 | vcpu->arch.pio.in = 0; | 3992 | vcpu->arch.pio.in = 0; |
@@ -3973,13 +4027,15 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) | |||
3973 | return X86EMUL_CONTINUE; | 4027 | return X86EMUL_CONTINUE; |
3974 | 4028 | ||
3975 | if (kvm_x86_ops->has_wbinvd_exit()) { | 4029 | if (kvm_x86_ops->has_wbinvd_exit()) { |
3976 | preempt_disable(); | 4030 | int cpu = get_cpu(); |
4031 | |||
4032 | cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask); | ||
3977 | smp_call_function_many(vcpu->arch.wbinvd_dirty_mask, | 4033 | smp_call_function_many(vcpu->arch.wbinvd_dirty_mask, |
3978 | wbinvd_ipi, NULL, 1); | 4034 | wbinvd_ipi, NULL, 1); |
3979 | preempt_enable(); | 4035 | put_cpu(); |
3980 | cpumask_clear(vcpu->arch.wbinvd_dirty_mask); | 4036 | cpumask_clear(vcpu->arch.wbinvd_dirty_mask); |
3981 | } | 4037 | } else |
3982 | wbinvd(); | 4038 | wbinvd(); |
3983 | return X86EMUL_CONTINUE; | 4039 | return X86EMUL_CONTINUE; |
3984 | } | 4040 | } |
3985 | EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd); | 4041 | EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd); |
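Two fixes in one hunk: on hosts that trap WBINVD, the old code flushed the remote dirty CPUs by IPI and then ran an unconditional local wbinvd() on top; the local flush now happens only on hosts without WBINVD exits. The switch from preempt_disable() to get_cpu() also yields a stable CPU id for the dirty mask, and keeps preemption off across smp_call_function_many(), which requires it:

    /* Sketch: the pinning idiom used above (mask/fn are stand-ins). */
    int cpu = get_cpu(); /* disables preemption, returns current cpu */

    cpumask_set_cpu(cpu, mask);
    smp_call_function_many(mask, fn, NULL, 1);
    put_cpu();           /* re-enables preemption */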
@@ -4019,7 +4075,7 @@ static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu) | |||
4019 | value = vcpu->arch.cr2; | 4075 | value = vcpu->arch.cr2; |
4020 | break; | 4076 | break; |
4021 | case 3: | 4077 | case 3: |
4022 | value = vcpu->arch.cr3; | 4078 | value = kvm_read_cr3(vcpu); |
4023 | break; | 4079 | break; |
4024 | case 4: | 4080 | case 4: |
4025 | value = kvm_read_cr4(vcpu); | 4081 | value = kvm_read_cr4(vcpu); |
@@ -4053,7 +4109,7 @@ static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) | |||
4053 | res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); | 4109 | res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); |
4054 | break; | 4110 | break; |
4055 | case 8: | 4111 | case 8: |
4056 | res = __kvm_set_cr8(vcpu, val & 0xfUL); | 4112 | res = kvm_set_cr8(vcpu, val); |
4057 | break; | 4113 | break; |
4058 | default: | 4114 | default: |
4059 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); | 4115 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); |
@@ -4206,12 +4262,13 @@ static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) | |||
4206 | static void inject_emulated_exception(struct kvm_vcpu *vcpu) | 4262 | static void inject_emulated_exception(struct kvm_vcpu *vcpu) |
4207 | { | 4263 | { |
4208 | struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; | 4264 | struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; |
4209 | if (ctxt->exception == PF_VECTOR) | 4265 | if (ctxt->exception.vector == PF_VECTOR) |
4210 | kvm_propagate_fault(vcpu); | 4266 | kvm_propagate_fault(vcpu, &ctxt->exception); |
4211 | else if (ctxt->error_code_valid) | 4267 | else if (ctxt->exception.error_code_valid) |
4212 | kvm_queue_exception_e(vcpu, ctxt->exception, ctxt->error_code); | 4268 | kvm_queue_exception_e(vcpu, ctxt->exception.vector, |
4269 | ctxt->exception.error_code); | ||
4213 | else | 4270 | else |
4214 | kvm_queue_exception(vcpu, ctxt->exception); | 4271 | kvm_queue_exception(vcpu, ctxt->exception.vector); |
4215 | } | 4272 | } |
4216 | 4273 | ||
4217 | static void init_emulate_ctxt(struct kvm_vcpu *vcpu) | 4274 | static void init_emulate_ctxt(struct kvm_vcpu *vcpu) |
@@ -4267,13 +4324,19 @@ EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt); | |||
4267 | 4324 | ||
4268 | static int handle_emulation_failure(struct kvm_vcpu *vcpu) | 4325 | static int handle_emulation_failure(struct kvm_vcpu *vcpu) |
4269 | { | 4326 | { |
4327 | int r = EMULATE_DONE; | ||
4328 | |||
4270 | ++vcpu->stat.insn_emulation_fail; | 4329 | ++vcpu->stat.insn_emulation_fail; |
4271 | trace_kvm_emulate_insn_failed(vcpu); | 4330 | trace_kvm_emulate_insn_failed(vcpu); |
4272 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | 4331 | if (!is_guest_mode(vcpu)) { |
4273 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | 4332 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; |
4274 | vcpu->run->internal.ndata = 0; | 4333 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; |
4334 | vcpu->run->internal.ndata = 0; | ||
4335 | r = EMULATE_FAIL; | ||
4336 | } | ||
4275 | kvm_queue_exception(vcpu, UD_VECTOR); | 4337 | kvm_queue_exception(vcpu, UD_VECTOR); |
4276 | return EMULATE_FAIL; | 4338 | |
4339 | return r; | ||
4277 | } | 4340 | } |
4278 | 4341 | ||
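handle_emulation_failure() now distinguishes nested operation: when the failure happens while running an L2 guest (is_guest_mode()), KVM queues #UD and reports EMULATE_DONE instead of dumping an internal error to userspace, leaving L1 to deal with the fault. The guard presumably reduces to an hflags test, along the lines of:

    /* Sketch: assumed helper - true while the vcpu executes the L2 guest. */
    static inline bool is_guest_mode(struct kvm_vcpu *vcpu)
    {
            return vcpu->arch.hflags & HF_GUEST_MASK;
    }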
4279 | static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) | 4342 | static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) |
@@ -4302,10 +4365,11 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) | |||
4302 | return false; | 4365 | return false; |
4303 | } | 4366 | } |
4304 | 4367 | ||
4305 | int emulate_instruction(struct kvm_vcpu *vcpu, | 4368 | int x86_emulate_instruction(struct kvm_vcpu *vcpu, |
4306 | unsigned long cr2, | 4369 | unsigned long cr2, |
4307 | u16 error_code, | 4370 | int emulation_type, |
4308 | int emulation_type) | 4371 | void *insn, |
4372 | int insn_len) | ||
4309 | { | 4373 | { |
4310 | int r; | 4374 | int r; |
4311 | struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; | 4375 | struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; |
@@ -4323,10 +4387,10 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
4323 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { | 4387 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { |
4324 | init_emulate_ctxt(vcpu); | 4388 | init_emulate_ctxt(vcpu); |
4325 | vcpu->arch.emulate_ctxt.interruptibility = 0; | 4389 | vcpu->arch.emulate_ctxt.interruptibility = 0; |
4326 | vcpu->arch.emulate_ctxt.exception = -1; | 4390 | vcpu->arch.emulate_ctxt.have_exception = false; |
4327 | vcpu->arch.emulate_ctxt.perm_ok = false; | 4391 | vcpu->arch.emulate_ctxt.perm_ok = false; |
4328 | 4392 | ||
4329 | r = x86_decode_insn(&vcpu->arch.emulate_ctxt); | 4393 | r = x86_decode_insn(&vcpu->arch.emulate_ctxt, insn, insn_len); |
4330 | if (r == X86EMUL_PROPAGATE_FAULT) | 4394 | if (r == X86EMUL_PROPAGATE_FAULT) |
4331 | goto done; | 4395 | goto done; |
4332 | 4396 | ||
@@ -4389,7 +4453,7 @@ restart: | |||
4389 | } | 4453 | } |
4390 | 4454 | ||
4391 | done: | 4455 | done: |
4392 | if (vcpu->arch.emulate_ctxt.exception >= 0) { | 4456 | if (vcpu->arch.emulate_ctxt.have_exception) { |
4393 | inject_emulated_exception(vcpu); | 4457 | inject_emulated_exception(vcpu); |
4394 | r = EMULATE_DONE; | 4458 | r = EMULATE_DONE; |
4395 | } else if (vcpu->arch.pio.count) { | 4459 | } else if (vcpu->arch.pio.count) { |
@@ -4413,7 +4477,7 @@ done: | |||
4413 | 4477 | ||
4414 | return r; | 4478 | return r; |
4415 | } | 4479 | } |
4416 | EXPORT_SYMBOL_GPL(emulate_instruction); | 4480 | EXPORT_SYMBOL_GPL(x86_emulate_instruction); |
4417 | 4481 | ||
4418 | int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) | 4482 | int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) |
4419 | { | 4483 | { |
@@ -4427,7 +4491,7 @@ EXPORT_SYMBOL_GPL(kvm_fast_pio_out); | |||
4427 | 4491 | ||
4428 | static void tsc_bad(void *info) | 4492 | static void tsc_bad(void *info) |
4429 | { | 4493 | { |
4430 | __get_cpu_var(cpu_tsc_khz) = 0; | 4494 | __this_cpu_write(cpu_tsc_khz, 0); |
4431 | } | 4495 | } |
4432 | 4496 | ||
4433 | static void tsc_khz_changed(void *data) | 4497 | static void tsc_khz_changed(void *data) |
@@ -4441,7 +4505,7 @@ static void tsc_khz_changed(void *data) | |||
4441 | khz = cpufreq_quick_get(raw_smp_processor_id()); | 4505 | khz = cpufreq_quick_get(raw_smp_processor_id()); |
4442 | if (!khz) | 4506 | if (!khz) |
4443 | khz = tsc_khz; | 4507 | khz = tsc_khz; |
4444 | __get_cpu_var(cpu_tsc_khz) = khz; | 4508 | __this_cpu_write(cpu_tsc_khz, khz); |
4445 | } | 4509 | } |
4446 | 4510 | ||
4447 | static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, | 4511 | static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, |
@@ -4653,7 +4717,6 @@ int kvm_arch_init(void *opaque) | |||
4653 | 4717 | ||
4654 | kvm_x86_ops = ops; | 4718 | kvm_x86_ops = ops; |
4655 | kvm_mmu_set_nonpresent_ptes(0ull, 0ull); | 4719 | kvm_mmu_set_nonpresent_ptes(0ull, 0ull); |
4656 | kvm_mmu_set_base_ptes(PT_PRESENT_MASK); | ||
4657 | kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, | 4720 | kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, |
4658 | PT_DIRTY_MASK, PT64_NX_MASK, 0); | 4721 | PT_DIRTY_MASK, PT64_NX_MASK, 0); |
4659 | 4722 | ||
@@ -5116,6 +5179,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5116 | vcpu->fpu_active = 0; | 5179 | vcpu->fpu_active = 0; |
5117 | kvm_x86_ops->fpu_deactivate(vcpu); | 5180 | kvm_x86_ops->fpu_deactivate(vcpu); |
5118 | } | 5181 | } |
5182 | if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) { | ||
5183 | /* Page is swapped out. Do synthetic halt */ | ||
5184 | vcpu->arch.apf.halted = true; | ||
5185 | r = 1; | ||
5186 | goto out; | ||
5187 | } | ||
5119 | } | 5188 | } |
5120 | 5189 | ||
5121 | r = kvm_mmu_reload(vcpu); | 5190 | r = kvm_mmu_reload(vcpu); |
@@ -5244,7 +5313,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
5244 | 5313 | ||
5245 | r = 1; | 5314 | r = 1; |
5246 | while (r > 0) { | 5315 | while (r > 0) { |
5247 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) | 5316 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && |
5317 | !vcpu->arch.apf.halted) | ||
5248 | r = vcpu_enter_guest(vcpu); | 5318 | r = vcpu_enter_guest(vcpu); |
5249 | else { | 5319 | else { |
5250 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); | 5320 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); |
@@ -5257,6 +5327,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
5257 | vcpu->arch.mp_state = | 5327 | vcpu->arch.mp_state = |
5258 | KVM_MP_STATE_RUNNABLE; | 5328 | KVM_MP_STATE_RUNNABLE; |
5259 | case KVM_MP_STATE_RUNNABLE: | 5329 | case KVM_MP_STATE_RUNNABLE: |
5330 | vcpu->arch.apf.halted = false; | ||
5260 | break; | 5331 | break; |
5261 | case KVM_MP_STATE_SIPI_RECEIVED: | 5332 | case KVM_MP_STATE_SIPI_RECEIVED: |
5262 | default: | 5333 | default: |
@@ -5278,6 +5349,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
5278 | vcpu->run->exit_reason = KVM_EXIT_INTR; | 5349 | vcpu->run->exit_reason = KVM_EXIT_INTR; |
5279 | ++vcpu->stat.request_irq_exits; | 5350 | ++vcpu->stat.request_irq_exits; |
5280 | } | 5351 | } |
5352 | |||
5353 | kvm_check_async_pf_completion(vcpu); | ||
5354 | |||
5281 | if (signal_pending(current)) { | 5355 | if (signal_pending(current)) { |
5282 | r = -EINTR; | 5356 | r = -EINTR; |
5283 | vcpu->run->exit_reason = KVM_EXIT_INTR; | 5357 | vcpu->run->exit_reason = KVM_EXIT_INTR; |
@@ -5302,6 +5376,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
5302 | int r; | 5376 | int r; |
5303 | sigset_t sigsaved; | 5377 | sigset_t sigsaved; |
5304 | 5378 | ||
5379 | if (!tsk_used_math(current) && init_fpu(current)) | ||
5380 | return -ENOMEM; | ||
5381 | |||
5305 | if (vcpu->sigset_active) | 5382 | if (vcpu->sigset_active) |
5306 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); | 5383 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); |
5307 | 5384 | ||
@@ -5313,8 +5390,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
5313 | } | 5390 | } |
5314 | 5391 | ||
5315 | /* re-sync apic's tpr */ | 5392 | /* re-sync apic's tpr */ |
5316 | if (!irqchip_in_kernel(vcpu->kvm)) | 5393 | if (!irqchip_in_kernel(vcpu->kvm)) { |
5317 | kvm_set_cr8(vcpu, kvm_run->cr8); | 5394 | if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) { |
5395 | r = -EINVAL; | ||
5396 | goto out; | ||
5397 | } | ||
5398 | } | ||
5318 | 5399 | ||
5319 | if (vcpu->arch.pio.count || vcpu->mmio_needed) { | 5400 | if (vcpu->arch.pio.count || vcpu->mmio_needed) { |
5320 | if (vcpu->mmio_needed) { | 5401 | if (vcpu->mmio_needed) { |
@@ -5323,7 +5404,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
5323 | vcpu->mmio_needed = 0; | 5404 | vcpu->mmio_needed = 0; |
5324 | } | 5405 | } |
5325 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | 5406 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); |
5326 | r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE); | 5407 | r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); |
5327 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | 5408 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); |
5328 | if (r != EMULATE_DONE) { | 5409 | if (r != EMULATE_DONE) { |
5329 | r = 0; | 5410 | r = 0; |
@@ -5436,7 +5517,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
5436 | 5517 | ||
5437 | sregs->cr0 = kvm_read_cr0(vcpu); | 5518 | sregs->cr0 = kvm_read_cr0(vcpu); |
5438 | sregs->cr2 = vcpu->arch.cr2; | 5519 | sregs->cr2 = vcpu->arch.cr2; |
5439 | sregs->cr3 = vcpu->arch.cr3; | 5520 | sregs->cr3 = kvm_read_cr3(vcpu); |
5440 | sregs->cr4 = kvm_read_cr4(vcpu); | 5521 | sregs->cr4 = kvm_read_cr4(vcpu); |
5441 | sregs->cr8 = kvm_get_cr8(vcpu); | 5522 | sregs->cr8 = kvm_get_cr8(vcpu); |
5442 | sregs->efer = vcpu->arch.efer; | 5523 | sregs->efer = vcpu->arch.efer; |
@@ -5504,8 +5585,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
5504 | kvm_x86_ops->set_gdt(vcpu, &dt); | 5585 | kvm_x86_ops->set_gdt(vcpu, &dt); |
5505 | 5586 | ||
5506 | vcpu->arch.cr2 = sregs->cr2; | 5587 | vcpu->arch.cr2 = sregs->cr2; |
5507 | mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3; | 5588 | mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3; |
5508 | vcpu->arch.cr3 = sregs->cr3; | 5589 | vcpu->arch.cr3 = sregs->cr3; |
5590 | __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); | ||
5509 | 5591 | ||
5510 | kvm_set_cr8(vcpu, sregs->cr8); | 5592 | kvm_set_cr8(vcpu, sregs->cr8); |
5511 | 5593 | ||
@@ -5522,7 +5604,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
5522 | if (sregs->cr4 & X86_CR4_OSXSAVE) | 5604 | if (sregs->cr4 & X86_CR4_OSXSAVE) |
5523 | update_cpuid(vcpu); | 5605 | update_cpuid(vcpu); |
5524 | if (!is_long_mode(vcpu) && is_pae(vcpu)) { | 5606 | if (!is_long_mode(vcpu) && is_pae(vcpu)) { |
5525 | load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3); | 5607 | load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)); |
5526 | mmu_reset_needed = 1; | 5608 | mmu_reset_needed = 1; |
5527 | } | 5609 | } |
5528 | 5610 | ||
@@ -5773,6 +5855,8 @@ free_vcpu: | |||
5773 | 5855 | ||
5774 | void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) | 5856 | void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) |
5775 | { | 5857 | { |
5858 | vcpu->arch.apf.msr_val = 0; | ||
5859 | |||
5776 | vcpu_load(vcpu); | 5860 | vcpu_load(vcpu); |
5777 | kvm_mmu_unload(vcpu); | 5861 | kvm_mmu_unload(vcpu); |
5778 | vcpu_put(vcpu); | 5862 | vcpu_put(vcpu); |
@@ -5792,6 +5876,11 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) | |||
5792 | vcpu->arch.dr7 = DR7_FIXED_1; | 5876 | vcpu->arch.dr7 = DR7_FIXED_1; |
5793 | 5877 | ||
5794 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 5878 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
5879 | vcpu->arch.apf.msr_val = 0; | ||
5880 | |||
5881 | kvm_clear_async_pf_completion_queue(vcpu); | ||
5882 | kvm_async_pf_hash_reset(vcpu); | ||
5883 | vcpu->arch.apf.halted = false; | ||
5795 | 5884 | ||
5796 | return kvm_x86_ops->vcpu_reset(vcpu); | 5885 | return kvm_x86_ops->vcpu_reset(vcpu); |
5797 | } | 5886 | } |
@@ -5881,6 +5970,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
5881 | if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) | 5970 | if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) |
5882 | goto fail_free_mce_banks; | 5971 | goto fail_free_mce_banks; |
5883 | 5972 | ||
5973 | kvm_async_pf_hash_reset(vcpu); | ||
5974 | |||
5884 | return 0; | 5975 | return 0; |
5885 | fail_free_mce_banks: | 5976 | fail_free_mce_banks: |
5886 | kfree(vcpu->arch.mce_banks); | 5977 | kfree(vcpu->arch.mce_banks); |
@@ -5906,13 +5997,8 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) | |||
5906 | free_page((unsigned long)vcpu->arch.pio_data); | 5997 | free_page((unsigned long)vcpu->arch.pio_data); |
5907 | } | 5998 | } |
5908 | 5999 | ||
5909 | struct kvm *kvm_arch_create_vm(void) | 6000 | int kvm_arch_init_vm(struct kvm *kvm) |
5910 | { | 6001 | { |
5911 | struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); | ||
5912 | |||
5913 | if (!kvm) | ||
5914 | return ERR_PTR(-ENOMEM); | ||
5915 | |||
5916 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); | 6002 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); |
5917 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); | 6003 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); |
5918 | 6004 | ||
@@ -5921,7 +6007,7 @@ struct kvm *kvm_arch_create_vm(void) | |||
5921 | 6007 | ||
5922 | spin_lock_init(&kvm->arch.tsc_write_lock); | 6008 | spin_lock_init(&kvm->arch.tsc_write_lock); |
5923 | 6009 | ||
5924 | return kvm; | 6010 | return 0; |
5925 | } | 6011 | } |
5926 | 6012 | ||
5927 | static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) | 6013 | static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) |
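kvm_arch_create_vm() becomes kvm_arch_init_vm(): allocating struct kvm (and, per the kvm_arch_destroy_vm() hunk below, freeing memslots and cleaning up the SRCU struct) moves into arch-neutral code, and the arch hook only fills in x86 state. The generic caller presumably takes the shape the deleted lines had:

    /* Sketch: assumed shape of the common kvm_create_vm() after this change. */
    struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);

    if (!kvm)
            return ERR_PTR(-ENOMEM);
    r = kvm_arch_init_vm(kvm);
    if (r)
            goto out_err; /* hypothetical unwind label */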
@@ -5939,8 +6025,10 @@ static void kvm_free_vcpus(struct kvm *kvm) | |||
5939 | /* | 6025 | /* |
5940 | * Unpin any mmu pages first. | 6026 | * Unpin any mmu pages first. |
5941 | */ | 6027 | */ |
5942 | kvm_for_each_vcpu(i, vcpu, kvm) | 6028 | kvm_for_each_vcpu(i, vcpu, kvm) { |
6029 | kvm_clear_async_pf_completion_queue(vcpu); | ||
5943 | kvm_unload_vcpu_mmu(vcpu); | 6030 | kvm_unload_vcpu_mmu(vcpu); |
6031 | } | ||
5944 | kvm_for_each_vcpu(i, vcpu, kvm) | 6032 | kvm_for_each_vcpu(i, vcpu, kvm) |
5945 | kvm_arch_vcpu_free(vcpu); | 6033 | kvm_arch_vcpu_free(vcpu); |
5946 | 6034 | ||
@@ -5964,13 +6052,10 @@ void kvm_arch_destroy_vm(struct kvm *kvm) | |||
5964 | kfree(kvm->arch.vpic); | 6052 | kfree(kvm->arch.vpic); |
5965 | kfree(kvm->arch.vioapic); | 6053 | kfree(kvm->arch.vioapic); |
5966 | kvm_free_vcpus(kvm); | 6054 | kvm_free_vcpus(kvm); |
5967 | kvm_free_physmem(kvm); | ||
5968 | if (kvm->arch.apic_access_page) | 6055 | if (kvm->arch.apic_access_page) |
5969 | put_page(kvm->arch.apic_access_page); | 6056 | put_page(kvm->arch.apic_access_page); |
5970 | if (kvm->arch.ept_identity_pagetable) | 6057 | if (kvm->arch.ept_identity_pagetable) |
5971 | put_page(kvm->arch.ept_identity_pagetable); | 6058 | put_page(kvm->arch.ept_identity_pagetable); |
5972 | cleanup_srcu_struct(&kvm->srcu); | ||
5973 | kfree(kvm); | ||
5974 | } | 6059 | } |
5975 | 6060 | ||
5976 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 6061 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
@@ -6051,7 +6136,9 @@ void kvm_arch_flush_shadow(struct kvm *kvm) | |||
6051 | 6136 | ||
6052 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | 6137 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) |
6053 | { | 6138 | { |
6054 | return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE | 6139 | return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && |
6140 | !vcpu->arch.apf.halted) | ||
6141 | || !list_empty_careful(&vcpu->async_pf.done) | ||
6055 | || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED | 6142 | || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED |
6056 | || vcpu->arch.nmi_pending || | 6143 | || vcpu->arch.nmi_pending || |
6057 | (kvm_arch_interrupt_allowed(vcpu) && | 6144 | (kvm_arch_interrupt_allowed(vcpu) && |
@@ -6110,6 +6197,147 @@ void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | |||
6110 | } | 6197 | } |
6111 | EXPORT_SYMBOL_GPL(kvm_set_rflags); | 6198 | EXPORT_SYMBOL_GPL(kvm_set_rflags); |
6112 | 6199 | ||
6200 | void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) | ||
6201 | { | ||
6202 | int r; | ||
6203 | |||
6204 | if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) || | ||
6205 | is_error_page(work->page)) | ||
6206 | return; | ||
6207 | |||
6208 | r = kvm_mmu_reload(vcpu); | ||
6209 | if (unlikely(r)) | ||
6210 | return; | ||
6211 | |||
6212 | if (!vcpu->arch.mmu.direct_map && | ||
6213 | work->arch.cr3 != vcpu->arch.mmu.get_cr3(vcpu)) | ||
6214 | return; | ||
6215 | |||
6216 | vcpu->arch.mmu.page_fault(vcpu, work->gva, 0, true); | ||
6217 | } | ||
6218 | |||
6219 | static inline u32 kvm_async_pf_hash_fn(gfn_t gfn) | ||
6220 | { | ||
6221 | return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU)); | ||
6222 | } | ||
6223 | |||
6224 | static inline u32 kvm_async_pf_next_probe(u32 key) | ||
6225 | { | ||
6226 | return (key + 1) & (roundup_pow_of_two(ASYNC_PF_PER_VCPU) - 1); | ||
6227 | } | ||
6228 | |||
6229 | static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) | ||
6230 | { | ||
6231 | u32 key = kvm_async_pf_hash_fn(gfn); | ||
6232 | |||
6233 | while (vcpu->arch.apf.gfns[key] != ~0) | ||
6234 | key = kvm_async_pf_next_probe(key); | ||
6235 | |||
6236 | vcpu->arch.apf.gfns[key] = gfn; | ||
6237 | } | ||
6238 | |||
6239 | static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn) | ||
6240 | { | ||
6241 | int i; | ||
6242 | u32 key = kvm_async_pf_hash_fn(gfn); | ||
6243 | |||
6244 | for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) && | ||
6245 | (vcpu->arch.apf.gfns[key] != gfn && | ||
6246 | vcpu->arch.apf.gfns[key] != ~0); i++) | ||
6247 | key = kvm_async_pf_next_probe(key); | ||
6248 | |||
6249 | return key; | ||
6250 | } | ||
6251 | |||
6252 | bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) | ||
6253 | { | ||
6254 | return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn; | ||
6255 | } | ||
6256 | |||
6257 | static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) | ||
6258 | { | ||
6259 | u32 i, j, k; | ||
6260 | |||
6261 | i = j = kvm_async_pf_gfn_slot(vcpu, gfn); | ||
6262 | while (true) { | ||
6263 | vcpu->arch.apf.gfns[i] = ~0; | ||
6264 | do { | ||
6265 | j = kvm_async_pf_next_probe(j); | ||
6266 | if (vcpu->arch.apf.gfns[j] == ~0) | ||
6267 | return; | ||
6268 | k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]); | ||
6269 | /* | ||
6270 | * k lies cyclically in ]i,j] | ||
6271 | * | i.k.j | | ||
6272 | * |....j i.k.| or |.k..j i...| | ||
6273 | */ | ||
6274 | } while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j)); | ||
6275 | vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j]; | ||
6276 | i = j; | ||
6277 | } | ||
6278 | } | ||
6279 | |||
6280 | static int apf_put_user(struct kvm_vcpu *vcpu, u32 val) | ||
6281 | { | ||
6282 | |||
6283 | return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val, | ||
6284 | sizeof(val)); | ||
6285 | } | ||
6286 | |||
6287 | void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, | ||
6288 | struct kvm_async_pf *work) | ||
6289 | { | ||
6290 | struct x86_exception fault; | ||
6291 | |||
6292 | trace_kvm_async_pf_not_present(work->arch.token, work->gva); | ||
6293 | kvm_add_async_pf_gfn(vcpu, work->arch.gfn); | ||
6294 | |||
6295 | if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) || | ||
6296 | (vcpu->arch.apf.send_user_only && | ||
6297 | kvm_x86_ops->get_cpl(vcpu) == 0)) | ||
6298 | kvm_make_request(KVM_REQ_APF_HALT, vcpu); | ||
6299 | else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) { | ||
6300 | fault.vector = PF_VECTOR; | ||
6301 | fault.error_code_valid = true; | ||
6302 | fault.error_code = 0; | ||
6303 | fault.nested_page_fault = false; | ||
6304 | fault.address = work->arch.token; | ||
6305 | kvm_inject_page_fault(vcpu, &fault); | ||
6306 | } | ||
6307 | } | ||
6308 | |||
6309 | void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, | ||
6310 | struct kvm_async_pf *work) | ||
6311 | { | ||
6312 | struct x86_exception fault; | ||
6313 | |||
6314 | trace_kvm_async_pf_ready(work->arch.token, work->gva); | ||
6315 | if (is_error_page(work->page)) | ||
6316 | work->arch.token = ~0; /* broadcast wakeup */ | ||
6317 | else | ||
6318 | kvm_del_async_pf_gfn(vcpu, work->arch.gfn); | ||
6319 | |||
6320 | if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) && | ||
6321 | !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) { | ||
6322 | fault.vector = PF_VECTOR; | ||
6323 | fault.error_code_valid = true; | ||
6324 | fault.error_code = 0; | ||
6325 | fault.nested_page_fault = false; | ||
6326 | fault.address = work->arch.token; | ||
6327 | kvm_inject_page_fault(vcpu, &fault); | ||
6328 | } | ||
6329 | vcpu->arch.apf.halted = false; | ||
6330 | } | ||
6331 | |||
6332 | bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu) | ||
6333 | { | ||
6334 | if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED)) | ||
6335 | return true; | ||
6336 | else | ||
6337 | return !kvm_event_needs_reinjection(vcpu) && | ||
6338 | kvm_x86_ops->interrupt_allowed(vcpu); | ||
6339 | } | ||
6340 | |||
6113 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); | 6341 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); |
6114 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); | 6342 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); |
6115 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); | 6343 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); |
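
The gfn bookkeeping added above is a small open-addressed hash table with linear probing; the interesting part is kvm_del_async_pf_gfn(), which re-homes any entry whose probe chain crosses the vacated slot (the "k lies cyclically in ]i,j]" test). A minimal userspace sketch of the same insert/find/delete logic, assuming a 64-slot table and a simple multiplicative hash in place of the kernel's hash_32()/ASYNC_PF_PER_VCPU:

	/*
	 * Userspace sketch of the open-addressed gfn hash above, including
	 * the backward-shift deletion. TABLE_ORDER and the hash constant
	 * stand in for ASYNC_PF_PER_VCPU and hash_32(); values are
	 * illustrative, not the kernel's.
	 */
	#include <stdio.h>
	#include <stdint.h>

	#define TABLE_ORDER 6			/* 64 slots */
	#define TABLE_SIZE  (1u << TABLE_ORDER)
	#define EMPTY       (~(uint64_t)0)

	static uint64_t slots[TABLE_SIZE];

	/* multiplicative hash folded to TABLE_ORDER bits (hash_32() analog) */
	static uint32_t hash_fn(uint64_t gfn)
	{
		return ((uint32_t)gfn * 0x61C88647u) >> (32 - TABLE_ORDER);
	}

	static uint32_t next_probe(uint32_t key)
	{
		return (key + 1) & (TABLE_SIZE - 1);
	}

	static void add_gfn(uint64_t gfn)
	{
		uint32_t key = hash_fn(gfn);

		while (slots[key] != EMPTY)
			key = next_probe(key);
		slots[key] = gfn;
	}

	static uint32_t gfn_slot(uint64_t gfn)
	{
		uint32_t key = hash_fn(gfn);
		uint32_t i;

		for (i = 0; i < TABLE_SIZE &&
		     slots[key] != gfn && slots[key] != EMPTY; i++)
			key = next_probe(key);
		return key;
	}

	/* backward-shift delete: re-home entries whose chain crosses the hole */
	static void del_gfn(uint64_t gfn)
	{
		uint32_t i, j, k;

		i = j = gfn_slot(gfn);
		for (;;) {
			slots[i] = EMPTY;
			do {
				j = next_probe(j);
				if (slots[j] == EMPTY)
					return;
				k = hash_fn(slots[j]);
				/* keep scanning while k lies cyclically in ]i,j] */
			} while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
			slots[i] = slots[j];
			i = j;
		}
	}

	int main(void)
	{
		uint32_t i;

		for (i = 0; i < TABLE_SIZE; i++)
			slots[i] = EMPTY;
		add_gfn(0x1000); add_gfn(0x2000); add_gfn(0x1000 + TABLE_SIZE);
		del_gfn(0x1000);
		printf("0x2000 present: %d\n", slots[gfn_slot(0x2000)] == 0x2000);
		return 0;
	}

Deleting this way avoids tombstones entirely, which matters for a table that is probed on every async page fault.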
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index ff485d361182..fc45ba887d05 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c | |||
@@ -121,7 +121,7 @@ inline void __const_udelay(unsigned long xloops) | |||
121 | asm("mull %%edx" | 121 | asm("mull %%edx" |
122 | :"=d" (xloops), "=&a" (d0) | 122 | :"=d" (xloops), "=&a" (d0) |
123 | :"1" (xloops), "0" | 123 | :"1" (xloops), "0" |
124 | (cpu_data(raw_smp_processor_id()).loops_per_jiffy * (HZ/4))); | 124 | (this_cpu_read(cpu_info.loops_per_jiffy) * (HZ/4))); |
125 | 125 | ||
126 | __delay(++xloops); | 126 | __delay(++xloops); |
127 | } | 127 | } |
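
The delay.c change only swaps the per-cpu access (this_cpu_read() compiles to a single %gs-relative load instead of indexing cpu_data[] by raw_smp_processor_id()); the surrounding mull is a 32x32->64 multiply that keeps the high half, i.e. a fixed-point scale by loops_per_jiffy. A sketch of that arithmetic with made-up values:

	/*
	 * Illustration of the "mull %%edx" above: a 32x32->64 multiply that
	 * keeps only the high 32 bits, i.e. multiply by (b / 2^32). Treating
	 * the input as a 0.32 fixed-point fraction of a jiffy makes the
	 * result a loop count; the kernel folds its microsecond scale
	 * constant and the HZ/4 factor into this same step.
	 */
	#include <stdio.h>
	#include <stdint.h>

	/* high 32 bits of a 32x32 multiply, what "mull %%edx" leaves in %edx */
	static uint32_t mul_hi32(uint32_t a, uint32_t b)
	{
		return (uint32_t)(((uint64_t)a * b) >> 32);
	}

	int main(void)
	{
		uint32_t lpj = 4000000;			/* example loops_per_jiffy */
		uint32_t half_jiffy = 0x80000000u;	/* 0.5 in 0.32 fixed point */

		printf("loops for half a jiffy: %u\n", mul_hi32(half_jiffy, lpj));
		return 0;
	}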
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 738e6593799d..dbe34b931374 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/mm.h> | 8 | #include <linux/mm.h> |
9 | #include <linux/vmstat.h> | 9 | #include <linux/vmstat.h> |
10 | #include <linux/highmem.h> | 10 | #include <linux/highmem.h> |
11 | #include <linux/swap.h> | ||
11 | 12 | ||
12 | #include <asm/pgtable.h> | 13 | #include <asm/pgtable.h> |
13 | 14 | ||
@@ -89,6 +90,7 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, | |||
89 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); | 90 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); |
90 | page = pte_page(pte); | 91 | page = pte_page(pte); |
91 | get_page(page); | 92 | get_page(page); |
93 | SetPageReferenced(page); | ||
92 | pages[*nr] = page; | 94 | pages[*nr] = page; |
93 | (*nr)++; | 95 | (*nr)++; |
94 | 96 | ||
@@ -103,6 +105,17 @@ static inline void get_head_page_multiple(struct page *page, int nr) | |||
103 | VM_BUG_ON(page != compound_head(page)); | 105 | VM_BUG_ON(page != compound_head(page)); |
104 | VM_BUG_ON(page_count(page) == 0); | 106 | VM_BUG_ON(page_count(page) == 0); |
105 | atomic_add(nr, &page->_count); | 107 | atomic_add(nr, &page->_count); |
108 | SetPageReferenced(page); | ||
109 | } | ||
110 | |||
111 | static inline void get_huge_page_tail(struct page *page) | ||
112 | { | ||
113 | /* | ||
114 | * __split_huge_page_refcount() cannot run | ||
115 | * from under us. | ||
116 | */ | ||
117 | VM_BUG_ON(atomic_read(&page->_count) < 0); | ||
118 | atomic_inc(&page->_count); | ||
106 | } | 119 | } |
107 | 120 | ||
108 | static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, | 121 | static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, |
@@ -128,6 +141,8 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, | |||
128 | do { | 141 | do { |
129 | VM_BUG_ON(compound_head(page) != head); | 142 | VM_BUG_ON(compound_head(page) != head); |
130 | pages[*nr] = page; | 143 | pages[*nr] = page; |
144 | if (PageTail(page)) | ||
145 | get_huge_page_tail(page); | ||
131 | (*nr)++; | 146 | (*nr)++; |
132 | page++; | 147 | page++; |
133 | refs++; | 148 | refs++; |
@@ -148,7 +163,18 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, | |||
148 | pmd_t pmd = *pmdp; | 163 | pmd_t pmd = *pmdp; |
149 | 164 | ||
150 | next = pmd_addr_end(addr, end); | 165 | next = pmd_addr_end(addr, end); |
151 | if (pmd_none(pmd)) | 166 | /* |
167 | * The pmd_trans_splitting() check below explains why | ||
168 | * pmdp_splitting_flush has to flush the tlb, to stop | ||
169 | * this gup-fast code from running while we set the | ||
170 | * splitting bit in the pmd. Returning zero will take | ||
171 | * the slow path that will call wait_split_huge_page() | ||
172 | * if the pmd is still in splitting state. gup-fast | ||
173 | * can't because it has irq disabled and | ||
174 | * wait_split_huge_page() would never return as the | ||
175 | * tlb flush IPI wouldn't run. | ||
176 | */ | ||
177 | if (pmd_none(pmd) || pmd_trans_splitting(pmd)) | ||
152 | return 0; | 178 | return 0; |
153 | if (unlikely(pmd_large(pmd))) { | 179 | if (unlikely(pmd_large(pmd))) { |
154 | if (!gup_huge_pmd(pmd, addr, next, write, pages, nr)) | 180 | if (!gup_huge_pmd(pmd, addr, next, write, pages, nr)) |
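
Two things happen in the gup.c hunks: gup-fast now backs off when it sees a pmd marked as splitting (it runs with IRQs disabled, so it must not wait on a splitter that needs a TLB-flush IPI to make progress), and gup_huge_pmd() pins tail pages individually so that a concurrent __split_huge_page_refcount() can distribute the pins correctly. A toy userspace analog of the head/tail pinning, with an invented struct standing in for struct page:

	/*
	 * Userspace analog of the pinning added to gup_huge_pmd(): pinning a
	 * range inside a huge page takes one head reference per subpage plus
	 * one reference on each tail page touched. struct toy_page is a toy
	 * stand-in for struct page.
	 */
	#include <stdio.h>
	#include <stdatomic.h>

	#define HPAGE_NR 512			/* 4K subpages in a 2M huge page */

	struct toy_page {
		atomic_int count;
		struct toy_page *head;		/* compound head, self for head */
	};

	static struct toy_page huge[HPAGE_NR];

	static void pin_range(int first, int nr)
	{
		int i;

		/* get_head_page_multiple(): one head ref per pinned subpage */
		atomic_fetch_add(&huge[0].count, nr);
		for (i = first; i < first + nr; i++)
			if (huge[i].head != &huge[i])	/* PageTail() analog */
				atomic_fetch_add(&huge[i].count, 1);
	}

	int main(void)
	{
		int i;

		for (i = 0; i < HPAGE_NR; i++) {
			atomic_init(&huge[i].count, i == 0);	/* head starts at 1 */
			huge[i].head = &huge[0];
		}

		pin_range(8, 4);
		printf("head count %d, tail[8] count %d\n",
		       atomic_load(&huge[0].count), atomic_load(&huge[8].count));
		return 0;
	}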
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index f89b5bb4e93f..c821074b7f0b 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include <asm/bugs.h> | 45 | #include <asm/bugs.h> |
46 | #include <asm/tlb.h> | 46 | #include <asm/tlb.h> |
47 | #include <asm/tlbflush.h> | 47 | #include <asm/tlbflush.h> |
48 | #include <asm/olpc_ofw.h> | ||
48 | #include <asm/pgalloc.h> | 49 | #include <asm/pgalloc.h> |
49 | #include <asm/sections.h> | 50 | #include <asm/sections.h> |
50 | #include <asm/paravirt.h> | 51 | #include <asm/paravirt.h> |
@@ -715,6 +716,7 @@ void __init paging_init(void) | |||
715 | /* | 716 | /* |
716 | * NOTE: at this point the bootmem allocator is fully available. | 717 | * NOTE: at this point the bootmem allocator is fully available. |
717 | */ | 718 | */ |
719 | olpc_dt_build_devicetree(); | ||
718 | sparse_init(); | 720 | sparse_init(); |
719 | zone_sizes_init(); | 721 | zone_sizes_init(); |
720 | } | 722 | } |
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 8be8c7d7bc89..500242d3c96d 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c | |||
@@ -320,6 +320,25 @@ int ptep_set_access_flags(struct vm_area_struct *vma, | |||
320 | return changed; | 320 | return changed; |
321 | } | 321 | } |
322 | 322 | ||
323 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
324 | int pmdp_set_access_flags(struct vm_area_struct *vma, | ||
325 | unsigned long address, pmd_t *pmdp, | ||
326 | pmd_t entry, int dirty) | ||
327 | { | ||
328 | int changed = !pmd_same(*pmdp, entry); | ||
329 | |||
330 | VM_BUG_ON(address & ~HPAGE_PMD_MASK); | ||
331 | |||
332 | if (changed && dirty) { | ||
333 | *pmdp = entry; | ||
334 | pmd_update_defer(vma->vm_mm, address, pmdp); | ||
335 | flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); | ||
336 | } | ||
337 | |||
338 | return changed; | ||
339 | } | ||
340 | #endif | ||
341 | |||
323 | int ptep_test_and_clear_young(struct vm_area_struct *vma, | 342 | int ptep_test_and_clear_young(struct vm_area_struct *vma, |
324 | unsigned long addr, pte_t *ptep) | 343 | unsigned long addr, pte_t *ptep) |
325 | { | 344 | { |
@@ -335,6 +354,23 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma, | |||
335 | return ret; | 354 | return ret; |
336 | } | 355 | } |
337 | 356 | ||
357 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
358 | int pmdp_test_and_clear_young(struct vm_area_struct *vma, | ||
359 | unsigned long addr, pmd_t *pmdp) | ||
360 | { | ||
361 | int ret = 0; | ||
362 | |||
363 | if (pmd_young(*pmdp)) | ||
364 | ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, | ||
365 | (unsigned long *)pmdp); | ||
366 | |||
367 | if (ret) | ||
368 | pmd_update(vma->vm_mm, addr, pmdp); | ||
369 | |||
370 | return ret; | ||
371 | } | ||
372 | #endif | ||
373 | |||
338 | int ptep_clear_flush_young(struct vm_area_struct *vma, | 374 | int ptep_clear_flush_young(struct vm_area_struct *vma, |
339 | unsigned long address, pte_t *ptep) | 375 | unsigned long address, pte_t *ptep) |
340 | { | 376 | { |
@@ -347,6 +383,36 @@ int ptep_clear_flush_young(struct vm_area_struct *vma, | |||
347 | return young; | 383 | return young; |
348 | } | 384 | } |
349 | 385 | ||
386 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
387 | int pmdp_clear_flush_young(struct vm_area_struct *vma, | ||
388 | unsigned long address, pmd_t *pmdp) | ||
389 | { | ||
390 | int young; | ||
391 | |||
392 | VM_BUG_ON(address & ~HPAGE_PMD_MASK); | ||
393 | |||
394 | young = pmdp_test_and_clear_young(vma, address, pmdp); | ||
395 | if (young) | ||
396 | flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); | ||
397 | |||
398 | return young; | ||
399 | } | ||
400 | |||
401 | void pmdp_splitting_flush(struct vm_area_struct *vma, | ||
402 | unsigned long address, pmd_t *pmdp) | ||
403 | { | ||
404 | int set; | ||
405 | VM_BUG_ON(address & ~HPAGE_PMD_MASK); | ||
406 | set = !test_and_set_bit(_PAGE_BIT_SPLITTING, | ||
407 | (unsigned long *)pmdp); | ||
408 | if (set) { | ||
409 | pmd_update(vma->vm_mm, address, pmdp); | ||
410 | /* need tlb flush only to serialize against gup-fast */ | ||
411 | flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); | ||
412 | } | ||
413 | } | ||
414 | #endif | ||
415 | |||
350 | /** | 416 | /** |
351 | * reserve_top_address - reserves a hole in the top of kernel address space | 417 | * reserve_top_address - reserves a hole in the top of kernel address space |
352 | * @reserve - size of hole to reserve | 418 | * @reserve - size of hole to reserve |
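
All three pmdp_* helpers above are huge-page copies of their pte counterparts: clear the accessed (or set the splitting) bit atomically, then propagate via pmd_update() and flush_tlb_range() over HPAGE_PMD_SIZE. The core operation is an atomic test-and-clear on one bit of the pmd word; a standalone sketch with C11 atomics and a toy pmd value:

	/*
	 * Sketch of the accessed-bit handling in pmdp_test_and_clear_young():
	 * atomically clear one bit of the (toy) pmd word and report its old
	 * value. _PAGE_BIT_ACCESSED is 5 on x86; the pmd value is made up.
	 */
	#include <stdio.h>
	#include <stdatomic.h>

	#define PAGE_BIT_ACCESSED 5

	static int test_and_clear_bit(int nr, _Atomic unsigned long *addr)
	{
		unsigned long mask = 1UL << nr;

		return (atomic_fetch_and(addr, ~mask) & mask) != 0;
	}

	static _Atomic unsigned long toy_pmd = (1UL << PAGE_BIT_ACCESSED) | 0x1;

	int main(void)
	{
		printf("young: %d\n", test_and_clear_bit(PAGE_BIT_ACCESSED, &toy_pmd));
		printf("young: %d\n", test_and_clear_bit(PAGE_BIT_ACCESSED, &toy_pmd));
		return 0;
	}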
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 358c8b9c96a7..e2b7b0c06cdf 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c | |||
@@ -65,7 +65,6 @@ static int profile_exceptions_notify(struct notifier_block *self, | |||
65 | 65 | ||
66 | switch (val) { | 66 | switch (val) { |
67 | case DIE_NMI: | 67 | case DIE_NMI: |
68 | case DIE_NMI_IPI: | ||
69 | if (ctr_running) | 68 | if (ctr_running) |
70 | model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs)); | 69 | model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs)); |
71 | else if (!nmi_enabled) | 70 | else if (!nmi_enabled) |
@@ -143,7 +142,7 @@ static inline int has_mux(void) | |||
143 | 142 | ||
144 | inline int op_x86_phys_to_virt(int phys) | 143 | inline int op_x86_phys_to_virt(int phys) |
145 | { | 144 | { |
146 | return __get_cpu_var(switch_index) + phys; | 145 | return __this_cpu_read(switch_index) + phys; |
147 | } | 146 | } |
148 | 147 | ||
149 | inline int op_x86_virt_to_phys(int virt) | 148 | inline int op_x86_virt_to_phys(int virt) |
@@ -361,7 +360,7 @@ static void nmi_cpu_setup(void *dummy) | |||
361 | static struct notifier_block profile_exceptions_nb = { | 360 | static struct notifier_block profile_exceptions_nb = { |
362 | .notifier_call = profile_exceptions_notify, | 361 | .notifier_call = profile_exceptions_notify, |
363 | .next = NULL, | 362 | .next = NULL, |
364 | .priority = 2 | 363 | .priority = NMI_LOCAL_LOW_PRIOR, |
365 | }; | 364 | }; |
366 | 365 | ||
367 | static void nmi_cpu_restore_registers(struct op_msrs *msrs) | 366 | static void nmi_cpu_restore_registers(struct op_msrs *msrs) |
diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c index 0636dd93cef8..720bf5a53c51 100644 --- a/arch/x86/oprofile/nmi_timer_int.c +++ b/arch/x86/oprofile/nmi_timer_int.c | |||
@@ -38,7 +38,7 @@ static int profile_timer_exceptions_notify(struct notifier_block *self, | |||
38 | static struct notifier_block profile_timer_exceptions_nb = { | 38 | static struct notifier_block profile_timer_exceptions_nb = { |
39 | .notifier_call = profile_timer_exceptions_notify, | 39 | .notifier_call = profile_timer_exceptions_notify, |
40 | .next = NULL, | 40 | .next = NULL, |
41 | .priority = 0 | 41 | .priority = NMI_LOW_PRIOR, |
42 | }; | 42 | }; |
43 | 43 | ||
44 | static int timer_start(void) | 44 | static int timer_start(void) |
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index d769cda54082..94b745045e45 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c | |||
@@ -95,8 +95,8 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model, | |||
95 | * counter width: | 95 | * counter width: |
96 | */ | 96 | */ |
97 | if (!(eax.split.version_id == 0 && | 97 | if (!(eax.split.version_id == 0 && |
98 | current_cpu_data.x86 == 6 && | 98 | __this_cpu_read(cpu_info.x86) == 6 && |
99 | current_cpu_data.x86_model == 15)) { | 99 | __this_cpu_read(cpu_info.x86_model) == 15)) { |
100 | 100 | ||
101 | if (counter_width < eax.split.bit_width) | 101 | if (counter_width < eax.split.bit_width) |
102 | counter_width = eax.split.bit_width; | 102 | counter_width = eax.split.bit_width; |
@@ -235,8 +235,8 @@ static void arch_perfmon_setup_counters(void) | |||
235 | eax.full = cpuid_eax(0xa); | 235 | eax.full = cpuid_eax(0xa); |
236 | 236 | ||
237 | /* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */ | 237 | /* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */ |
238 | if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 && | 238 | if (eax.split.version_id == 0 && __this_cpu_read(cpu_info.x86) == 6 && |
239 | current_cpu_data.x86_model == 15) { | 239 | __this_cpu_read(cpu_info.x86_model) == 15) { |
240 | eax.split.version_id = 2; | 240 | eax.split.version_id = 2; |
241 | eax.split.num_counters = 2; | 241 | eax.split.num_counters = 2; |
242 | eax.split.bit_width = 40; | 242 | eax.split.bit_width = 40; |
diff --git a/arch/x86/pci/broadcom_bus.c b/arch/x86/pci/broadcom_bus.c index 0846a5bbbfbd..ab8269b0da29 100644 --- a/arch/x86/pci/broadcom_bus.c +++ b/arch/x86/pci/broadcom_bus.c | |||
@@ -9,6 +9,7 @@ | |||
9 | * option) any later version. | 9 | * option) any later version. |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #include <linux/acpi.h> | ||
12 | #include <linux/delay.h> | 13 | #include <linux/delay.h> |
13 | #include <linux/dmi.h> | 14 | #include <linux/dmi.h> |
14 | #include <linux/pci.h> | 15 | #include <linux/pci.h> |
@@ -25,12 +26,14 @@ static void __devinit cnb20le_res(struct pci_dev *dev) | |||
25 | u8 fbus, lbus; | 26 | u8 fbus, lbus; |
26 | int i; | 27 | int i; |
27 | 28 | ||
29 | #ifdef CONFIG_ACPI | ||
28 | /* | 30 | /* |
29 | * The x86_pci_root_bus_res_quirks() function already refuses to use | 31 | * We should get host bridge information from ACPI unless the BIOS |
30 | * this information if ACPI _CRS was used. Therefore, we don't bother | 32 | * doesn't support it. |
31 | * checking if ACPI is enabled, and just generate the information | ||
32 | * for both the ACPI _CRS and no ACPI cases. | ||
33 | */ | 33 | */ |
34 | if (acpi_os_get_root_pointer()) | ||
35 | return; | ||
36 | #endif | ||
34 | 37 | ||
35 | info = &pci_root_info[pci_root_num]; | 38 | info = &pci_root_info[pci_root_num]; |
36 | pci_root_num++; | 39 | pci_root_num++; |
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index f7c8a399978c..5fe75026ecc2 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c | |||
@@ -22,6 +22,7 @@ unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 | | |||
22 | 22 | ||
23 | unsigned int pci_early_dump_regs; | 23 | unsigned int pci_early_dump_regs; |
24 | static int pci_bf_sort; | 24 | static int pci_bf_sort; |
25 | static int smbios_type_b1_flag; | ||
25 | int pci_routeirq; | 26 | int pci_routeirq; |
26 | int noioapicquirk; | 27 | int noioapicquirk; |
27 | #ifdef CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS | 28 | #ifdef CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS |
@@ -185,6 +186,39 @@ static int __devinit set_bf_sort(const struct dmi_system_id *d) | |||
185 | return 0; | 186 | return 0; |
186 | } | 187 | } |
187 | 188 | ||
189 | static void __devinit read_dmi_type_b1(const struct dmi_header *dm, | ||
190 | void *private_data) | ||
191 | { | ||
192 | u8 *d = (u8 *)dm + 4; | ||
193 | |||
194 | if (dm->type != 0xB1) | ||
195 | return; | ||
196 | switch (((*(u32 *)d) >> 9) & 0x03) { | ||
197 | case 0x00: | ||
198 | printk(KERN_INFO "dmi type 0xB1 record - unknown flag\n"); | ||
199 | break; | ||
200 | case 0x01: /* set pci=bfsort */ | ||
201 | smbios_type_b1_flag = 1; | ||
202 | break; | ||
203 | case 0x02: /* do not set pci=bfsort */ | ||
204 | smbios_type_b1_flag = 2; | ||
205 | break; | ||
206 | default: | ||
207 | break; | ||
208 | } | ||
209 | } | ||
210 | |||
211 | static int __devinit find_sort_method(const struct dmi_system_id *d) | ||
212 | { | ||
213 | dmi_walk(read_dmi_type_b1, NULL); | ||
214 | |||
215 | if (smbios_type_b1_flag == 1) { | ||
216 | set_bf_sort(d); | ||
217 | return 0; | ||
218 | } | ||
219 | return -1; | ||
220 | } | ||
221 | |||
188 | /* | 222 | /* |
189 | * Enable renumbering of PCI bus# ranges to reach all PCI busses (Cardbus) | 223 | * Enable renumbering of PCI bus# ranges to reach all PCI busses (Cardbus) |
190 | */ | 224 | */ |
@@ -213,6 +247,13 @@ static const struct dmi_system_id __devinitconst pciprobe_dmi_table[] = { | |||
213 | }, | 247 | }, |
214 | #endif /* __i386__ */ | 248 | #endif /* __i386__ */ |
215 | { | 249 | { |
250 | .callback = find_sort_method, | ||
251 | .ident = "Dell System", | ||
252 | .matches = { | ||
253 | DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"), | ||
254 | }, | ||
255 | }, | ||
256 | { | ||
216 | .callback = set_bf_sort, | 257 | .callback = set_bf_sort, |
217 | .ident = "Dell PowerEdge 1950", | 258 | .ident = "Dell PowerEdge 1950", |
218 | .matches = { | 259 | .matches = { |
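
find_sort_method() matches any Dell system, then walks the SMBIOS tables for the OEM record of type 0xB1 and reads two bits (bits 9-10 of the dword at offset 4) to decide whether pci=bfsort should be implied. A standalone decoder for that field, with a fabricated sample record:

	/*
	 * Standalone decoder for the Dell type-0xB1 flag read above: the
	 * dword at offset 4 of the record carries the pci=bfsort hint in
	 * bits 9-10. The sample bytes are fabricated for illustration.
	 */
	#include <stdio.h>
	#include <stdint.h>
	#include <string.h>

	static int b1_flag(const uint8_t *record)	/* start of DMI header */
	{
		uint32_t d;

		memcpy(&d, record + 4, sizeof(d));	/* avoids the unaligned cast */
		switch ((d >> 9) & 0x03) {
		case 0x01: return 1;			/* set pci=bfsort */
		case 0x02: return 2;			/* do not set pci=bfsort */
		default:   return 0;			/* unknown */
		}
	}

	int main(void)
	{
		/* type 0xB1, length 8, handle 0, then a flags dword with bit 9 set */
		uint8_t rec[8] = { 0xB1, 0x08, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00 };

		printf("bfsort flag: %d\n", b1_flag(rec));
		return 0;
	}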
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 9f9bfb705cf9..87e6c8323117 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c | |||
@@ -589,7 +589,8 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route | |||
589 | case PCI_DEVICE_ID_INTEL_ICH10_1: | 589 | case PCI_DEVICE_ID_INTEL_ICH10_1: |
590 | case PCI_DEVICE_ID_INTEL_ICH10_2: | 590 | case PCI_DEVICE_ID_INTEL_ICH10_2: |
591 | case PCI_DEVICE_ID_INTEL_ICH10_3: | 591 | case PCI_DEVICE_ID_INTEL_ICH10_3: |
592 | case PCI_DEVICE_ID_INTEL_PATSBURG_LPC: | 592 | case PCI_DEVICE_ID_INTEL_PATSBURG_LPC_0: |
593 | case PCI_DEVICE_ID_INTEL_PATSBURG_LPC_1: | ||
593 | r->name = "PIIX/ICH"; | 594 | r->name = "PIIX/ICH"; |
594 | r->get = pirq_piix_get; | 595 | r->get = pirq_piix_get; |
595 | r->set = pirq_piix_set; | 596 | r->set = pirq_piix_set; |
diff --git a/arch/x86/platform/mrst/early_printk_mrst.c b/arch/x86/platform/mrst/early_printk_mrst.c index 65df603622b2..25bfdbb5b130 100644 --- a/arch/x86/platform/mrst/early_printk_mrst.c +++ b/arch/x86/platform/mrst/early_printk_mrst.c | |||
@@ -103,7 +103,7 @@ struct dw_spi_reg { | |||
103 | static unsigned long mrst_spi_paddr = MRST_REGBASE_SPI0; | 103 | static unsigned long mrst_spi_paddr = MRST_REGBASE_SPI0; |
104 | 104 | ||
105 | static u32 *pclk_spi0; | 105 | static u32 *pclk_spi0; |
106 | /* Always contains an accessable address, start with 0 */ | 106 | /* Always contains an accessible address, start with 0 */ |
107 | static struct dw_spi_reg *pspi; | 107 | static struct dw_spi_reg *pspi; |
108 | 108 | ||
109 | static struct kmsg_dumper dw_dumper; | 109 | static struct kmsg_dumper dw_dumper; |
diff --git a/arch/x86/platform/olpc/Makefile b/arch/x86/platform/olpc/Makefile index c31b8fcb5a86..e797428b163b 100644 --- a/arch/x86/platform/olpc/Makefile +++ b/arch/x86/platform/olpc/Makefile | |||
@@ -1,3 +1,4 @@ | |||
1 | obj-$(CONFIG_OLPC) += olpc.o | 1 | obj-$(CONFIG_OLPC) += olpc.o |
2 | obj-$(CONFIG_OLPC_XO1) += olpc-xo1.o | 2 | obj-$(CONFIG_OLPC_XO1) += olpc-xo1.o |
3 | obj-$(CONFIG_OLPC_OPENFIRMWARE) += olpc_ofw.o | 3 | obj-$(CONFIG_OLPC_OPENFIRMWARE) += olpc_ofw.o |
4 | obj-$(CONFIG_OLPC_OPENFIRMWARE_DT) += olpc_dt.o | ||
diff --git a/arch/x86/platform/olpc/olpc-xo1.c b/arch/x86/platform/olpc/olpc-xo1.c index f5442c03abc3..127775696d6c 100644 --- a/arch/x86/platform/olpc/olpc-xo1.c +++ b/arch/x86/platform/olpc/olpc-xo1.c | |||
@@ -1,6 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * Support for features of the OLPC XO-1 laptop | 2 | * Support for features of the OLPC XO-1 laptop |
3 | * | 3 | * |
4 | * Copyright (C) 2010 Andres Salomon <dilinger@queued.net> | ||
4 | * Copyright (C) 2010 One Laptop per Child | 5 | * Copyright (C) 2010 One Laptop per Child |
5 | * Copyright (C) 2006 Red Hat, Inc. | 6 | * Copyright (C) 2006 Red Hat, Inc. |
6 | * Copyright (C) 2006 Advanced Micro Devices, Inc. | 7 | * Copyright (C) 2006 Advanced Micro Devices, Inc. |
@@ -12,8 +13,6 @@ | |||
12 | */ | 13 | */ |
13 | 14 | ||
14 | #include <linux/module.h> | 15 | #include <linux/module.h> |
15 | #include <linux/pci.h> | ||
16 | #include <linux/pci_ids.h> | ||
17 | #include <linux/platform_device.h> | 16 | #include <linux/platform_device.h> |
18 | #include <linux/pm.h> | 17 | #include <linux/pm.h> |
19 | 18 | ||
@@ -22,9 +21,6 @@ | |||
22 | 21 | ||
23 | #define DRV_NAME "olpc-xo1" | 22 | #define DRV_NAME "olpc-xo1" |
24 | 23 | ||
25 | #define PMS_BAR 4 | ||
26 | #define ACPI_BAR 5 | ||
27 | |||
28 | /* PMC registers (PMS block) */ | 24 | /* PMC registers (PMS block) */ |
29 | #define PM_SCLK 0x10 | 25 | #define PM_SCLK 0x10 |
30 | #define PM_IN_SLPCTL 0x20 | 26 | #define PM_IN_SLPCTL 0x20 |
@@ -57,65 +53,67 @@ static void xo1_power_off(void) | |||
57 | outl(0x00002000, acpi_base + PM1_CNT); | 53 | outl(0x00002000, acpi_base + PM1_CNT); |
58 | } | 54 | } |
59 | 55 | ||
60 | /* Read the base addresses from the PCI BAR info */ | 56 | static int __devinit olpc_xo1_probe(struct platform_device *pdev) |
61 | static int __devinit setup_bases(struct pci_dev *pdev) | ||
62 | { | 57 | { |
63 | int r; | 58 | struct resource *res; |
64 | 59 | ||
65 | r = pci_enable_device_io(pdev); | 60 | /* don't run on non-XOs */ |
66 | if (r) { | 61 | if (!machine_is_olpc()) |
67 | dev_err(&pdev->dev, "can't enable device IO\n"); | 62 | return -ENODEV; |
68 | return r; | ||
69 | } | ||
70 | 63 | ||
71 | r = pci_request_region(pdev, ACPI_BAR, DRV_NAME); | 64 | res = platform_get_resource(pdev, IORESOURCE_IO, 0); |
72 | if (r) { | 65 | if (!res) { |
73 | dev_err(&pdev->dev, "can't alloc PCI BAR #%d\n", ACPI_BAR); | 66 | dev_err(&pdev->dev, "can't fetch device resource info\n"); |
74 | return r; | 67 | return -EIO; |
75 | } | 68 | } |
76 | 69 | ||
77 | r = pci_request_region(pdev, PMS_BAR, DRV_NAME); | 70 | if (!request_region(res->start, resource_size(res), DRV_NAME)) { |
78 | if (r) { | 71 | dev_err(&pdev->dev, "can't request region\n"); |
79 | dev_err(&pdev->dev, "can't alloc PCI BAR #%d\n", PMS_BAR); | 72 | return -EIO; |
80 | pci_release_region(pdev, ACPI_BAR); | ||
81 | return r; | ||
82 | } | 73 | } |
83 | 74 | ||
84 | acpi_base = pci_resource_start(pdev, ACPI_BAR); | 75 | if (strcmp(pdev->name, "cs5535-pms") == 0) |
85 | pms_base = pci_resource_start(pdev, PMS_BAR); | 76 | pms_base = res->start; |
77 | else if (strcmp(pdev->name, "cs5535-acpi") == 0) | ||
78 | acpi_base = res->start; | ||
79 | |||
80 | /* If we have both addresses, we can override the poweroff hook */ | ||
81 | if (pms_base && acpi_base) { | ||
82 | pm_power_off = xo1_power_off; | ||
83 | printk(KERN_INFO "OLPC XO-1 support registered\n"); | ||
84 | } | ||
86 | 85 | ||
87 | return 0; | 86 | return 0; |
88 | } | 87 | } |
89 | 88 | ||
90 | static int __devinit olpc_xo1_probe(struct platform_device *pdev) | 89 | static int __devexit olpc_xo1_remove(struct platform_device *pdev) |
91 | { | 90 | { |
92 | struct pci_dev *pcidev; | 91 | struct resource *r; |
93 | int r; | ||
94 | |||
95 | pcidev = pci_get_device(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, | ||
96 | NULL); | ||
97 | if (!pdev) | ||
98 | return -ENODEV; | ||
99 | |||
100 | r = setup_bases(pcidev); | ||
101 | if (r) | ||
102 | return r; | ||
103 | 92 | ||
104 | pm_power_off = xo1_power_off; | 93 | r = platform_get_resource(pdev, IORESOURCE_IO, 0); |
94 | release_region(r->start, resource_size(r)); | ||
105 | 95 | ||
106 | printk(KERN_INFO "OLPC XO-1 support registered\n"); | 96 | if (strcmp(pdev->name, "cs5535-pms") == 0) |
107 | return 0; | 97 | pms_base = 0; |
108 | } | 98 | else if (strcmp(pdev->name, "cs5535-acpi") == 0) |
99 | acpi_base = 0; | ||
109 | 100 | ||
110 | static int __devexit olpc_xo1_remove(struct platform_device *pdev) | ||
111 | { | ||
112 | pm_power_off = NULL; | 101 | pm_power_off = NULL; |
113 | return 0; | 102 | return 0; |
114 | } | 103 | } |
115 | 104 | ||
116 | static struct platform_driver olpc_xo1_driver = { | 105 | static struct platform_driver cs5535_pms_drv = { |
106 | .driver = { | ||
107 | .name = "cs5535-pms", | ||
108 | .owner = THIS_MODULE, | ||
109 | }, | ||
110 | .probe = olpc_xo1_probe, | ||
111 | .remove = __devexit_p(olpc_xo1_remove), | ||
112 | }; | ||
113 | |||
114 | static struct platform_driver cs5535_acpi_drv = { | ||
117 | .driver = { | 115 | .driver = { |
118 | .name = DRV_NAME, | 116 | .name = "cs5535-acpi", |
119 | .owner = THIS_MODULE, | 117 | .owner = THIS_MODULE, |
120 | }, | 118 | }, |
121 | .probe = olpc_xo1_probe, | 119 | .probe = olpc_xo1_probe, |
@@ -124,12 +122,23 @@ static struct platform_driver olpc_xo1_driver = { | |||
124 | 122 | ||
125 | static int __init olpc_xo1_init(void) | 123 | static int __init olpc_xo1_init(void) |
126 | { | 124 | { |
127 | return platform_driver_register(&olpc_xo1_driver); | 125 | int r; |
126 | |||
127 | r = platform_driver_register(&cs5535_pms_drv); | ||
128 | if (r) | ||
129 | return r; | ||
130 | |||
131 | r = platform_driver_register(&cs5535_acpi_drv); | ||
132 | if (r) | ||
133 | platform_driver_unregister(&cs5535_pms_drv); | ||
134 | |||
135 | return r; | ||
128 | } | 136 | } |
129 | 137 | ||
130 | static void __exit olpc_xo1_exit(void) | 138 | static void __exit olpc_xo1_exit(void) |
131 | { | 139 | { |
132 | platform_driver_unregister(&olpc_xo1_driver); | 140 | platform_driver_unregister(&cs5535_acpi_drv); |
141 | platform_driver_unregister(&cs5535_pms_drv); | ||
133 | } | 142 | } |
134 | 143 | ||
135 | MODULE_AUTHOR("Daniel Drake <dsd@laptop.org>"); | 144 | MODULE_AUTHOR("Daniel Drake <dsd@laptop.org>"); |
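
Since the XO-1 driver now binds to the two CS5535 MFD cells instead of grabbing PCI BARs itself, olpc_xo1_init() has to register two platform drivers and unwind the first if the second fails. A generic sketch of that register/rollback pattern (all names here are invented):

	/*
	 * Generic sketch of the register-two, unwind-one pattern used by
	 * olpc_xo1_init() above. register_a()/register_b() stand in for the
	 * two platform_driver_register() calls.
	 */
	#include <stdio.h>

	static int register_a(void) { puts("A registered"); return 0; }
	static void unregister_a(void) { puts("A unregistered"); }
	static int register_b(void) { puts("B failed"); return -1; }

	static int init_both(void)
	{
		int r;

		r = register_a();
		if (r)
			return r;		/* nothing to undo yet */

		r = register_b();
		if (r)
			unregister_a();		/* roll back the first registration */

		return r;
	}

	int main(void)
	{
		return init_both() ? 1 : 0;
	}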
diff --git a/arch/x86/platform/olpc/olpc_dt.c b/arch/x86/platform/olpc/olpc_dt.c new file mode 100644 index 000000000000..dab874647530 --- /dev/null +++ b/arch/x86/platform/olpc/olpc_dt.c | |||
@@ -0,0 +1,183 @@ | |||
1 | /* | ||
2 | * OLPC-specific OFW device tree support code. | ||
3 | * | ||
4 | * Paul Mackerras August 1996. | ||
5 | * Copyright (C) 1996-2005 Paul Mackerras. | ||
6 | * | ||
7 | * Adapted for 64bit PowerPC by Dave Engebretsen and Peter Bergner. | ||
8 | * {engebret|bergner}@us.ibm.com | ||
9 | * | ||
10 | * Adapted for sparc by David S. Miller davem@davemloft.net | ||
11 | * Adapted for x86/OLPC by Andres Salomon <dilinger@queued.net> | ||
12 | * | ||
13 | * This program is free software; you can redistribute it and/or | ||
14 | * modify it under the terms of the GNU General Public License | ||
15 | * as published by the Free Software Foundation; either version | ||
16 | * 2 of the License, or (at your option) any later version. | ||
17 | */ | ||
18 | |||
19 | #include <linux/kernel.h> | ||
20 | #include <linux/bootmem.h> | ||
21 | #include <linux/of.h> | ||
22 | #include <linux/of_pdt.h> | ||
23 | #include <asm/olpc_ofw.h> | ||
24 | |||
25 | static phandle __init olpc_dt_getsibling(phandle node) | ||
26 | { | ||
27 | const void *args[] = { (void *)node }; | ||
28 | void *res[] = { &node }; | ||
29 | |||
30 | if ((s32)node == -1) | ||
31 | return 0; | ||
32 | |||
33 | if (olpc_ofw("peer", args, res) || (s32)node == -1) | ||
34 | return 0; | ||
35 | |||
36 | return node; | ||
37 | } | ||
38 | |||
39 | static phandle __init olpc_dt_getchild(phandle node) | ||
40 | { | ||
41 | const void *args[] = { (void *)node }; | ||
42 | void *res[] = { &node }; | ||
43 | |||
44 | if ((s32)node == -1) | ||
45 | return 0; | ||
46 | |||
47 | if (olpc_ofw("child", args, res) || (s32)node == -1) { | ||
48 | pr_err("PROM: %s: fetching child failed!\n", __func__); | ||
49 | return 0; | ||
50 | } | ||
51 | |||
52 | return node; | ||
53 | } | ||
54 | |||
55 | static int __init olpc_dt_getproplen(phandle node, const char *prop) | ||
56 | { | ||
57 | const void *args[] = { (void *)node, prop }; | ||
58 | int len; | ||
59 | void *res[] = { &len }; | ||
60 | |||
61 | if ((s32)node == -1) | ||
62 | return -1; | ||
63 | |||
64 | if (olpc_ofw("getproplen", args, res)) { | ||
65 | pr_err("PROM: %s: getproplen failed!\n", __func__); | ||
66 | return -1; | ||
67 | } | ||
68 | |||
69 | return len; | ||
70 | } | ||
71 | |||
72 | static int __init olpc_dt_getproperty(phandle node, const char *prop, | ||
73 | char *buf, int bufsize) | ||
74 | { | ||
75 | int plen; | ||
76 | |||
77 | plen = olpc_dt_getproplen(node, prop); | ||
78 | if (plen > bufsize || plen < 1) { | ||
79 | return -1; | ||
80 | } else { | ||
81 | const void *args[] = { (void *)node, prop, buf, (void *)plen }; | ||
82 | void *res[] = { &plen }; | ||
83 | |||
84 | if (olpc_ofw("getprop", args, res)) { | ||
85 | pr_err("PROM: %s: getprop failed!\n", __func__); | ||
86 | return -1; | ||
87 | } | ||
88 | } | ||
89 | |||
90 | return plen; | ||
91 | } | ||
92 | |||
93 | static int __init olpc_dt_nextprop(phandle node, char *prev, char *buf) | ||
94 | { | ||
95 | const void *args[] = { (void *)node, prev, buf }; | ||
96 | int success; | ||
97 | void *res[] = { &success }; | ||
98 | |||
99 | buf[0] = '\0'; | ||
100 | |||
101 | if ((s32)node == -1) | ||
102 | return -1; | ||
103 | |||
104 | if (olpc_ofw("nextprop", args, res) || success != 1) | ||
105 | return -1; | ||
106 | |||
107 | return 0; | ||
108 | } | ||
109 | |||
110 | static int __init olpc_dt_pkg2path(phandle node, char *buf, | ||
111 | const int buflen, int *len) | ||
112 | { | ||
113 | const void *args[] = { (void *)node, buf, (void *)buflen }; | ||
114 | void *res[] = { len }; | ||
115 | |||
116 | if ((s32)node == -1) | ||
117 | return -1; | ||
118 | |||
119 | if (olpc_ofw("package-to-path", args, res) || *len < 1) | ||
120 | return -1; | ||
121 | |||
122 | return 0; | ||
123 | } | ||
124 | |||
125 | static unsigned int prom_early_allocated __initdata; | ||
126 | |||
127 | void * __init prom_early_alloc(unsigned long size) | ||
128 | { | ||
129 | static u8 *mem; | ||
130 | static size_t free_mem; | ||
131 | void *res; | ||
132 | |||
133 | if (free_mem < size) { | ||
134 | const size_t chunk_size = max(PAGE_SIZE, size); | ||
135 | |||
136 | /* | ||
137 | * To mimimize the number of allocations, grab at least | ||
138 | * PAGE_SIZE of memory (that's an arbitrary choice that's | ||
139 | * fast enough on the platforms we care about while minimizing | ||
140 | * wasted bootmem) and hand off chunks of it to callers. | ||
141 | */ | ||
142 | res = alloc_bootmem(chunk_size); | ||
143 | if (!res) | ||
144 | return NULL; | ||
145 | prom_early_allocated += chunk_size; | ||
146 | memset(res, 0, chunk_size); | ||
147 | free_mem = chunk_size; | ||
148 | mem = res; | ||
149 | } | ||
150 | |||
151 | /* allocate from the local cache */ | ||
152 | free_mem -= size; | ||
153 | res = mem; | ||
154 | mem += size; | ||
155 | return res; | ||
156 | } | ||
157 | |||
158 | static struct of_pdt_ops prom_olpc_ops __initdata = { | ||
159 | .nextprop = olpc_dt_nextprop, | ||
160 | .getproplen = olpc_dt_getproplen, | ||
161 | .getproperty = olpc_dt_getproperty, | ||
162 | .getchild = olpc_dt_getchild, | ||
163 | .getsibling = olpc_dt_getsibling, | ||
164 | .pkg2path = olpc_dt_pkg2path, | ||
165 | }; | ||
166 | |||
167 | void __init olpc_dt_build_devicetree(void) | ||
168 | { | ||
169 | phandle root; | ||
170 | |||
171 | if (!olpc_ofw_is_installed()) | ||
172 | return; | ||
173 | |||
174 | root = olpc_dt_getsibling(0); | ||
175 | if (!root) { | ||
176 | pr_err("PROM: unable to get root node from OFW!\n"); | ||
177 | return; | ||
178 | } | ||
179 | of_pdt_build_devicetree(root, &prom_olpc_ops); | ||
180 | |||
181 | pr_info("PROM DT: Built device tree with %u bytes of memory.\n", | ||
182 | prom_early_allocated); | ||
183 | } | ||
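
prom_early_alloc() above is a bump allocator over bootmem: refill with at least a page whenever the current chunk runs dry, then carve requests out of it. Chunks are intentionally never freed, since the device tree built from them lives for the life of the kernel. A userspace version with calloc() standing in for alloc_bootmem():

	/*
	 * Userspace version of the prom_early_alloc() bump allocator, with
	 * calloc() standing in for alloc_bootmem(). Chunking at one "page"
	 * mirrors the kernel's choice; chunks are deliberately never freed.
	 */
	#include <stdio.h>
	#include <stdlib.h>

	#define CHUNK_PAGE 4096

	static unsigned char *mem;
	static size_t free_mem;
	static size_t total_allocated;

	static void *early_alloc(size_t size)
	{
		void *res;

		if (free_mem < size) {
			size_t chunk = size > CHUNK_PAGE ? size : CHUNK_PAGE;

			res = calloc(1, chunk);		/* alloc_bootmem() analog */
			if (!res)
				return NULL;
			total_allocated += chunk;
			free_mem = chunk;
			mem = res;
		}

		/* carve the request out of the current chunk */
		free_mem -= size;
		res = mem;
		mem += size;
		return res;
	}

	int main(void)
	{
		char *a = early_alloc(100), *b = early_alloc(200);

		printf("a=%p b=%p total=%zu\n", (void *)a, (void *)b,
		       total_allocated);
		return 0;
	}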
diff --git a/arch/x86/platform/olpc/olpc_ofw.c b/arch/x86/platform/olpc/olpc_ofw.c index 787320464379..e7604f62870d 100644 --- a/arch/x86/platform/olpc/olpc_ofw.c +++ b/arch/x86/platform/olpc/olpc_ofw.c | |||
@@ -110,3 +110,8 @@ void __init olpc_ofw_detect(void) | |||
110 | (unsigned long)olpc_ofw_cif, (-start) >> 20); | 110 | (unsigned long)olpc_ofw_cif, (-start) >> 20); |
111 | reserve_top_address(-start); | 111 | reserve_top_address(-start); |
112 | } | 112 | } |
113 | |||
114 | bool __init olpc_ofw_is_installed(void) | ||
115 | { | ||
116 | return olpc_ofw_cif != NULL; | ||
117 | } | ||
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 779385158915..17c565de3d64 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile | |||
@@ -12,7 +12,8 @@ CFLAGS_mmu.o := $(nostackp) | |||
12 | 12 | ||
13 | obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ | 13 | obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ |
14 | time.o xen-asm.o xen-asm_$(BITS).o \ | 14 | time.o xen-asm.o xen-asm_$(BITS).o \ |
15 | grant-table.o suspend.o platform-pci-unplug.o | 15 | grant-table.o suspend.o platform-pci-unplug.o \ |
16 | p2m.o | ||
16 | 17 | ||
17 | obj-$(CONFIG_SMP) += smp.o | 18 | obj-$(CONFIG_SMP) += smp.o |
18 | obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o | 19 | obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 44dcad43989d..7e8d3bc80af6 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -574,8 +574,8 @@ static void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g) | |||
574 | 574 | ||
575 | preempt_disable(); | 575 | preempt_disable(); |
576 | 576 | ||
577 | start = __get_cpu_var(idt_desc).address; | 577 | start = __this_cpu_read(idt_desc.address); |
578 | end = start + __get_cpu_var(idt_desc).size + 1; | 578 | end = start + __this_cpu_read(idt_desc.size) + 1; |
579 | 579 | ||
580 | xen_mc_flush(); | 580 | xen_mc_flush(); |
581 | 581 | ||
@@ -1174,6 +1174,15 @@ asmlinkage void __init xen_start_kernel(void) | |||
1174 | 1174 | ||
1175 | xen_smp_init(); | 1175 | xen_smp_init(); |
1176 | 1176 | ||
1177 | #ifdef CONFIG_ACPI_NUMA | ||
1178 | /* | ||
1179 | * The pages we get from Xen are not related to machine pages, so | ||
1180 | * any NUMA information the kernel tries to get from ACPI will | ||
1181 | * be meaningless. Prevent it from trying. | ||
1182 | */ | ||
1183 | acpi_numa = -1; | ||
1184 | #endif | ||
1185 | |||
1177 | pgd = (pgd_t *)xen_start_info->pt_base; | 1186 | pgd = (pgd_t *)xen_start_info->pt_base; |
1178 | 1187 | ||
1179 | if (!xen_initial_domain()) | 1188 | if (!xen_initial_domain()) |
@@ -1256,25 +1265,6 @@ asmlinkage void __init xen_start_kernel(void) | |||
1256 | #endif | 1265 | #endif |
1257 | } | 1266 | } |
1258 | 1267 | ||
1259 | static uint32_t xen_cpuid_base(void) | ||
1260 | { | ||
1261 | uint32_t base, eax, ebx, ecx, edx; | ||
1262 | char signature[13]; | ||
1263 | |||
1264 | for (base = 0x40000000; base < 0x40010000; base += 0x100) { | ||
1265 | cpuid(base, &eax, &ebx, &ecx, &edx); | ||
1266 | *(uint32_t *)(signature + 0) = ebx; | ||
1267 | *(uint32_t *)(signature + 4) = ecx; | ||
1268 | *(uint32_t *)(signature + 8) = edx; | ||
1269 | signature[12] = 0; | ||
1270 | |||
1271 | if (!strcmp("XenVMMXenVMM", signature) && ((eax - base) >= 2)) | ||
1272 | return base; | ||
1273 | } | ||
1274 | |||
1275 | return 0; | ||
1276 | } | ||
1277 | |||
1278 | static int init_hvm_pv_info(int *major, int *minor) | 1268 | static int init_hvm_pv_info(int *major, int *minor) |
1279 | { | 1269 | { |
1280 | uint32_t eax, ebx, ecx, edx, pages, msr, base; | 1270 | uint32_t eax, ebx, ecx, edx, pages, msr, base; |
@@ -1384,6 +1374,18 @@ static bool __init xen_hvm_platform(void) | |||
1384 | return true; | 1374 | return true; |
1385 | } | 1375 | } |
1386 | 1376 | ||
1377 | bool xen_hvm_need_lapic(void) | ||
1378 | { | ||
1379 | if (xen_pv_domain()) | ||
1380 | return false; | ||
1381 | if (!xen_hvm_domain()) | ||
1382 | return false; | ||
1383 | if (xen_feature(XENFEAT_hvm_pirqs) && xen_have_vector_callback) | ||
1384 | return false; | ||
1385 | return true; | ||
1386 | } | ||
1387 | EXPORT_SYMBOL_GPL(xen_hvm_need_lapic); | ||
1388 | |||
1387 | const __refconst struct hypervisor_x86 x86_hyper_xen_hvm = { | 1389 | const __refconst struct hypervisor_x86 x86_hyper_xen_hvm = { |
1388 | .name = "Xen HVM", | 1390 | .name = "Xen HVM", |
1389 | .detect = xen_hvm_platform, | 1391 | .detect = xen_hvm_platform, |
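
xen_cpuid_base() disappears from enlighten.c here, presumably relocated to a shared header now that xen_hvm_need_lapic() is exported for use elsewhere. The scan itself walks the hypervisor CPUID leaves at 0x40000000 in 0x100 steps looking for the "XenVMMXenVMM" signature with at least two extra leaves; the same loop compiles in userspace with GCC's <cpuid.h> (x86-only, and it only reports a base when actually running under Xen):

	/*
	 * Userspace equivalent of the removed xen_cpuid_base() scan, using
	 * GCC's <cpuid.h>.
	 */
	#include <stdio.h>
	#include <string.h>
	#include <stdint.h>
	#include <cpuid.h>

	static uint32_t xen_cpuid_base(void)
	{
		uint32_t base, eax, ebx, ecx, edx;
		char sig[13];

		for (base = 0x40000000; base < 0x40010000; base += 0x100) {
			__cpuid(base, eax, ebx, ecx, edx);
			memcpy(sig + 0, &ebx, 4);
			memcpy(sig + 4, &ecx, 4);
			memcpy(sig + 8, &edx, 4);
			sig[12] = 0;

			if (!strcmp("XenVMMXenVMM", sig) && (eax - base) >= 2)
				return base;
		}
		return 0;
	}

	int main(void)
	{
		uint32_t base = xen_cpuid_base();

		printf(base ? "Xen at CPUID leaf 0x%x\n" : "not on Xen (0x%x)\n",
		       base);
		return 0;
	}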
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 44924e551fde..5e92b61ad574 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -173,371 +173,6 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */ | |||
173 | */ | 173 | */ |
174 | #define USER_LIMIT ((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK) | 174 | #define USER_LIMIT ((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK) |
175 | 175 | ||
176 | /* | ||
177 | * Xen leaves the responsibility for maintaining p2m mappings to the | ||
178 | * guests themselves, but it must also access and update the p2m array | ||
179 | * during suspend/resume when all the pages are reallocated. | ||
180 | * | ||
181 | * The p2m table is logically a flat array, but we implement it as a | ||
182 | * three-level tree to allow the address space to be sparse. | ||
183 | * | ||
184 | * Xen | ||
185 | * | | ||
186 | * p2m_top p2m_top_mfn | ||
187 | * / \ / \ | ||
188 | * p2m_mid p2m_mid p2m_mid_mfn p2m_mid_mfn | ||
189 | * / \ / \ / / | ||
190 | * p2m p2m p2m p2m p2m p2m p2m ... | ||
191 | * | ||
192 | * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p. | ||
193 | * | ||
194 | * The p2m_top and p2m_top_mfn levels are limited to 1 page, so the | ||
195 | * maximum representable pseudo-physical address space is: | ||
196 | * P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages | ||
197 | * | ||
198 | * P2M_PER_PAGE depends on the architecture, as a mfn is always | ||
199 | * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to | ||
200 | * 512 and 1024 entries respectively. | ||
201 | */ | ||
202 | |||
203 | unsigned long xen_max_p2m_pfn __read_mostly; | ||
204 | |||
205 | #define P2M_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) | ||
206 | #define P2M_MID_PER_PAGE (PAGE_SIZE / sizeof(unsigned long *)) | ||
207 | #define P2M_TOP_PER_PAGE (PAGE_SIZE / sizeof(unsigned long **)) | ||
208 | |||
209 | #define MAX_P2M_PFN (P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE) | ||
210 | |||
211 | /* Placeholders for holes in the address space */ | ||
212 | static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE); | ||
213 | static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE); | ||
214 | static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_missing_mfn, P2M_MID_PER_PAGE); | ||
215 | |||
216 | static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE); | ||
217 | static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE); | ||
218 | static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE); | ||
219 | |||
220 | RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); | ||
221 | RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); | ||
222 | |||
223 | static inline unsigned p2m_top_index(unsigned long pfn) | ||
224 | { | ||
225 | BUG_ON(pfn >= MAX_P2M_PFN); | ||
226 | return pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE); | ||
227 | } | ||
228 | |||
229 | static inline unsigned p2m_mid_index(unsigned long pfn) | ||
230 | { | ||
231 | return (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE; | ||
232 | } | ||
233 | |||
234 | static inline unsigned p2m_index(unsigned long pfn) | ||
235 | { | ||
236 | return pfn % P2M_PER_PAGE; | ||
237 | } | ||
238 | |||
239 | static void p2m_top_init(unsigned long ***top) | ||
240 | { | ||
241 | unsigned i; | ||
242 | |||
243 | for (i = 0; i < P2M_TOP_PER_PAGE; i++) | ||
244 | top[i] = p2m_mid_missing; | ||
245 | } | ||
246 | |||
247 | static void p2m_top_mfn_init(unsigned long *top) | ||
248 | { | ||
249 | unsigned i; | ||
250 | |||
251 | for (i = 0; i < P2M_TOP_PER_PAGE; i++) | ||
252 | top[i] = virt_to_mfn(p2m_mid_missing_mfn); | ||
253 | } | ||
254 | |||
255 | static void p2m_top_mfn_p_init(unsigned long **top) | ||
256 | { | ||
257 | unsigned i; | ||
258 | |||
259 | for (i = 0; i < P2M_TOP_PER_PAGE; i++) | ||
260 | top[i] = p2m_mid_missing_mfn; | ||
261 | } | ||
262 | |||
263 | static void p2m_mid_init(unsigned long **mid) | ||
264 | { | ||
265 | unsigned i; | ||
266 | |||
267 | for (i = 0; i < P2M_MID_PER_PAGE; i++) | ||
268 | mid[i] = p2m_missing; | ||
269 | } | ||
270 | |||
271 | static void p2m_mid_mfn_init(unsigned long *mid) | ||
272 | { | ||
273 | unsigned i; | ||
274 | |||
275 | for (i = 0; i < P2M_MID_PER_PAGE; i++) | ||
276 | mid[i] = virt_to_mfn(p2m_missing); | ||
277 | } | ||
278 | |||
279 | static void p2m_init(unsigned long *p2m) | ||
280 | { | ||
281 | unsigned i; | ||
282 | |||
283 | for (i = 0; i < P2M_MID_PER_PAGE; i++) | ||
284 | p2m[i] = INVALID_P2M_ENTRY; | ||
285 | } | ||
286 | |||
287 | /* | ||
288 | * Build the parallel p2m_top_mfn and p2m_mid_mfn structures | ||
289 | * | ||
290 | * This is called both at boot time, and after resuming from suspend: | ||
291 | * - At boot time we're called very early, and must use extend_brk() | ||
292 | * to allocate memory. | ||
293 | * | ||
294 | * - After resume we're called from within stop_machine, but the mfn | ||
295 | * tree should already be completely allocated. | ||
296 | */ | ||
297 | void xen_build_mfn_list_list(void) | ||
298 | { | ||
299 | unsigned long pfn; | ||
300 | |||
301 | /* Pre-initialize p2m_top_mfn to be completely missing */ | ||
302 | if (p2m_top_mfn == NULL) { | ||
303 | p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
304 | p2m_mid_mfn_init(p2m_mid_missing_mfn); | ||
305 | |||
306 | p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
307 | p2m_top_mfn_p_init(p2m_top_mfn_p); | ||
308 | |||
309 | p2m_top_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
310 | p2m_top_mfn_init(p2m_top_mfn); | ||
311 | } else { | ||
312 | /* Reinitialise, mfn's all change after migration */ | ||
313 | p2m_mid_mfn_init(p2m_mid_missing_mfn); | ||
314 | } | ||
315 | |||
316 | for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) { | ||
317 | unsigned topidx = p2m_top_index(pfn); | ||
318 | unsigned mididx = p2m_mid_index(pfn); | ||
319 | unsigned long **mid; | ||
320 | unsigned long *mid_mfn_p; | ||
321 | |||
322 | mid = p2m_top[topidx]; | ||
323 | mid_mfn_p = p2m_top_mfn_p[topidx]; | ||
324 | |||
325 | /* Don't bother allocating any mfn mid levels if | ||
326 | * they're just missing, just update the stored mfn, | ||
327 | * since all could have changed over a migrate. | ||
328 | */ | ||
329 | if (mid == p2m_mid_missing) { | ||
330 | BUG_ON(mididx); | ||
331 | BUG_ON(mid_mfn_p != p2m_mid_missing_mfn); | ||
332 | p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn); | ||
333 | pfn += (P2M_MID_PER_PAGE - 1) * P2M_PER_PAGE; | ||
334 | continue; | ||
335 | } | ||
336 | |||
337 | if (mid_mfn_p == p2m_mid_missing_mfn) { | ||
338 | /* | ||
339 | * XXX boot-time only! We should never find | ||
340 | * missing parts of the mfn tree after | ||
341 | * runtime. extend_brk() will BUG if we call | ||
342 | * it too late. | ||
343 | */ | ||
344 | mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
345 | p2m_mid_mfn_init(mid_mfn_p); | ||
346 | |||
347 | p2m_top_mfn_p[topidx] = mid_mfn_p; | ||
348 | } | ||
349 | |||
350 | p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); | ||
351 | mid_mfn_p[mididx] = virt_to_mfn(mid[mididx]); | ||
352 | } | ||
353 | } | ||
354 | |||
355 | void xen_setup_mfn_list_list(void) | ||
356 | { | ||
357 | BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); | ||
358 | |||
359 | HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = | ||
360 | virt_to_mfn(p2m_top_mfn); | ||
361 | HYPERVISOR_shared_info->arch.max_pfn = xen_max_p2m_pfn; | ||
362 | } | ||
363 | |||
364 | /* Set up p2m_top to point to the domain-builder provided p2m pages */ | ||
365 | void __init xen_build_dynamic_phys_to_machine(void) | ||
366 | { | ||
367 | unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list; | ||
368 | unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); | ||
369 | unsigned long pfn; | ||
370 | |||
371 | xen_max_p2m_pfn = max_pfn; | ||
372 | |||
373 | p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
374 | p2m_init(p2m_missing); | ||
375 | |||
376 | p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
377 | p2m_mid_init(p2m_mid_missing); | ||
378 | |||
379 | p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
380 | p2m_top_init(p2m_top); | ||
381 | |||
382 | /* | ||
383 | * The domain builder gives us a pre-constructed p2m array in | ||
384 | * mfn_list for all the pages initially given to us, so we just | ||
385 | * need to graft that into our tree structure. | ||
386 | */ | ||
387 | for (pfn = 0; pfn < max_pfn; pfn += P2M_PER_PAGE) { | ||
388 | unsigned topidx = p2m_top_index(pfn); | ||
389 | unsigned mididx = p2m_mid_index(pfn); | ||
390 | |||
391 | if (p2m_top[topidx] == p2m_mid_missing) { | ||
392 | unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
393 | p2m_mid_init(mid); | ||
394 | |||
395 | p2m_top[topidx] = mid; | ||
396 | } | ||
397 | |||
398 | p2m_top[topidx][mididx] = &mfn_list[pfn]; | ||
399 | } | ||
400 | } | ||
401 | |||
402 | unsigned long get_phys_to_machine(unsigned long pfn) | ||
403 | { | ||
404 | unsigned topidx, mididx, idx; | ||
405 | |||
406 | if (unlikely(pfn >= MAX_P2M_PFN)) | ||
407 | return INVALID_P2M_ENTRY; | ||
408 | |||
409 | topidx = p2m_top_index(pfn); | ||
410 | mididx = p2m_mid_index(pfn); | ||
411 | idx = p2m_index(pfn); | ||
412 | |||
413 | return p2m_top[topidx][mididx][idx]; | ||
414 | } | ||
415 | EXPORT_SYMBOL_GPL(get_phys_to_machine); | ||
416 | |||
417 | static void *alloc_p2m_page(void) | ||
418 | { | ||
419 | return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT); | ||
420 | } | ||
421 | |||
422 | static void free_p2m_page(void *p) | ||
423 | { | ||
424 | free_page((unsigned long)p); | ||
425 | } | ||
426 | |||
427 | /* | ||
428 | * Fully allocate the p2m structure for a given pfn. We need to check | ||
429 | * that both the top and mid levels are allocated, and make sure the | ||
430 | * parallel mfn tree is kept in sync. We may race with other cpus, so | ||
431 | * the new pages are installed with cmpxchg; if we lose the race then | ||
432 | * simply free the page we allocated and use the one that's there. | ||
433 | */ | ||
434 | static bool alloc_p2m(unsigned long pfn) | ||
435 | { | ||
436 | unsigned topidx, mididx; | ||
437 | unsigned long ***top_p, **mid; | ||
438 | unsigned long *top_mfn_p, *mid_mfn; | ||
439 | |||
440 | topidx = p2m_top_index(pfn); | ||
441 | mididx = p2m_mid_index(pfn); | ||
442 | |||
443 | top_p = &p2m_top[topidx]; | ||
444 | mid = *top_p; | ||
445 | |||
446 | if (mid == p2m_mid_missing) { | ||
447 | /* Mid level is missing, allocate a new one */ | ||
448 | mid = alloc_p2m_page(); | ||
449 | if (!mid) | ||
450 | return false; | ||
451 | |||
452 | p2m_mid_init(mid); | ||
453 | |||
454 | if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing) | ||
455 | free_p2m_page(mid); | ||
456 | } | ||
457 | |||
458 | top_mfn_p = &p2m_top_mfn[topidx]; | ||
459 | mid_mfn = p2m_top_mfn_p[topidx]; | ||
460 | |||
461 | BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); | ||
462 | |||
463 | if (mid_mfn == p2m_mid_missing_mfn) { | ||
464 | /* Separately check the mid mfn level */ | ||
465 | unsigned long missing_mfn; | ||
466 | unsigned long mid_mfn_mfn; | ||
467 | |||
468 | mid_mfn = alloc_p2m_page(); | ||
469 | if (!mid_mfn) | ||
470 | return false; | ||
471 | |||
472 | p2m_mid_mfn_init(mid_mfn); | ||
473 | |||
474 | missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); | ||
475 | mid_mfn_mfn = virt_to_mfn(mid_mfn); | ||
476 | if (cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn) != missing_mfn) | ||
477 | free_p2m_page(mid_mfn); | ||
478 | else | ||
479 | p2m_top_mfn_p[topidx] = mid_mfn; | ||
480 | } | ||
481 | |||
482 | if (p2m_top[topidx][mididx] == p2m_missing) { | ||
483 | /* p2m leaf page is missing */ | ||
484 | unsigned long *p2m; | ||
485 | |||
486 | p2m = alloc_p2m_page(); | ||
487 | if (!p2m) | ||
488 | return false; | ||
489 | |||
490 | p2m_init(p2m); | ||
491 | |||
492 | if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing) | ||
493 | free_p2m_page(p2m); | ||
494 | else | ||
495 | mid_mfn[mididx] = virt_to_mfn(p2m); | ||
496 | } | ||
497 | |||
498 | return true; | ||
499 | } | ||
500 | |||
501 | /* Try to install p2m mapping; fail if intermediate bits missing */ | ||
502 | bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) | ||
503 | { | ||
504 | unsigned topidx, mididx, idx; | ||
505 | |||
506 | if (unlikely(pfn >= MAX_P2M_PFN)) { | ||
507 | BUG_ON(mfn != INVALID_P2M_ENTRY); | ||
508 | return true; | ||
509 | } | ||
510 | |||
511 | topidx = p2m_top_index(pfn); | ||
512 | mididx = p2m_mid_index(pfn); | ||
513 | idx = p2m_index(pfn); | ||
514 | |||
515 | if (p2m_top[topidx][mididx] == p2m_missing) | ||
516 | return mfn == INVALID_P2M_ENTRY; | ||
517 | |||
518 | p2m_top[topidx][mididx][idx] = mfn; | ||
519 | |||
520 | return true; | ||
521 | } | ||
522 | |||
523 | bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) | ||
524 | { | ||
525 | if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { | ||
526 | BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); | ||
527 | return true; | ||
528 | } | ||
529 | |||
530 | if (unlikely(!__set_phys_to_machine(pfn, mfn))) { | ||
531 | if (!alloc_p2m(pfn)) | ||
532 | return false; | ||
533 | |||
534 | if (!__set_phys_to_machine(pfn, mfn)) | ||
535 | return false; | ||
536 | } | ||
537 | |||
538 | return true; | ||
539 | } | ||
540 | |||
541 | unsigned long arbitrary_virt_to_mfn(void *vaddr) | 176 | unsigned long arbitrary_virt_to_mfn(void *vaddr) |
542 | { | 177 | { |
543 | xmaddr_t maddr = arbitrary_virt_to_machine(vaddr); | 178 | xmaddr_t maddr = arbitrary_virt_to_machine(vaddr); |
@@ -566,6 +201,7 @@ xmaddr_t arbitrary_virt_to_machine(void *vaddr) | |||
566 | offset = address & ~PAGE_MASK; | 201 | offset = address & ~PAGE_MASK; |
567 | return XMADDR(((phys_addr_t)pte_mfn(*pte) << PAGE_SHIFT) + offset); | 202 | return XMADDR(((phys_addr_t)pte_mfn(*pte) << PAGE_SHIFT) + offset); |
568 | } | 203 | } |
204 | EXPORT_SYMBOL_GPL(arbitrary_virt_to_machine); | ||
569 | 205 | ||
570 | void make_lowmem_page_readonly(void *vaddr) | 206 | void make_lowmem_page_readonly(void *vaddr) |
571 | { | 207 | { |
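
Everything removed from mmu.c above reappears in the new p2m.c below; the lookup itself is just radix arithmetic on the pfn. With 4 KiB pages and 8-byte entries (the 64-bit case) each level holds 512 entries, so topidx/mididx/idx are the base-512 digits of the pfn:

	/*
	 * Standalone version of the p2m index math that moves to p2m.c
	 * below, for the 64-bit case (PAGE_SIZE 4096, 8-byte entries, so
	 * 512 entries per level).
	 */
	#include <stdio.h>

	#define P2M_PER_PAGE	 (4096 / 8)	/* leaf mfns per page */
	#define P2M_MID_PER_PAGE (4096 / 8)	/* pointers per mid page */

	int main(void)
	{
		unsigned long pfn = 0x123456;
		unsigned top = pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE);
		unsigned mid = (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE;
		unsigned idx = pfn % P2M_PER_PAGE;

		/* p2m_top[top][mid][idx] would hold the mfn for this pfn */
		printf("pfn 0x%lx -> top %u mid %u idx %u\n", pfn, top, mid, idx);
		return 0;
	}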
diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h index 9e565da5d1f7..4ec8035e3216 100644 --- a/arch/x86/xen/multicalls.h +++ b/arch/x86/xen/multicalls.h | |||
@@ -22,7 +22,7 @@ static inline void xen_mc_batch(void) | |||
22 | unsigned long flags; | 22 | unsigned long flags; |
23 | /* need to disable interrupts until this entry is complete */ | 23 | /* need to disable interrupts until this entry is complete */ |
24 | local_irq_save(flags); | 24 | local_irq_save(flags); |
25 | __get_cpu_var(xen_mc_irq_flags) = flags; | 25 | __this_cpu_write(xen_mc_irq_flags, flags); |
26 | } | 26 | } |
27 | 27 | ||
28 | static inline struct multicall_space xen_mc_entry(size_t args) | 28 | static inline struct multicall_space xen_mc_entry(size_t args) |
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c new file mode 100644 index 000000000000..8f2251d2a3f8 --- /dev/null +++ b/arch/x86/xen/p2m.c | |||
@@ -0,0 +1,510 @@ | |||
1 | /* | ||
2 | * Xen leaves the responsibility for maintaining p2m mappings to the | ||
3 | * guests themselves, but it must also access and update the p2m array | ||
4 | * during suspend/resume when all the pages are reallocated. | ||
5 | * | ||
6 | * The p2m table is logically a flat array, but we implement it as a | ||
7 | * three-level tree to allow the address space to be sparse. | ||
8 | * | ||
9 | * Xen | ||
10 | * | | ||
11 | * p2m_top p2m_top_mfn | ||
12 | * / \ / \ | ||
13 | * p2m_mid p2m_mid p2m_mid_mfn p2m_mid_mfn | ||
14 | * / \ / \ / / | ||
15 | * p2m p2m p2m p2m p2m p2m p2m ... | ||
16 | * | ||
17 | * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p. | ||
18 | * | ||
19 | * The p2m_top and p2m_top_mfn levels are limited to 1 page, so the | ||
20 | * maximum representable pseudo-physical address space is: | ||
21 | * P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages | ||
22 | * | ||
23 | * P2M_PER_PAGE depends on the architecture, as an mfn is always | ||
24 | * unsigned long (8 bytes on 64-bit, 4 bytes on 32-bit), leading to | ||
25 | * 512 and 1024 entries respectively. | ||
26 | */ | ||
27 | |||
28 | #include <linux/init.h> | ||
29 | #include <linux/module.h> | ||
30 | #include <linux/list.h> | ||
31 | #include <linux/hash.h> | ||
32 | #include <linux/sched.h> | ||
33 | |||
34 | #include <asm/cache.h> | ||
35 | #include <asm/setup.h> | ||
36 | |||
37 | #include <asm/xen/page.h> | ||
38 | #include <asm/xen/hypercall.h> | ||
39 | #include <asm/xen/hypervisor.h> | ||
40 | |||
41 | #include "xen-ops.h" | ||
42 | |||
43 | static void __init m2p_override_init(void); | ||
44 | |||
45 | unsigned long xen_max_p2m_pfn __read_mostly; | ||
46 | |||
47 | #define P2M_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) | ||
48 | #define P2M_MID_PER_PAGE (PAGE_SIZE / sizeof(unsigned long *)) | ||
49 | #define P2M_TOP_PER_PAGE (PAGE_SIZE / sizeof(unsigned long **)) | ||
50 | |||
51 | #define MAX_P2M_PFN (P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE) | ||
52 | |||
53 | /* Placeholders for holes in the address space */ | ||
54 | static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE); | ||
55 | static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE); | ||
56 | static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_missing_mfn, P2M_MID_PER_PAGE); | ||
57 | |||
58 | static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE); | ||
59 | static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE); | ||
60 | static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE); | ||
61 | |||
62 | RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); | ||
63 | RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); | ||
64 | |||
65 | static inline unsigned p2m_top_index(unsigned long pfn) | ||
66 | { | ||
67 | BUG_ON(pfn >= MAX_P2M_PFN); | ||
68 | return pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE); | ||
69 | } | ||
70 | |||
71 | static inline unsigned p2m_mid_index(unsigned long pfn) | ||
72 | { | ||
73 | return (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE; | ||
74 | } | ||
75 | |||
76 | static inline unsigned p2m_index(unsigned long pfn) | ||
77 | { | ||
78 | return pfn % P2M_PER_PAGE; | ||
79 | } | ||
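
A worked example of the decomposition the three helpers above perform; this is a standalone sketch (not kernel code), with the constants hard-wired to their x86-64 values, where all three per-page counts are 512:

#include <stdio.h>

#define P2M_PER_PAGE		512	/* PAGE_SIZE / sizeof(unsigned long) */
#define P2M_MID_PER_PAGE	512	/* PAGE_SIZE / sizeof(unsigned long *) */

int main(void)
{
	unsigned long pfn = 0x12345;	/* arbitrary pfn for illustration */
	unsigned topidx = pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE);
	unsigned mididx = (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE;
	unsigned idx = pfn % P2M_PER_PAGE;

	/* prints: pfn 0x12345 -> top 0, mid 145, leaf 325 */
	printf("pfn %#lx -> top %u, mid %u, leaf %u\n", pfn, topidx, mididx, idx);
	return 0;
}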
80 | |||
81 | static void p2m_top_init(unsigned long ***top) | ||
82 | { | ||
83 | unsigned i; | ||
84 | |||
85 | for (i = 0; i < P2M_TOP_PER_PAGE; i++) | ||
86 | top[i] = p2m_mid_missing; | ||
87 | } | ||
88 | |||
89 | static void p2m_top_mfn_init(unsigned long *top) | ||
90 | { | ||
91 | unsigned i; | ||
92 | |||
93 | for (i = 0; i < P2M_TOP_PER_PAGE; i++) | ||
94 | top[i] = virt_to_mfn(p2m_mid_missing_mfn); | ||
95 | } | ||
96 | |||
97 | static void p2m_top_mfn_p_init(unsigned long **top) | ||
98 | { | ||
99 | unsigned i; | ||
100 | |||
101 | for (i = 0; i < P2M_TOP_PER_PAGE; i++) | ||
102 | top[i] = p2m_mid_missing_mfn; | ||
103 | } | ||
104 | |||
105 | static void p2m_mid_init(unsigned long **mid) | ||
106 | { | ||
107 | unsigned i; | ||
108 | |||
109 | for (i = 0; i < P2M_MID_PER_PAGE; i++) | ||
110 | mid[i] = p2m_missing; | ||
111 | } | ||
112 | |||
113 | static void p2m_mid_mfn_init(unsigned long *mid) | ||
114 | { | ||
115 | unsigned i; | ||
116 | |||
117 | for (i = 0; i < P2M_MID_PER_PAGE; i++) | ||
118 | mid[i] = virt_to_mfn(p2m_missing); | ||
119 | } | ||
120 | |||
121 | static void p2m_init(unsigned long *p2m) | ||
122 | { | ||
123 | unsigned i; | ||
124 | |||
125 | for (i = 0; i < P2M_PER_PAGE; i++) | ||
126 | p2m[i] = INVALID_P2M_ENTRY; | ||
127 | } | ||
128 | |||
129 | /* | ||
130 | * Build the parallel p2m_top_mfn and p2m_mid_mfn structures | ||
131 | * | ||
132 | * This is called both at boot time, and after resuming from suspend: | ||
133 | * - At boot time we're called very early, and must use extend_brk() | ||
134 | * to allocate memory. | ||
135 | * | ||
136 | * - After resume we're called from within stop_machine, but the mfn | ||
137 | * tree should already be completely allocated. | ||
138 | */ | ||
139 | void xen_build_mfn_list_list(void) | ||
140 | { | ||
141 | unsigned long pfn; | ||
142 | |||
143 | /* Pre-initialize p2m_top_mfn to be completely missing */ | ||
144 | if (p2m_top_mfn == NULL) { | ||
145 | p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
146 | p2m_mid_mfn_init(p2m_mid_missing_mfn); | ||
147 | |||
148 | p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
149 | p2m_top_mfn_p_init(p2m_top_mfn_p); | ||
150 | |||
151 | p2m_top_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
152 | p2m_top_mfn_init(p2m_top_mfn); | ||
153 | } else { | ||
154 | /* Reinitialise, since mfns all change after migration */ | ||
155 | p2m_mid_mfn_init(p2m_mid_missing_mfn); | ||
156 | } | ||
157 | |||
158 | for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) { | ||
159 | unsigned topidx = p2m_top_index(pfn); | ||
160 | unsigned mididx = p2m_mid_index(pfn); | ||
161 | unsigned long **mid; | ||
162 | unsigned long *mid_mfn_p; | ||
163 | |||
164 | mid = p2m_top[topidx]; | ||
165 | mid_mfn_p = p2m_top_mfn_p[topidx]; | ||
166 | |||
167 | /* Don't bother allocating any mfn mid levels if | ||
168 | * they're just missing; simply update the stored mfn, | ||
169 | * since all of them could have changed over a migration. | ||
170 | */ | ||
171 | if (mid == p2m_mid_missing) { | ||
172 | BUG_ON(mididx); | ||
173 | BUG_ON(mid_mfn_p != p2m_mid_missing_mfn); | ||
174 | p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn); | ||
175 | pfn += (P2M_MID_PER_PAGE - 1) * P2M_PER_PAGE; | ||
176 | continue; | ||
177 | } | ||
178 | |||
179 | if (mid_mfn_p == p2m_mid_missing_mfn) { | ||
180 | /* | ||
181 | * XXX boot-time only! We should never find | ||
182 | * missing parts of the mfn tree after | ||
183 | * runtime. extend_brk() will BUG if we call | ||
184 | * it too late. | ||
185 | */ | ||
186 | mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
187 | p2m_mid_mfn_init(mid_mfn_p); | ||
188 | |||
189 | p2m_top_mfn_p[topidx] = mid_mfn_p; | ||
190 | } | ||
191 | |||
192 | p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); | ||
193 | mid_mfn_p[mididx] = virt_to_mfn(mid[mididx]); | ||
194 | } | ||
195 | } | ||
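
Note the arithmetic behind the skip when a whole mid level is missing: the body advances pfn by (P2M_MID_PER_PAGE - 1) * P2M_PER_PAGE and the loop increment adds the final P2M_PER_PAGE, so one iteration steps over exactly one top-level slot's worth of pfns (512 * 512 = 262144 on x86-64) rather than revisiting 511 more missing mid entries.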
196 | |||
197 | void xen_setup_mfn_list_list(void) | ||
198 | { | ||
199 | BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); | ||
200 | |||
201 | HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = | ||
202 | virt_to_mfn(p2m_top_mfn); | ||
203 | HYPERVISOR_shared_info->arch.max_pfn = xen_max_p2m_pfn; | ||
204 | } | ||
205 | |||
206 | /* Set up p2m_top to point to the domain-builder provided p2m pages */ | ||
207 | void __init xen_build_dynamic_phys_to_machine(void) | ||
208 | { | ||
209 | unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list; | ||
210 | unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); | ||
211 | unsigned long pfn; | ||
212 | |||
213 | xen_max_p2m_pfn = max_pfn; | ||
214 | |||
215 | p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
216 | p2m_init(p2m_missing); | ||
217 | |||
218 | p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
219 | p2m_mid_init(p2m_mid_missing); | ||
220 | |||
221 | p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
222 | p2m_top_init(p2m_top); | ||
223 | |||
224 | /* | ||
225 | * The domain builder gives us a pre-constructed p2m array in | ||
226 | * mfn_list for all the pages initially given to us, so we just | ||
227 | * need to graft that into our tree structure. | ||
228 | */ | ||
229 | for (pfn = 0; pfn < max_pfn; pfn += P2M_PER_PAGE) { | ||
230 | unsigned topidx = p2m_top_index(pfn); | ||
231 | unsigned mididx = p2m_mid_index(pfn); | ||
232 | |||
233 | if (p2m_top[topidx] == p2m_mid_missing) { | ||
234 | unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
235 | p2m_mid_init(mid); | ||
236 | |||
237 | p2m_top[topidx] = mid; | ||
238 | } | ||
239 | |||
240 | p2m_top[topidx][mididx] = &mfn_list[pfn]; | ||
241 | } | ||
242 | |||
243 | m2p_override_init(); | ||
244 | } | ||
245 | |||
246 | unsigned long get_phys_to_machine(unsigned long pfn) | ||
247 | { | ||
248 | unsigned topidx, mididx, idx; | ||
249 | |||
250 | if (unlikely(pfn >= MAX_P2M_PFN)) | ||
251 | return INVALID_P2M_ENTRY; | ||
252 | |||
253 | topidx = p2m_top_index(pfn); | ||
254 | mididx = p2m_mid_index(pfn); | ||
255 | idx = p2m_index(pfn); | ||
256 | |||
257 | return p2m_top[topidx][mididx][idx]; | ||
258 | } | ||
259 | EXPORT_SYMBOL_GPL(get_phys_to_machine); | ||
260 | |||
261 | static void *alloc_p2m_page(void) | ||
262 | { | ||
263 | return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT); | ||
264 | } | ||
265 | |||
266 | static void free_p2m_page(void *p) | ||
267 | { | ||
268 | free_page((unsigned long)p); | ||
269 | } | ||
270 | |||
271 | /* | ||
272 | * Fully allocate the p2m structure for a given pfn. We need to check | ||
273 | * that both the top and mid levels are allocated, and make sure the | ||
274 | * parallel mfn tree is kept in sync. We may race with other cpus, so | ||
275 | * the new pages are installed with cmpxchg; if we lose the race then | ||
276 | * simply free the page we allocated and use the one that's there. | ||
277 | */ | ||
278 | static bool alloc_p2m(unsigned long pfn) | ||
279 | { | ||
280 | unsigned topidx, mididx; | ||
281 | unsigned long ***top_p, **mid; | ||
282 | unsigned long *top_mfn_p, *mid_mfn; | ||
283 | |||
284 | topidx = p2m_top_index(pfn); | ||
285 | mididx = p2m_mid_index(pfn); | ||
286 | |||
287 | top_p = &p2m_top[topidx]; | ||
288 | mid = *top_p; | ||
289 | |||
290 | if (mid == p2m_mid_missing) { | ||
291 | /* Mid level is missing, allocate a new one */ | ||
292 | mid = alloc_p2m_page(); | ||
293 | if (!mid) | ||
294 | return false; | ||
295 | |||
296 | p2m_mid_init(mid); | ||
297 | |||
298 | if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing) | ||
299 | free_p2m_page(mid); | ||
300 | } | ||
301 | |||
302 | top_mfn_p = &p2m_top_mfn[topidx]; | ||
303 | mid_mfn = p2m_top_mfn_p[topidx]; | ||
304 | |||
305 | BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); | ||
306 | |||
307 | if (mid_mfn == p2m_mid_missing_mfn) { | ||
308 | /* Separately check the mid mfn level */ | ||
309 | unsigned long missing_mfn; | ||
310 | unsigned long mid_mfn_mfn; | ||
311 | |||
312 | mid_mfn = alloc_p2m_page(); | ||
313 | if (!mid_mfn) | ||
314 | return false; | ||
315 | |||
316 | p2m_mid_mfn_init(mid_mfn); | ||
317 | |||
318 | missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); | ||
319 | mid_mfn_mfn = virt_to_mfn(mid_mfn); | ||
320 | if (cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn) != missing_mfn) | ||
321 | free_p2m_page(mid_mfn); | ||
322 | else | ||
323 | p2m_top_mfn_p[topidx] = mid_mfn; | ||
324 | } | ||
325 | |||
326 | if (p2m_top[topidx][mididx] == p2m_missing) { | ||
327 | /* p2m leaf page is missing */ | ||
328 | unsigned long *p2m; | ||
329 | |||
330 | p2m = alloc_p2m_page(); | ||
331 | if (!p2m) | ||
332 | return false; | ||
333 | |||
334 | p2m_init(p2m); | ||
335 | |||
336 | if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing) | ||
337 | free_p2m_page(p2m); | ||
338 | else | ||
339 | mid_mfn[mididx] = virt_to_mfn(p2m); | ||
340 | } | ||
341 | |||
342 | return true; | ||
343 | } | ||
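
The allocate/cmpxchg/free-on-loss idiom appears three times in alloc_p2m(); its shape is easier to see in isolation. A userspace sketch of the same pattern, substituting the GCC __sync builtin for the kernel's cmpxchg():

#include <stdlib.h>

static long *shared_slot;	/* NULL plays the role of "missing" */

/* Publish a freshly allocated object unless another thread already did. */
static long *install_once(void)
{
	long *cand = calloc(1, sizeof(*cand));
	if (!cand)
		return NULL;

	long *old = __sync_val_compare_and_swap(&shared_slot, NULL, cand);
	if (old) {		/* lost the race: discard ours, use theirs */
		free(cand);
		return old;
	}
	return cand;		/* won: cand is now visible to everyone */
}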
344 | |||
345 | /* Try to install p2m mapping; fail if intermediate bits missing */ | ||
346 | bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) | ||
347 | { | ||
348 | unsigned topidx, mididx, idx; | ||
349 | |||
350 | if (unlikely(pfn >= MAX_P2M_PFN)) { | ||
351 | BUG_ON(mfn != INVALID_P2M_ENTRY); | ||
352 | return true; | ||
353 | } | ||
354 | |||
355 | topidx = p2m_top_index(pfn); | ||
356 | mididx = p2m_mid_index(pfn); | ||
357 | idx = p2m_index(pfn); | ||
358 | |||
359 | if (p2m_top[topidx][mididx] == p2m_missing) | ||
360 | return mfn == INVALID_P2M_ENTRY; | ||
361 | |||
362 | p2m_top[topidx][mididx][idx] = mfn; | ||
363 | |||
364 | return true; | ||
365 | } | ||
366 | |||
367 | bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) | ||
368 | { | ||
369 | if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { | ||
370 | BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); | ||
371 | return true; | ||
372 | } | ||
373 | |||
374 | if (unlikely(!__set_phys_to_machine(pfn, mfn))) { | ||
375 | if (!alloc_p2m(pfn)) | ||
376 | return false; | ||
377 | |||
378 | if (!__set_phys_to_machine(pfn, mfn)) | ||
379 | return false; | ||
380 | } | ||
381 | |||
382 | return true; | ||
383 | } | ||
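
The slow path above retries at most once, and the retry cannot fail for the same reason: alloc_p2m() returns true only after a leaf page is installed, so the second __set_phys_to_machine() can no longer see p2m_missing, and the pfn >= MAX_P2M_PFN case was already absorbed (as success) by the first call.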
384 | |||
385 | #define M2P_OVERRIDE_HASH_SHIFT 10 | ||
386 | #define M2P_OVERRIDE_HASH (1 << M2P_OVERRIDE_HASH_SHIFT) | ||
387 | |||
388 | static RESERVE_BRK_ARRAY(struct list_head, m2p_overrides, M2P_OVERRIDE_HASH); | ||
389 | static DEFINE_SPINLOCK(m2p_override_lock); | ||
390 | |||
391 | static void __init m2p_override_init(void) | ||
392 | { | ||
393 | unsigned i; | ||
394 | |||
395 | m2p_overrides = extend_brk(sizeof(*m2p_overrides) * M2P_OVERRIDE_HASH, | ||
396 | sizeof(unsigned long)); | ||
397 | |||
398 | for (i = 0; i < M2P_OVERRIDE_HASH; i++) | ||
399 | INIT_LIST_HEAD(&m2p_overrides[i]); | ||
400 | } | ||
401 | |||
402 | static unsigned long mfn_hash(unsigned long mfn) | ||
403 | { | ||
404 | return hash_long(mfn, M2P_OVERRIDE_HASH_SHIFT); | ||
405 | } | ||
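
hash_long(mfn, M2P_OVERRIDE_HASH_SHIFT) is the kernel's multiplicative hash: multiply by a golden-ratio constant and keep the top 10 bits, so even mostly-sequential mfns scatter across the 1 << 10 = 1024 buckets instead of piling into one list, keeping each lookup proportional to a single bucket's length.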
406 | |||
407 | /* Add an MFN override for a particular page */ | ||
408 | int m2p_add_override(unsigned long mfn, struct page *page) | ||
409 | { | ||
410 | unsigned long flags; | ||
411 | unsigned long pfn; | ||
412 | unsigned long address; | ||
413 | unsigned level; | ||
414 | pte_t *ptep = NULL; | ||
415 | |||
416 | pfn = page_to_pfn(page); | ||
417 | if (!PageHighMem(page)) { | ||
418 | address = (unsigned long)__va(pfn << PAGE_SHIFT); | ||
419 | ptep = lookup_address(address, &level); | ||
420 | |||
421 | if (WARN(ptep == NULL || level != PG_LEVEL_4K, | ||
422 | "m2p_add_override: pfn %lx not mapped", pfn)) | ||
423 | return -EINVAL; | ||
424 | } | ||
425 | |||
426 | page->private = mfn; | ||
427 | page->index = pfn_to_mfn(pfn); | ||
428 | |||
429 | __set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)); | ||
430 | if (!PageHighMem(page)) | ||
431 | /* Just zap old mapping for now */ | ||
432 | pte_clear(&init_mm, address, ptep); | ||
433 | |||
434 | spin_lock_irqsave(&m2p_override_lock, flags); | ||
435 | list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]); | ||
436 | spin_unlock_irqrestore(&m2p_override_lock, flags); | ||
437 | |||
438 | return 0; | ||
439 | } | ||
440 | |||
441 | int m2p_remove_override(struct page *page) | ||
442 | { | ||
443 | unsigned long flags; | ||
444 | unsigned long mfn; | ||
445 | unsigned long pfn; | ||
446 | unsigned long address; | ||
447 | unsigned level; | ||
448 | pte_t *ptep = NULL; | ||
449 | |||
450 | pfn = page_to_pfn(page); | ||
451 | mfn = get_phys_to_machine(pfn); | ||
452 | if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) | ||
453 | return -EINVAL; | ||
454 | |||
455 | if (!PageHighMem(page)) { | ||
456 | address = (unsigned long)__va(pfn << PAGE_SHIFT); | ||
457 | ptep = lookup_address(address, &level); | ||
458 | |||
459 | if (WARN(ptep == NULL || level != PG_LEVEL_4K, | ||
460 | "m2p_remove_override: pfn %lx not mapped", pfn)) | ||
461 | return -EINVAL; | ||
462 | } | ||
463 | |||
464 | spin_lock_irqsave(&m2p_override_lock, flags); | ||
465 | list_del(&page->lru); | ||
466 | spin_unlock_irqrestore(&m2p_override_lock, flags); | ||
467 | __set_phys_to_machine(pfn, page->index); | ||
468 | |||
469 | if (!PageHighMem(page)) | ||
470 | set_pte_at(&init_mm, address, ptep, | ||
471 | pfn_pte(pfn, PAGE_KERNEL)); | ||
472 | /* No tlb flush necessary because the caller already | ||
473 | * left the pte unmapped. */ | ||
474 | |||
475 | return 0; | ||
476 | } | ||
477 | |||
478 | struct page *m2p_find_override(unsigned long mfn) | ||
479 | { | ||
480 | unsigned long flags; | ||
481 | struct list_head *bucket = &m2p_overrides[mfn_hash(mfn)]; | ||
482 | struct page *p, *ret; | ||
483 | |||
484 | ret = NULL; | ||
485 | |||
486 | spin_lock_irqsave(&m2p_override_lock, flags); | ||
487 | |||
488 | list_for_each_entry(p, bucket, lru) { | ||
489 | if (p->private == mfn) { | ||
490 | ret = p; | ||
491 | break; | ||
492 | } | ||
493 | } | ||
494 | |||
495 | spin_unlock_irqrestore(&m2p_override_lock, flags); | ||
496 | |||
497 | return ret; | ||
498 | } | ||
499 | |||
500 | unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn) | ||
501 | { | ||
502 | struct page *p = m2p_find_override(mfn); | ||
503 | unsigned long ret = pfn; | ||
504 | |||
505 | if (p) | ||
506 | ret = page_to_pfn(p); | ||
507 | |||
508 | return ret; | ||
509 | } | ||
510 | EXPORT_SYMBOL_GPL(m2p_find_override_pfn); | ||
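
A hedged sketch of how a caller might bracket a foreign mapping with the override API above; the function and variable names here are illustrative, not part of this patch:

/* 'foreign_mfn' is a frame granted by another domain, backed locally
 * by 'page' (e.g. via a grant-table map); illustrative only. */
static int use_foreign_frame(unsigned long foreign_mfn, struct page *page)
{
	int ret = m2p_add_override(foreign_mfn, page);
	if (ret)
		return ret;

	/* ... while the override is in place, reverse (m2p) lookups of
	 * foreign_mfn can be steered to this page's pfn through
	 * m2p_find_override_pfn() ... */

	return m2p_remove_override(page);
}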
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 23e061b9327b..cc9b1e182fcf 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c | |||
@@ -159,8 +159,8 @@ static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl) | |||
159 | { | 159 | { |
160 | struct xen_spinlock *prev; | 160 | struct xen_spinlock *prev; |
161 | 161 | ||
162 | prev = __get_cpu_var(lock_spinners); | 162 | prev = __this_cpu_read(lock_spinners); |
163 | __get_cpu_var(lock_spinners) = xl; | 163 | __this_cpu_write(lock_spinners, xl); |
164 | 164 | ||
165 | wmb(); /* set lock of interest before count */ | 165 | wmb(); /* set lock of interest before count */ |
166 | 166 | ||
@@ -179,14 +179,14 @@ static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock | |||
179 | asm(LOCK_PREFIX " decw %0" | 179 | asm(LOCK_PREFIX " decw %0" |
180 | : "+m" (xl->spinners) : : "memory"); | 180 | : "+m" (xl->spinners) : : "memory"); |
181 | wmb(); /* decrement count before restoring lock */ | 181 | wmb(); /* decrement count before restoring lock */ |
182 | __get_cpu_var(lock_spinners) = prev; | 182 | __this_cpu_write(lock_spinners, prev); |
183 | } | 183 | } |
184 | 184 | ||
185 | static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enable) | 185 | static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enable) |
186 | { | 186 | { |
187 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | 187 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; |
188 | struct xen_spinlock *prev; | 188 | struct xen_spinlock *prev; |
189 | int irq = __get_cpu_var(lock_kicker_irq); | 189 | int irq = __this_cpu_read(lock_kicker_irq); |
190 | int ret; | 190 | int ret; |
191 | u64 start; | 191 | u64 start; |
192 | 192 | ||
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 5da5e53fb94c..067759e3d6a5 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c | |||
@@ -135,24 +135,24 @@ static void do_stolen_accounting(void) | |||
135 | 135 | ||
136 | /* Add the appropriate number of ticks of stolen time, | 136 | /* Add the appropriate number of ticks of stolen time, |
137 | including any left-overs from last time. */ | 137 | including any left-overs from last time. */ |
138 | stolen = runnable + offline + __get_cpu_var(xen_residual_stolen); | 138 | stolen = runnable + offline + __this_cpu_read(xen_residual_stolen); |
139 | 139 | ||
140 | if (stolen < 0) | 140 | if (stolen < 0) |
141 | stolen = 0; | 141 | stolen = 0; |
142 | 142 | ||
143 | ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen); | 143 | ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen); |
144 | __get_cpu_var(xen_residual_stolen) = stolen; | 144 | __this_cpu_write(xen_residual_stolen, stolen); |
145 | account_steal_ticks(ticks); | 145 | account_steal_ticks(ticks); |
146 | 146 | ||
147 | /* Add the appropriate number of ticks of blocked time, | 147 | /* Add the appropriate number of ticks of blocked time, |
148 | including any left-overs from last time. */ | 148 | including any left-overs from last time. */ |
149 | blocked += __get_cpu_var(xen_residual_blocked); | 149 | blocked += __this_cpu_read(xen_residual_blocked); |
150 | 150 | ||
151 | if (blocked < 0) | 151 | if (blocked < 0) |
152 | blocked = 0; | 152 | blocked = 0; |
153 | 153 | ||
154 | ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked); | 154 | ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked); |
155 | __get_cpu_var(xen_residual_blocked) = blocked; | 155 | __this_cpu_write(xen_residual_blocked, blocked); |
156 | account_idle_ticks(ticks); | 156 | account_idle_ticks(ticks); |
157 | } | 157 | } |
158 | 158 | ||
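
The residual-carry pattern in the do_stolen_accounting() hunk above converts a nanosecond total into whole ticks and carries the remainder into the next pass, so rounding never loses time. A standalone sketch, assuming HZ == 100 (10 ms ticks):

#include <stdio.h>
#include <stdint.h>

#define NS_PER_TICK (1000000000ULL / 100)	/* assumes HZ == 100 */

int main(void)
{
	uint64_t residual = 0;
	uint64_t stolen_ns[] = { 25000000, 3000000, 14000000 };

	for (int i = 0; i < 3; i++) {
		uint64_t total = stolen_ns[i] + residual;
		uint64_t ticks = total / NS_PER_TICK;	/* whole ticks to account */
		residual = total % NS_PER_TICK;		/* carry the rest forward */
		/* prints 2/0/2 ticks with 5/8/2 ms carried */
		printf("pass %d: %llu ticks, %llu ns carried\n",
		       i, (unsigned long long)ticks, (unsigned long long)residual);
	}
	return 0;
}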