Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--   arch/x86/kvm/Kconfig           |    3
-rw-r--r--   arch/x86/kvm/Makefile          |    3
-rw-r--r--   arch/x86/kvm/emulate.c         |  589
-rw-r--r--   arch/x86/kvm/i8254.c           |   41
-rw-r--r--   arch/x86/kvm/i8254.h           |    2
-rw-r--r--   arch/x86/kvm/i8259.c           |   87
-rw-r--r--   arch/x86/kvm/irq.h             |   10
-rw-r--r--   arch/x86/kvm/kvm_cache_regs.h  |   31
-rw-r--r--   arch/x86/kvm/lapic.c           |   52
-rw-r--r--   arch/x86/kvm/lapic.h           |    8
-rw-r--r--   arch/x86/kvm/mmu.c             |  152
-rw-r--r--   arch/x86/kvm/mmu.h             |   35
-rw-r--r--   arch/x86/kvm/paging_tmpl.h     |   36
-rw-r--r--   arch/x86/kvm/svm.c             |  660
-rw-r--r--   arch/x86/kvm/trace.h           |  224
-rw-r--r--   arch/x86/kvm/vmx.c             |  862
-rw-r--r--   arch/x86/kvm/x86.c             | 1680
-rw-r--r--   arch/x86/kvm/x86.h             |   30
18 files changed, 3159 insertions(+), 1346 deletions(-)
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index b84e571f4175..970bbd479516 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -28,6 +28,8 @@ config KVM
28 | select HAVE_KVM_IRQCHIP | 28 | select HAVE_KVM_IRQCHIP |
29 | select HAVE_KVM_EVENTFD | 29 | select HAVE_KVM_EVENTFD |
30 | select KVM_APIC_ARCHITECTURE | 30 | select KVM_APIC_ARCHITECTURE |
31 | select USER_RETURN_NOTIFIER | ||
32 | select KVM_MMIO | ||
31 | ---help--- | 33 | ---help--- |
32 | Support hosting fully virtualized guest machines using hardware | 34 | Support hosting fully virtualized guest machines using hardware |
33 | virtualization extensions. You will need a fairly recent | 35 | virtualization extensions. You will need a fairly recent |
@@ -64,6 +66,7 @@ config KVM_AMD
64 | 66 | ||
65 | # OK, it's a little counter-intuitive to do this, but it puts it neatly under | 67 | # OK, it's a little counter-intuitive to do this, but it puts it neatly under |
66 | # the virtualization menu. | 68 | # the virtualization menu. |
69 | source drivers/vhost/Kconfig | ||
67 | source drivers/lguest/Kconfig | 70 | source drivers/lguest/Kconfig |
68 | source drivers/virtio/Kconfig | 71 | source drivers/virtio/Kconfig |
69 | 72 | ||
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 0e7fe78d0f74..31a7035c4bd9 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -6,7 +6,8 @@ CFLAGS_svm.o := -I.
6 | CFLAGS_vmx.o := -I. | 6 | CFLAGS_vmx.o := -I. |
7 | 7 | ||
8 | kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ | 8 | kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ |
9 | coalesced_mmio.o irq_comm.o eventfd.o) | 9 | coalesced_mmio.o irq_comm.o eventfd.o \ |
10 | assigned-dev.o) | ||
10 | kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o) | 11 | kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o) |
11 | 12 | ||
12 | kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ | 13 | kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ |
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 1be5cd640e93..4dade6ac0827 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -32,7 +32,7 @@
32 | #include <linux/module.h> | 32 | #include <linux/module.h> |
33 | #include <asm/kvm_emulate.h> | 33 | #include <asm/kvm_emulate.h> |
34 | 34 | ||
35 | #include "mmu.h" /* for is_long_mode() */ | 35 | #include "x86.h" |
36 | 36 | ||
37 | /* | 37 | /* |
38 | * Opcode effective-address decode tables. | 38 | * Opcode effective-address decode tables. |
@@ -75,6 +75,10 @@
75 | #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ | 75 | #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ |
76 | #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ | 76 | #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ |
77 | #define GroupMask 0xff /* Group number stored in bits 0:7 */ | 77 | #define GroupMask 0xff /* Group number stored in bits 0:7 */ |
78 | /* Misc flags */ | ||
79 | #define Lock (1<<26) /* lock prefix is allowed for the instruction */ | ||
80 | #define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */ | ||
81 | #define No64 (1<<28) | ||
78 | /* Source 2 operand type */ | 82 | /* Source 2 operand type */ |
79 | #define Src2None (0<<29) | 83 | #define Src2None (0<<29) |
80 | #define Src2CL (1<<29) | 84 | #define Src2CL (1<<29) |
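The Lock, Priv, and No64 flags introduced above are OR'd into the per-opcode decode words of opcode_table[] and group_table[], and later hunks in this patch test them before dispatch. A minimal standalone sketch of that gating, assuming illustrative inputs (this is not kernel code; only the three flag values are taken from the hunk above):

    #include <stdint.h>
    #include <stdio.h>

    #define Lock (1u << 26)  /* lock prefix is allowed for the instruction */
    #define Priv (1u << 27)  /* instruction generates #GP if current CPL != 0 */
    #define No64 (1u << 28)  /* instruction is invalid in 64-bit mode */

    /* Decide, from an opcode's decode flags, whether emulation may proceed. */
    static const char *gate(uint32_t d, int lock_prefix, int cpl, int mode64)
    {
            if (mode64 && (d & No64))
                    return "#UD: not a 64-bit instruction";
            if (lock_prefix && !(d & Lock))
                    return "#UD: LOCK prefix not allowed here";
            if ((d & Priv) && cpl != 0)
                    return "#GP: privileged instruction outside CPL 0";
            return "continue emulation";
    }

    int main(void)
    {
            printf("%s\n", gate(Priv, 0, 3, 0)); /* privileged op at CPL 3 */
            printf("%s\n", gate(Lock, 1, 0, 1)); /* locked r/m form, allowed */
            return 0;
    }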
@@ -86,35 +90,40 @@
86 | enum { | 90 | enum { |
87 | Group1_80, Group1_81, Group1_82, Group1_83, | 91 | Group1_80, Group1_81, Group1_82, Group1_83, |
88 | Group1A, Group3_Byte, Group3, Group4, Group5, Group7, | 92 | Group1A, Group3_Byte, Group3, Group4, Group5, Group7, |
93 | Group8, Group9, | ||
89 | }; | 94 | }; |
90 | 95 | ||
91 | static u32 opcode_table[256] = { | 96 | static u32 opcode_table[256] = { |
92 | /* 0x00 - 0x07 */ | 97 | /* 0x00 - 0x07 */ |
93 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 98 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, |
94 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 99 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
95 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0, | 100 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, |
101 | ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, | ||
96 | /* 0x08 - 0x0F */ | 102 | /* 0x08 - 0x0F */ |
97 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 103 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, |
98 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 104 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
99 | 0, 0, 0, 0, | 105 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, |
106 | ImplicitOps | Stack | No64, 0, | ||
100 | /* 0x10 - 0x17 */ | 107 | /* 0x10 - 0x17 */ |
101 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 108 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, |
102 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 109 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
103 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0, | 110 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, |
111 | ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, | ||
104 | /* 0x18 - 0x1F */ | 112 | /* 0x18 - 0x1F */ |
105 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 113 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, |
106 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 114 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
107 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0, | 115 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, |
116 | ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, | ||
108 | /* 0x20 - 0x27 */ | 117 | /* 0x20 - 0x27 */ |
109 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 118 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, |
110 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 119 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
111 | DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0, | 120 | DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0, |
112 | /* 0x28 - 0x2F */ | 121 | /* 0x28 - 0x2F */ |
113 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 122 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, |
114 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 123 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
115 | 0, 0, 0, 0, | 124 | 0, 0, 0, 0, |
116 | /* 0x30 - 0x37 */ | 125 | /* 0x30 - 0x37 */ |
117 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 126 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, |
118 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 127 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
119 | 0, 0, 0, 0, | 128 | 0, 0, 0, 0, |
120 | /* 0x38 - 0x3F */ | 129 | /* 0x38 - 0x3F */ |
@@ -133,7 +142,8 @@ static u32 opcode_table[256] = {
133 | DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack, | 142 | DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack, |
134 | DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack, | 143 | DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack, |
135 | /* 0x60 - 0x67 */ | 144 | /* 0x60 - 0x67 */ |
136 | 0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ , | 145 | ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, |
146 | 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ , | ||
137 | 0, 0, 0, 0, | 147 | 0, 0, 0, 0, |
138 | /* 0x68 - 0x6F */ | 148 | /* 0x68 - 0x6F */ |
139 | SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0, | 149 | SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0, |
@@ -149,7 +159,7 @@ static u32 opcode_table[256] = {
149 | Group | Group1_80, Group | Group1_81, | 159 | Group | Group1_80, Group | Group1_81, |
150 | Group | Group1_82, Group | Group1_83, | 160 | Group | Group1_82, Group | Group1_83, |
151 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 161 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, |
152 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 162 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, |
153 | /* 0x88 - 0x8F */ | 163 | /* 0x88 - 0x8F */ |
154 | ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov, | 164 | ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov, |
155 | ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, | 165 | ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, |
@@ -158,7 +168,7 @@ static u32 opcode_table[256] = {
158 | /* 0x90 - 0x97 */ | 168 | /* 0x90 - 0x97 */ |
159 | DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, | 169 | DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, |
160 | /* 0x98 - 0x9F */ | 170 | /* 0x98 - 0x9F */ |
161 | 0, 0, SrcImm | Src2Imm16, 0, | 171 | 0, 0, SrcImm | Src2Imm16 | No64, 0, |
162 | ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, | 172 | ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, |
163 | /* 0xA0 - 0xA7 */ | 173 | /* 0xA0 - 0xA7 */ |
164 | ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, | 174 | ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, |
@@ -185,7 +195,7 @@ static u32 opcode_table[256] = {
185 | ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov, | 195 | ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov, |
186 | /* 0xC8 - 0xCF */ | 196 | /* 0xC8 - 0xCF */ |
187 | 0, 0, 0, ImplicitOps | Stack, | 197 | 0, 0, 0, ImplicitOps | Stack, |
188 | ImplicitOps, SrcImmByte, ImplicitOps, ImplicitOps, | 198 | ImplicitOps, SrcImmByte, ImplicitOps | No64, ImplicitOps, |
189 | /* 0xD0 - 0xD7 */ | 199 | /* 0xD0 - 0xD7 */ |
190 | ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, | 200 | ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, |
191 | ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, | 201 | ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, |
@@ -198,12 +208,12 @@ static u32 opcode_table[256] = {
198 | ByteOp | SrcImmUByte, SrcImmUByte, | 208 | ByteOp | SrcImmUByte, SrcImmUByte, |
199 | /* 0xE8 - 0xEF */ | 209 | /* 0xE8 - 0xEF */ |
200 | SrcImm | Stack, SrcImm | ImplicitOps, | 210 | SrcImm | Stack, SrcImm | ImplicitOps, |
201 | SrcImmU | Src2Imm16, SrcImmByte | ImplicitOps, | 211 | SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps, |
202 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, | 212 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, |
203 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, | 213 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, |
204 | /* 0xF0 - 0xF7 */ | 214 | /* 0xF0 - 0xF7 */ |
205 | 0, 0, 0, 0, | 215 | 0, 0, 0, 0, |
206 | ImplicitOps, ImplicitOps, Group | Group3_Byte, Group | Group3, | 216 | ImplicitOps | Priv, ImplicitOps, Group | Group3_Byte, Group | Group3, |
207 | /* 0xF8 - 0xFF */ | 217 | /* 0xF8 - 0xFF */ |
208 | ImplicitOps, 0, ImplicitOps, ImplicitOps, | 218 | ImplicitOps, 0, ImplicitOps, ImplicitOps, |
209 | ImplicitOps, ImplicitOps, Group | Group4, Group | Group5, | 219 | ImplicitOps, ImplicitOps, Group | Group4, Group | Group5, |
@@ -211,16 +221,20 @@
211 | 221 | ||
212 | static u32 twobyte_table[256] = { | 222 | static u32 twobyte_table[256] = { |
213 | /* 0x00 - 0x0F */ | 223 | /* 0x00 - 0x0F */ |
214 | 0, Group | GroupDual | Group7, 0, 0, 0, ImplicitOps, ImplicitOps, 0, | 224 | 0, Group | GroupDual | Group7, 0, 0, |
215 | ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0, | 225 | 0, ImplicitOps, ImplicitOps | Priv, 0, |
226 | ImplicitOps | Priv, ImplicitOps | Priv, 0, 0, | ||
227 | 0, ImplicitOps | ModRM, 0, 0, | ||
216 | /* 0x10 - 0x1F */ | 228 | /* 0x10 - 0x1F */ |
217 | 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0, | 229 | 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0, |
218 | /* 0x20 - 0x2F */ | 230 | /* 0x20 - 0x2F */ |
219 | ModRM | ImplicitOps, ModRM, ModRM | ImplicitOps, ModRM, 0, 0, 0, 0, | 231 | ModRM | ImplicitOps | Priv, ModRM | Priv, |
232 | ModRM | ImplicitOps | Priv, ModRM | Priv, | ||
233 | 0, 0, 0, 0, | ||
220 | 0, 0, 0, 0, 0, 0, 0, 0, | 234 | 0, 0, 0, 0, 0, 0, 0, 0, |
221 | /* 0x30 - 0x3F */ | 235 | /* 0x30 - 0x3F */ |
222 | ImplicitOps, 0, ImplicitOps, 0, | 236 | ImplicitOps | Priv, 0, ImplicitOps | Priv, 0, |
223 | ImplicitOps, ImplicitOps, 0, 0, | 237 | ImplicitOps, ImplicitOps | Priv, 0, 0, |
224 | 0, 0, 0, 0, 0, 0, 0, 0, | 238 | 0, 0, 0, 0, 0, 0, 0, 0, |
225 | /* 0x40 - 0x47 */ | 239 | /* 0x40 - 0x47 */ |
226 | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, | 240 | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, |
@@ -244,25 +258,29 @@ static u32 twobyte_table[256] = {
244 | /* 0x90 - 0x9F */ | 258 | /* 0x90 - 0x9F */ |
245 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 259 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
246 | /* 0xA0 - 0xA7 */ | 260 | /* 0xA0 - 0xA7 */ |
247 | 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, | 261 | ImplicitOps | Stack, ImplicitOps | Stack, |
262 | 0, DstMem | SrcReg | ModRM | BitOp, | ||
248 | DstMem | SrcReg | Src2ImmByte | ModRM, | 263 | DstMem | SrcReg | Src2ImmByte | ModRM, |
249 | DstMem | SrcReg | Src2CL | ModRM, 0, 0, | 264 | DstMem | SrcReg | Src2CL | ModRM, 0, 0, |
250 | /* 0xA8 - 0xAF */ | 265 | /* 0xA8 - 0xAF */ |
251 | 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, | 266 | ImplicitOps | Stack, ImplicitOps | Stack, |
267 | 0, DstMem | SrcReg | ModRM | BitOp | Lock, | ||
252 | DstMem | SrcReg | Src2ImmByte | ModRM, | 268 | DstMem | SrcReg | Src2ImmByte | ModRM, |
253 | DstMem | SrcReg | Src2CL | ModRM, | 269 | DstMem | SrcReg | Src2CL | ModRM, |
254 | ModRM, 0, | 270 | ModRM, 0, |
255 | /* 0xB0 - 0xB7 */ | 271 | /* 0xB0 - 0xB7 */ |
256 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0, | 272 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, |
257 | DstMem | SrcReg | ModRM | BitOp, | 273 | 0, DstMem | SrcReg | ModRM | BitOp | Lock, |
258 | 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov, | 274 | 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov, |
259 | DstReg | SrcMem16 | ModRM | Mov, | 275 | DstReg | SrcMem16 | ModRM | Mov, |
260 | /* 0xB8 - 0xBF */ | 276 | /* 0xB8 - 0xBF */ |
261 | 0, 0, DstMem | SrcImmByte | ModRM, DstMem | SrcReg | ModRM | BitOp, | 277 | 0, 0, |
278 | Group | Group8, DstMem | SrcReg | ModRM | BitOp | Lock, | ||
262 | 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov, | 279 | 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov, |
263 | DstReg | SrcMem16 | ModRM | Mov, | 280 | DstReg | SrcMem16 | ModRM | Mov, |
264 | /* 0xC0 - 0xCF */ | 281 | /* 0xC0 - 0xCF */ |
265 | 0, 0, 0, DstMem | SrcReg | ModRM | Mov, 0, 0, 0, ImplicitOps | ModRM, | 282 | 0, 0, 0, DstMem | SrcReg | ModRM | Mov, |
283 | 0, 0, 0, Group | GroupDual | Group9, | ||
266 | 0, 0, 0, 0, 0, 0, 0, 0, | 284 | 0, 0, 0, 0, 0, 0, 0, 0, |
267 | /* 0xD0 - 0xDF */ | 285 | /* 0xD0 - 0xDF */ |
268 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 286 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
@@ -274,25 +292,41 @@ static u32 twobyte_table[256] = {
274 | 292 | ||
275 | static u32 group_table[] = { | 293 | static u32 group_table[] = { |
276 | [Group1_80*8] = | 294 | [Group1_80*8] = |
277 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, | 295 | ByteOp | DstMem | SrcImm | ModRM | Lock, |
278 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, | 296 | ByteOp | DstMem | SrcImm | ModRM | Lock, |
279 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, | 297 | ByteOp | DstMem | SrcImm | ModRM | Lock, |
280 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, | 298 | ByteOp | DstMem | SrcImm | ModRM | Lock, |
299 | ByteOp | DstMem | SrcImm | ModRM | Lock, | ||
300 | ByteOp | DstMem | SrcImm | ModRM | Lock, | ||
301 | ByteOp | DstMem | SrcImm | ModRM | Lock, | ||
302 | ByteOp | DstMem | SrcImm | ModRM, | ||
281 | [Group1_81*8] = | 303 | [Group1_81*8] = |
282 | DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, | 304 | DstMem | SrcImm | ModRM | Lock, |
283 | DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, | 305 | DstMem | SrcImm | ModRM | Lock, |
284 | DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, | 306 | DstMem | SrcImm | ModRM | Lock, |
285 | DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, | 307 | DstMem | SrcImm | ModRM | Lock, |
308 | DstMem | SrcImm | ModRM | Lock, | ||
309 | DstMem | SrcImm | ModRM | Lock, | ||
310 | DstMem | SrcImm | ModRM | Lock, | ||
311 | DstMem | SrcImm | ModRM, | ||
286 | [Group1_82*8] = | 312 | [Group1_82*8] = |
287 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, | 313 | ByteOp | DstMem | SrcImm | ModRM | No64 | Lock, |
288 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, | 314 | ByteOp | DstMem | SrcImm | ModRM | No64 | Lock, |
289 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, | 315 | ByteOp | DstMem | SrcImm | ModRM | No64 | Lock, |
290 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, | 316 | ByteOp | DstMem | SrcImm | ModRM | No64 | Lock, |
317 | ByteOp | DstMem | SrcImm | ModRM | No64 | Lock, | ||
318 | ByteOp | DstMem | SrcImm | ModRM | No64 | Lock, | ||
319 | ByteOp | DstMem | SrcImm | ModRM | No64 | Lock, | ||
320 | ByteOp | DstMem | SrcImm | ModRM | No64, | ||
291 | [Group1_83*8] = | 321 | [Group1_83*8] = |
292 | DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM, | 322 | DstMem | SrcImmByte | ModRM | Lock, |
293 | DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM, | 323 | DstMem | SrcImmByte | ModRM | Lock, |
294 | DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM, | 324 | DstMem | SrcImmByte | ModRM | Lock, |
295 | DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM, | 325 | DstMem | SrcImmByte | ModRM | Lock, |
326 | DstMem | SrcImmByte | ModRM | Lock, | ||
327 | DstMem | SrcImmByte | ModRM | Lock, | ||
328 | DstMem | SrcImmByte | ModRM | Lock, | ||
329 | DstMem | SrcImmByte | ModRM, | ||
296 | [Group1A*8] = | 330 | [Group1A*8] = |
297 | DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0, | 331 | DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0, |
298 | [Group3_Byte*8] = | 332 | [Group3_Byte*8] = |
@@ -311,24 +345,39 @@ static u32 group_table[] = {
311 | SrcMem | ModRM | Stack, 0, | 345 | SrcMem | ModRM | Stack, 0, |
312 | SrcMem | ModRM | Stack, 0, SrcMem | ModRM | Stack, 0, | 346 | SrcMem | ModRM | Stack, 0, SrcMem | ModRM | Stack, 0, |
313 | [Group7*8] = | 347 | [Group7*8] = |
314 | 0, 0, ModRM | SrcMem, ModRM | SrcMem, | 348 | 0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv, |
315 | SrcNone | ModRM | DstMem | Mov, 0, | 349 | SrcNone | ModRM | DstMem | Mov, 0, |
316 | SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp, | 350 | SrcMem16 | ModRM | Mov | Priv, SrcMem | ModRM | ByteOp | Priv, |
351 | [Group8*8] = | ||
352 | 0, 0, 0, 0, | ||
353 | DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM | Lock, | ||
354 | DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock, | ||
355 | [Group9*8] = | ||
356 | 0, ImplicitOps | ModRM | Lock, 0, 0, 0, 0, 0, 0, | ||
317 | }; | 357 | }; |
318 | 358 | ||
319 | static u32 group2_table[] = { | 359 | static u32 group2_table[] = { |
320 | [Group7*8] = | 360 | [Group7*8] = |
321 | SrcNone | ModRM, 0, 0, SrcNone | ModRM, | 361 | SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM, |
322 | SrcNone | ModRM | DstMem | Mov, 0, | 362 | SrcNone | ModRM | DstMem | Mov, 0, |
323 | SrcMem16 | ModRM | Mov, 0, | 363 | SrcMem16 | ModRM | Mov, 0, |
364 | [Group9*8] = | ||
365 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
324 | }; | 366 | }; |
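The new Group8 and Group9 entries extend the lookup scheme the emulator already uses: a Group opcode stores its group number in the low bits of the decode word, bits 3:5 of the ModRM byte pick one of eight sub-entries, and GroupDual opcodes switch to group2_table when ModRM.mod == 3. A hedged sketch of that indexing (standalone, with assumed parameter names, not the kernel's exact interface):

    #include <stdint.h>

    /*
     * Sketch of the Group lookup fed by the tables above; "table" stands
     * in for group_table[], "table2" for group2_table[].
     */
    static uint32_t group_decode(const uint32_t *table, const uint32_t *table2,
                                 int group, int group_dual, uint8_t modrm)
    {
            int reg = (modrm >> 3) & 7;     /* ModRM bits 3:5 extend the opcode */
            int mod3 = (modrm >> 6) == 3;   /* register (not memory) form */
            const uint32_t *t = (group_dual && mod3) ? table2 : table;

            return t[group * 8 + reg];      /* 0 means "cannot emulate" */
    }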
325 | 367 | ||
326 | /* EFLAGS bit definitions. */ | 368 | /* EFLAGS bit definitions. */ |
369 | #define EFLG_ID (1<<21) | ||
370 | #define EFLG_VIP (1<<20) | ||
371 | #define EFLG_VIF (1<<19) | ||
372 | #define EFLG_AC (1<<18) | ||
327 | #define EFLG_VM (1<<17) | 373 | #define EFLG_VM (1<<17) |
328 | #define EFLG_RF (1<<16) | 374 | #define EFLG_RF (1<<16) |
375 | #define EFLG_IOPL (3<<12) | ||
376 | #define EFLG_NT (1<<14) | ||
329 | #define EFLG_OF (1<<11) | 377 | #define EFLG_OF (1<<11) |
330 | #define EFLG_DF (1<<10) | 378 | #define EFLG_DF (1<<10) |
331 | #define EFLG_IF (1<<9) | 379 | #define EFLG_IF (1<<9) |
380 | #define EFLG_TF (1<<8) | ||
332 | #define EFLG_SF (1<<7) | 381 | #define EFLG_SF (1<<7) |
333 | #define EFLG_ZF (1<<6) | 382 | #define EFLG_ZF (1<<6) |
334 | #define EFLG_AF (1<<4) | 383 | #define EFLG_AF (1<<4) |
@@ -597,7 +646,7 @@ static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
597 | 646 | ||
598 | if (linear < fc->start || linear >= fc->end) { | 647 | if (linear < fc->start || linear >= fc->end) { |
599 | size = min(15UL, PAGE_SIZE - offset_in_page(linear)); | 648 | size = min(15UL, PAGE_SIZE - offset_in_page(linear)); |
600 | rc = ops->read_std(linear, fc->data, size, ctxt->vcpu); | 649 | rc = ops->fetch(linear, fc->data, size, ctxt->vcpu, NULL); |
601 | if (rc) | 650 | if (rc) |
602 | return rc; | 651 | return rc; |
603 | fc->start = linear; | 652 | fc->start = linear; |
@@ -613,6 +662,9 @@ static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
613 | { | 662 | { |
614 | int rc = 0; | 663 | int rc = 0; |
615 | 664 | ||
665 | /* x86 instructions are limited to 15 bytes. */ | ||
666 | if (eip + size - ctxt->decode.eip_orig > 15) | ||
667 | return X86EMUL_UNHANDLEABLE; | ||
616 | eip += ctxt->cs_base; | 668 | eip += ctxt->cs_base; |
617 | while (size--) { | 669 | while (size--) { |
618 | rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++); | 670 | rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++); |
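The new bound check works because eip_orig (recorded in the x86_decode_insn hunk below) marks where decode started: no legal x86 instruction exceeds 15 bytes, so a fetch that would stretch the running decode past that limit can only be a runaway and is rejected before touching guest memory again. The arithmetic in isolation, as a sketch with assumed standalone types:

    /* Reject a fetch of "size" bytes at "eip" once the instruction being
     * decoded (it began at eip_orig) would exceed the 15-byte x86 limit. */
    static int fetch_within_limit(unsigned long eip, unsigned long size,
                                  unsigned long eip_orig)
    {
            return eip + size - eip_orig <= 15;
    }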
@@ -649,11 +701,11 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt,
649 | op_bytes = 3; | 701 | op_bytes = 3; |
650 | *address = 0; | 702 | *address = 0; |
651 | rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2, | 703 | rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2, |
652 | ctxt->vcpu); | 704 | ctxt->vcpu, NULL); |
653 | if (rc) | 705 | if (rc) |
654 | return rc; | 706 | return rc; |
655 | rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes, | 707 | rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes, |
656 | ctxt->vcpu); | 708 | ctxt->vcpu, NULL); |
657 | return rc; | 709 | return rc; |
658 | } | 710 | } |
659 | 711 | ||
@@ -871,12 +923,13 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
871 | /* Shadow copy of register state. Committed on successful emulation. */ | 923 | /* Shadow copy of register state. Committed on successful emulation. */ |
872 | 924 | ||
873 | memset(c, 0, sizeof(struct decode_cache)); | 925 | memset(c, 0, sizeof(struct decode_cache)); |
874 | c->eip = kvm_rip_read(ctxt->vcpu); | 926 | c->eip = c->eip_orig = kvm_rip_read(ctxt->vcpu); |
875 | ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS); | 927 | ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS); |
876 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); | 928 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); |
877 | 929 | ||
878 | switch (mode) { | 930 | switch (mode) { |
879 | case X86EMUL_MODE_REAL: | 931 | case X86EMUL_MODE_REAL: |
932 | case X86EMUL_MODE_VM86: | ||
880 | case X86EMUL_MODE_PROT16: | 933 | case X86EMUL_MODE_PROT16: |
881 | def_op_bytes = def_ad_bytes = 2; | 934 | def_op_bytes = def_ad_bytes = 2; |
882 | break; | 935 | break; |
@@ -962,6 +1015,11 @@ done_prefixes:
962 | } | 1015 | } |
963 | } | 1016 | } |
964 | 1017 | ||
1018 | if (mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { | ||
1019 | kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction"); | ||
1020 | return -1; | ||
1021 | } | ||
1022 | |||
965 | if (c->d & Group) { | 1023 | if (c->d & Group) { |
966 | group = c->d & GroupMask; | 1024 | group = c->d & GroupMask; |
967 | c->modrm = insn_fetch(u8, 1, c->eip); | 1025 | c->modrm = insn_fetch(u8, 1, c->eip); |
@@ -1179,13 +1237,119 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt,
1179 | rc = ops->read_emulated(register_address(c, ss_base(ctxt), | 1237 | rc = ops->read_emulated(register_address(c, ss_base(ctxt), |
1180 | c->regs[VCPU_REGS_RSP]), | 1238 | c->regs[VCPU_REGS_RSP]), |
1181 | dest, len, ctxt->vcpu); | 1239 | dest, len, ctxt->vcpu); |
1182 | if (rc != 0) | 1240 | if (rc != X86EMUL_CONTINUE) |
1183 | return rc; | 1241 | return rc; |
1184 | 1242 | ||
1185 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], len); | 1243 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], len); |
1186 | return rc; | 1244 | return rc; |
1187 | } | 1245 | } |
1188 | 1246 | ||
1247 | static int emulate_popf(struct x86_emulate_ctxt *ctxt, | ||
1248 | struct x86_emulate_ops *ops, | ||
1249 | void *dest, int len) | ||
1250 | { | ||
1251 | int rc; | ||
1252 | unsigned long val, change_mask; | ||
1253 | int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; | ||
1254 | int cpl = kvm_x86_ops->get_cpl(ctxt->vcpu); | ||
1255 | |||
1256 | rc = emulate_pop(ctxt, ops, &val, len); | ||
1257 | if (rc != X86EMUL_CONTINUE) | ||
1258 | return rc; | ||
1259 | |||
1260 | change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF | ||
1261 | | EFLG_TF | EFLG_DF | EFLG_NT | EFLG_RF | EFLG_AC | EFLG_ID; | ||
1262 | |||
1263 | switch(ctxt->mode) { | ||
1264 | case X86EMUL_MODE_PROT64: | ||
1265 | case X86EMUL_MODE_PROT32: | ||
1266 | case X86EMUL_MODE_PROT16: | ||
1267 | if (cpl == 0) | ||
1268 | change_mask |= EFLG_IOPL; | ||
1269 | if (cpl <= iopl) | ||
1270 | change_mask |= EFLG_IF; | ||
1271 | break; | ||
1272 | case X86EMUL_MODE_VM86: | ||
1273 | if (iopl < 3) { | ||
1274 | kvm_inject_gp(ctxt->vcpu, 0); | ||
1275 | return X86EMUL_PROPAGATE_FAULT; | ||
1276 | } | ||
1277 | change_mask |= EFLG_IF; | ||
1278 | break; | ||
1279 | default: /* real mode */ | ||
1280 | change_mask |= (EFLG_IOPL | EFLG_IF); | ||
1281 | break; | ||
1282 | } | ||
1283 | |||
1284 | *(unsigned long *)dest = | ||
1285 | (ctxt->eflags & ~change_mask) | (val & change_mask); | ||
1286 | |||
1287 | return rc; | ||
1288 | } | ||
1289 | |||
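emulate_popf() never lets a pop grant more than the current privilege allows: the always-writable arithmetic and control flags form the base mask, IOPL joins it only at CPL 0, and IF only when CPL <= IOPL (in virtual-8086 mode, IOPL < 3 faults instead). A standalone restatement of the protected-mode mask computation, reusing the EFLG_* values defined earlier in this patch (illustrative only, not a replacement for the function above):

    /* Protected-mode POPF: merge the popped value into EFLAGS through the
     * mask of bits this CPL may actually change. */
    static unsigned long popf_merge(unsigned long eflags, unsigned long popped,
                                    int cpl, int iopl)
    {
            unsigned long mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF
                               | EFLG_OF | EFLG_TF | EFLG_DF | EFLG_NT | EFLG_RF
                               | EFLG_AC | EFLG_ID;

            if (cpl == 0)
                    mask |= EFLG_IOPL;      /* only ring 0 may move IOPL */
            if (cpl <= iopl)
                    mask |= EFLG_IF;        /* IF needs CPL <= IOPL */

            return (eflags & ~mask) | (popped & mask);
    }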
1290 | static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg) | ||
1291 | { | ||
1292 | struct decode_cache *c = &ctxt->decode; | ||
1293 | struct kvm_segment segment; | ||
1294 | |||
1295 | kvm_x86_ops->get_segment(ctxt->vcpu, &segment, seg); | ||
1296 | |||
1297 | c->src.val = segment.selector; | ||
1298 | emulate_push(ctxt); | ||
1299 | } | ||
1300 | |||
1301 | static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, | ||
1302 | struct x86_emulate_ops *ops, int seg) | ||
1303 | { | ||
1304 | struct decode_cache *c = &ctxt->decode; | ||
1305 | unsigned long selector; | ||
1306 | int rc; | ||
1307 | |||
1308 | rc = emulate_pop(ctxt, ops, &selector, c->op_bytes); | ||
1309 | if (rc != 0) | ||
1310 | return rc; | ||
1311 | |||
1312 | rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, seg); | ||
1313 | return rc; | ||
1314 | } | ||
1315 | |||
1316 | static void emulate_pusha(struct x86_emulate_ctxt *ctxt) | ||
1317 | { | ||
1318 | struct decode_cache *c = &ctxt->decode; | ||
1319 | unsigned long old_esp = c->regs[VCPU_REGS_RSP]; | ||
1320 | int reg = VCPU_REGS_RAX; | ||
1321 | |||
1322 | while (reg <= VCPU_REGS_RDI) { | ||
1323 | (reg == VCPU_REGS_RSP) ? | ||
1324 | (c->src.val = old_esp) : (c->src.val = c->regs[reg]); | ||
1325 | |||
1326 | emulate_push(ctxt); | ||
1327 | ++reg; | ||
1328 | } | ||
1329 | } | ||
1330 | |||
1331 | static int emulate_popa(struct x86_emulate_ctxt *ctxt, | ||
1332 | struct x86_emulate_ops *ops) | ||
1333 | { | ||
1334 | struct decode_cache *c = &ctxt->decode; | ||
1335 | int rc = 0; | ||
1336 | int reg = VCPU_REGS_RDI; | ||
1337 | |||
1338 | while (reg >= VCPU_REGS_RAX) { | ||
1339 | if (reg == VCPU_REGS_RSP) { | ||
1340 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], | ||
1341 | c->op_bytes); | ||
1342 | --reg; | ||
1343 | } | ||
1344 | |||
1345 | rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes); | ||
1346 | if (rc != 0) | ||
1347 | break; | ||
1348 | --reg; | ||
1349 | } | ||
1350 | return rc; | ||
1351 | } | ||
1352 | |||
1189 | static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, | 1353 | static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, |
1190 | struct x86_emulate_ops *ops) | 1354 | struct x86_emulate_ops *ops) |
1191 | { | 1355 | { |
@@ -1290,7 +1454,7 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
1290 | int rc; | 1454 | int rc; |
1291 | 1455 | ||
1292 | rc = ops->read_emulated(memop, &old, 8, ctxt->vcpu); | 1456 | rc = ops->read_emulated(memop, &old, 8, ctxt->vcpu); |
1293 | if (rc != 0) | 1457 | if (rc != X86EMUL_CONTINUE) |
1294 | return rc; | 1458 | return rc; |
1295 | 1459 | ||
1296 | if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) || | 1460 | if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) || |
@@ -1305,7 +1469,7 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
1305 | (u32) c->regs[VCPU_REGS_RBX]; | 1469 | (u32) c->regs[VCPU_REGS_RBX]; |
1306 | 1470 | ||
1307 | rc = ops->cmpxchg_emulated(memop, &old, &new, 8, ctxt->vcpu); | 1471 | rc = ops->cmpxchg_emulated(memop, &old, &new, 8, ctxt->vcpu); |
1308 | if (rc != 0) | 1472 | if (rc != X86EMUL_CONTINUE) |
1309 | return rc; | 1473 | return rc; |
1310 | ctxt->eflags |= EFLG_ZF; | 1474 | ctxt->eflags |= EFLG_ZF; |
1311 | } | 1475 | } |
@@ -1327,7 +1491,7 @@ static int emulate_ret_far(struct x86_emulate_ctxt *ctxt,
1327 | rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); | 1491 | rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); |
1328 | if (rc) | 1492 | if (rc) |
1329 | return rc; | 1493 | return rc; |
1330 | rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, 1, VCPU_SREG_CS); | 1494 | rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, VCPU_SREG_CS); |
1331 | return rc; | 1495 | return rc; |
1332 | } | 1496 | } |
1333 | 1497 | ||
@@ -1371,7 +1535,7 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt,
1371 | &c->dst.val, | 1535 | &c->dst.val, |
1372 | c->dst.bytes, | 1536 | c->dst.bytes, |
1373 | ctxt->vcpu); | 1537 | ctxt->vcpu); |
1374 | if (rc != 0) | 1538 | if (rc != X86EMUL_CONTINUE) |
1375 | return rc; | 1539 | return rc; |
1376 | break; | 1540 | break; |
1377 | case OP_NONE: | 1541 | case OP_NONE: |
@@ -1434,9 +1598,8 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt)
1434 | u64 msr_data; | 1598 | u64 msr_data; |
1435 | 1599 | ||
1436 | /* syscall is not available in real mode */ | 1600 | /* syscall is not available in real mode */ |
1437 | if (c->lock_prefix || ctxt->mode == X86EMUL_MODE_REAL | 1601 | if (ctxt->mode == X86EMUL_MODE_REAL || ctxt->mode == X86EMUL_MODE_VM86) |
1438 | || !(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) | 1602 | return X86EMUL_UNHANDLEABLE; |
1439 | return -1; | ||
1440 | 1603 | ||
1441 | setup_syscalls_segments(ctxt, &cs, &ss); | 1604 | setup_syscalls_segments(ctxt, &cs, &ss); |
1442 | 1605 | ||
@@ -1473,7 +1636,7 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt)
1473 | ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); | 1636 | ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); |
1474 | } | 1637 | } |
1475 | 1638 | ||
1476 | return 0; | 1639 | return X86EMUL_CONTINUE; |
1477 | } | 1640 | } |
1478 | 1641 | ||
1479 | static int | 1642 | static int |
@@ -1483,22 +1646,17 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt)
1483 | struct kvm_segment cs, ss; | 1646 | struct kvm_segment cs, ss; |
1484 | u64 msr_data; | 1647 | u64 msr_data; |
1485 | 1648 | ||
1486 | /* inject #UD if LOCK prefix is used */ | 1649 | /* inject #GP if in real mode */ |
1487 | if (c->lock_prefix) | 1650 | if (ctxt->mode == X86EMUL_MODE_REAL) { |
1488 | return -1; | ||
1489 | |||
1490 | /* inject #GP if in real mode or paging is disabled */ | ||
1491 | if (ctxt->mode == X86EMUL_MODE_REAL || | ||
1492 | !(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) { | ||
1493 | kvm_inject_gp(ctxt->vcpu, 0); | 1651 | kvm_inject_gp(ctxt->vcpu, 0); |
1494 | return -1; | 1652 | return X86EMUL_UNHANDLEABLE; |
1495 | } | 1653 | } |
1496 | 1654 | ||
1497 | /* XXX sysenter/sysexit have not been tested in 64bit mode. | 1655 | /* XXX sysenter/sysexit have not been tested in 64bit mode. |
1498 | * Therefore, we inject an #UD. | 1656 | * Therefore, we inject an #UD. |
1499 | */ | 1657 | */ |
1500 | if (ctxt->mode == X86EMUL_MODE_PROT64) | 1658 | if (ctxt->mode == X86EMUL_MODE_PROT64) |
1501 | return -1; | 1659 | return X86EMUL_UNHANDLEABLE; |
1502 | 1660 | ||
1503 | setup_syscalls_segments(ctxt, &cs, &ss); | 1661 | setup_syscalls_segments(ctxt, &cs, &ss); |
1504 | 1662 | ||
@@ -1507,13 +1665,13 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt)
1507 | case X86EMUL_MODE_PROT32: | 1665 | case X86EMUL_MODE_PROT32: |
1508 | if ((msr_data & 0xfffc) == 0x0) { | 1666 | if ((msr_data & 0xfffc) == 0x0) { |
1509 | kvm_inject_gp(ctxt->vcpu, 0); | 1667 | kvm_inject_gp(ctxt->vcpu, 0); |
1510 | return -1; | 1668 | return X86EMUL_PROPAGATE_FAULT; |
1511 | } | 1669 | } |
1512 | break; | 1670 | break; |
1513 | case X86EMUL_MODE_PROT64: | 1671 | case X86EMUL_MODE_PROT64: |
1514 | if (msr_data == 0x0) { | 1672 | if (msr_data == 0x0) { |
1515 | kvm_inject_gp(ctxt->vcpu, 0); | 1673 | kvm_inject_gp(ctxt->vcpu, 0); |
1516 | return -1; | 1674 | return X86EMUL_PROPAGATE_FAULT; |
1517 | } | 1675 | } |
1518 | break; | 1676 | break; |
1519 | } | 1677 | } |
@@ -1538,7 +1696,7 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt)
1538 | kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data); | 1696 | kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data); |
1539 | c->regs[VCPU_REGS_RSP] = msr_data; | 1697 | c->regs[VCPU_REGS_RSP] = msr_data; |
1540 | 1698 | ||
1541 | return 0; | 1699 | return X86EMUL_CONTINUE; |
1542 | } | 1700 | } |
1543 | 1701 | ||
1544 | static int | 1702 | static int |
@@ -1549,21 +1707,11 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt) | |||
1549 | u64 msr_data; | 1707 | u64 msr_data; |
1550 | int usermode; | 1708 | int usermode; |
1551 | 1709 | ||
1552 | /* inject #UD if LOCK prefix is used */ | 1710 | /* inject #GP if in real mode or Virtual 8086 mode */ |
1553 | if (c->lock_prefix) | 1711 | if (ctxt->mode == X86EMUL_MODE_REAL || |
1554 | return -1; | 1712 | ctxt->mode == X86EMUL_MODE_VM86) { |
1555 | |||
1556 | /* inject #GP if in real mode or paging is disabled */ | ||
1557 | if (ctxt->mode == X86EMUL_MODE_REAL | ||
1558 | || !(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) { | ||
1559 | kvm_inject_gp(ctxt->vcpu, 0); | ||
1560 | return -1; | ||
1561 | } | ||
1562 | |||
1563 | /* sysexit must be called from CPL 0 */ | ||
1564 | if (kvm_x86_ops->get_cpl(ctxt->vcpu) != 0) { | ||
1565 | kvm_inject_gp(ctxt->vcpu, 0); | 1713 | kvm_inject_gp(ctxt->vcpu, 0); |
1566 | return -1; | 1714 | return X86EMUL_UNHANDLEABLE; |
1567 | } | 1715 | } |
1568 | 1716 | ||
1569 | setup_syscalls_segments(ctxt, &cs, &ss); | 1717 | setup_syscalls_segments(ctxt, &cs, &ss); |
@@ -1581,7 +1729,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt)
1581 | cs.selector = (u16)(msr_data + 16); | 1729 | cs.selector = (u16)(msr_data + 16); |
1582 | if ((msr_data & 0xfffc) == 0x0) { | 1730 | if ((msr_data & 0xfffc) == 0x0) { |
1583 | kvm_inject_gp(ctxt->vcpu, 0); | 1731 | kvm_inject_gp(ctxt->vcpu, 0); |
1584 | return -1; | 1732 | return X86EMUL_PROPAGATE_FAULT; |
1585 | } | 1733 | } |
1586 | ss.selector = (u16)(msr_data + 24); | 1734 | ss.selector = (u16)(msr_data + 24); |
1587 | break; | 1735 | break; |
@@ -1589,7 +1737,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt)
1589 | cs.selector = (u16)(msr_data + 32); | 1737 | cs.selector = (u16)(msr_data + 32); |
1590 | if (msr_data == 0x0) { | 1738 | if (msr_data == 0x0) { |
1591 | kvm_inject_gp(ctxt->vcpu, 0); | 1739 | kvm_inject_gp(ctxt->vcpu, 0); |
1592 | return -1; | 1740 | return X86EMUL_PROPAGATE_FAULT; |
1593 | } | 1741 | } |
1594 | ss.selector = cs.selector + 8; | 1742 | ss.selector = cs.selector + 8; |
1595 | cs.db = 0; | 1743 | cs.db = 0; |
@@ -1605,7 +1753,58 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt)
1605 | c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX]; | 1753 | c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX]; |
1606 | c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX]; | 1754 | c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX]; |
1607 | 1755 | ||
1608 | return 0; | 1756 | return X86EMUL_CONTINUE; |
1757 | } | ||
1758 | |||
1759 | static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt) | ||
1760 | { | ||
1761 | int iopl; | ||
1762 | if (ctxt->mode == X86EMUL_MODE_REAL) | ||
1763 | return false; | ||
1764 | if (ctxt->mode == X86EMUL_MODE_VM86) | ||
1765 | return true; | ||
1766 | iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; | ||
1767 | return kvm_x86_ops->get_cpl(ctxt->vcpu) > iopl; | ||
1768 | } | ||
1769 | |||
1770 | static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, | ||
1771 | struct x86_emulate_ops *ops, | ||
1772 | u16 port, u16 len) | ||
1773 | { | ||
1774 | struct kvm_segment tr_seg; | ||
1775 | int r; | ||
1776 | u16 io_bitmap_ptr; | ||
1777 | u8 perm, bit_idx = port & 0x7; | ||
1778 | unsigned mask = (1 << len) - 1; | ||
1779 | |||
1780 | kvm_get_segment(ctxt->vcpu, &tr_seg, VCPU_SREG_TR); | ||
1781 | if (tr_seg.unusable) | ||
1782 | return false; | ||
1783 | if (tr_seg.limit < 103) | ||
1784 | return false; | ||
1785 | r = ops->read_std(tr_seg.base + 102, &io_bitmap_ptr, 2, ctxt->vcpu, | ||
1786 | NULL); | ||
1787 | if (r != X86EMUL_CONTINUE) | ||
1788 | return false; | ||
1789 | if (io_bitmap_ptr + port/8 > tr_seg.limit) | ||
1790 | return false; | ||
1791 | r = ops->read_std(tr_seg.base + io_bitmap_ptr + port/8, &perm, 1, | ||
1792 | ctxt->vcpu, NULL); | ||
1793 | if (r != X86EMUL_CONTINUE) | ||
1794 | return false; | ||
1795 | if ((perm >> bit_idx) & mask) | ||
1796 | return false; | ||
1797 | return true; | ||
1798 | } | ||
1799 | |||
1800 | static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt, | ||
1801 | struct x86_emulate_ops *ops, | ||
1802 | u16 port, u16 len) | ||
1803 | { | ||
1804 | if (emulator_bad_iopl(ctxt)) | ||
1805 | if (!emulator_io_port_access_allowed(ctxt, ops, port, len)) | ||
1806 | return false; | ||
1807 | return true; | ||
1609 | } | 1808 | } |
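When emulator_bad_iopl() says the vCPU is not IOPL-privileged, port access falls back to the I/O permission bitmap in the TSS, where a set bit denies one byte of port space. The essence of that test, as a userspace sketch over a flat in-memory bitmap (an assumption of this illustration; like the kernel code above, it reads a single bitmap byte, so it does not split accesses that straddle a byte boundary):

    #include <stdint.h>

    /* Return nonzero if all "len" byte-wide ports starting at "port" are
     * permitted, i.e. their bits in the TSS-style bitmap are all clear. */
    static int io_port_allowed(const uint8_t *bitmap, uint16_t port, int len)
    {
            unsigned int mask = (1u << len) - 1;   /* one bit per accessed byte */

            return ((bitmap[port / 8] >> (port & 7)) & mask) == 0;
    }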
1610 | 1809 | ||
1611 | int | 1810 | int |
@@ -1629,6 +1828,18 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
1629 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); | 1828 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); |
1630 | saved_eip = c->eip; | 1829 | saved_eip = c->eip; |
1631 | 1830 | ||
1831 | /* LOCK prefix is allowed only with some instructions */ | ||
1832 | if (c->lock_prefix && !(c->d & Lock)) { | ||
1833 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | ||
1834 | goto done; | ||
1835 | } | ||
1836 | |||
1837 | /* Privileged instruction can be executed only in CPL=0 */ | ||
1838 | if ((c->d & Priv) && kvm_x86_ops->get_cpl(ctxt->vcpu)) { | ||
1839 | kvm_inject_gp(ctxt->vcpu, 0); | ||
1840 | goto done; | ||
1841 | } | ||
1842 | |||
1632 | if (((c->d & ModRM) && (c->modrm_mod != 3)) || (c->d & MemAbs)) | 1843 | if (((c->d & ModRM) && (c->modrm_mod != 3)) || (c->d & MemAbs)) |
1633 | memop = c->modrm_ea; | 1844 | memop = c->modrm_ea; |
1634 | 1845 | ||
@@ -1669,7 +1880,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
1669 | &c->src.val, | 1880 | &c->src.val, |
1670 | c->src.bytes, | 1881 | c->src.bytes, |
1671 | ctxt->vcpu); | 1882 | ctxt->vcpu); |
1672 | if (rc != 0) | 1883 | if (rc != X86EMUL_CONTINUE) |
1673 | goto done; | 1884 | goto done; |
1674 | c->src.orig_val = c->src.val; | 1885 | c->src.orig_val = c->src.val; |
1675 | } | 1886 | } |
@@ -1688,12 +1899,15 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
1688 | c->dst.ptr = (void *)c->dst.ptr + | 1899 | c->dst.ptr = (void *)c->dst.ptr + |
1689 | (c->src.val & mask) / 8; | 1900 | (c->src.val & mask) / 8; |
1690 | } | 1901 | } |
1691 | if (!(c->d & Mov) && | 1902 | if (!(c->d & Mov)) { |
1692 | /* optimisation - avoid slow emulated read */ | 1903 | /* optimisation - avoid slow emulated read */ |
1693 | ((rc = ops->read_emulated((unsigned long)c->dst.ptr, | 1904 | rc = ops->read_emulated((unsigned long)c->dst.ptr, |
1694 | &c->dst.val, | 1905 | &c->dst.val, |
1695 | c->dst.bytes, ctxt->vcpu)) != 0)) | 1906 | c->dst.bytes, |
1696 | goto done; | 1907 | ctxt->vcpu); |
1908 | if (rc != X86EMUL_CONTINUE) | ||
1909 | goto done; | ||
1910 | } | ||
1697 | } | 1911 | } |
1698 | c->dst.orig_val = c->dst.val; | 1912 | c->dst.orig_val = c->dst.val; |
1699 | 1913 | ||
@@ -1707,18 +1921,45 @@ special_insn:
1707 | add: /* add */ | 1921 | add: /* add */ |
1708 | emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); | 1922 | emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); |
1709 | break; | 1923 | break; |
1924 | case 0x06: /* push es */ | ||
1925 | emulate_push_sreg(ctxt, VCPU_SREG_ES); | ||
1926 | break; | ||
1927 | case 0x07: /* pop es */ | ||
1928 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); | ||
1929 | if (rc != 0) | ||
1930 | goto done; | ||
1931 | break; | ||
1710 | case 0x08 ... 0x0d: | 1932 | case 0x08 ... 0x0d: |
1711 | or: /* or */ | 1933 | or: /* or */ |
1712 | emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); | 1934 | emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); |
1713 | break; | 1935 | break; |
1936 | case 0x0e: /* push cs */ | ||
1937 | emulate_push_sreg(ctxt, VCPU_SREG_CS); | ||
1938 | break; | ||
1714 | case 0x10 ... 0x15: | 1939 | case 0x10 ... 0x15: |
1715 | adc: /* adc */ | 1940 | adc: /* adc */ |
1716 | emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); | 1941 | emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); |
1717 | break; | 1942 | break; |
1943 | case 0x16: /* push ss */ | ||
1944 | emulate_push_sreg(ctxt, VCPU_SREG_SS); | ||
1945 | break; | ||
1946 | case 0x17: /* pop ss */ | ||
1947 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); | ||
1948 | if (rc != 0) | ||
1949 | goto done; | ||
1950 | break; | ||
1718 | case 0x18 ... 0x1d: | 1951 | case 0x18 ... 0x1d: |
1719 | sbb: /* sbb */ | 1952 | sbb: /* sbb */ |
1720 | emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); | 1953 | emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); |
1721 | break; | 1954 | break; |
1955 | case 0x1e: /* push ds */ | ||
1956 | emulate_push_sreg(ctxt, VCPU_SREG_DS); | ||
1957 | break; | ||
1958 | case 0x1f: /* pop ds */ | ||
1959 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); | ||
1960 | if (rc != 0) | ||
1961 | goto done; | ||
1962 | break; | ||
1722 | case 0x20 ... 0x25: | 1963 | case 0x20 ... 0x25: |
1723 | and: /* and */ | 1964 | and: /* and */ |
1724 | emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags); | 1965 | emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags); |
@@ -1750,6 +1991,14 @@ special_insn:
1750 | if (rc != 0) | 1991 | if (rc != 0) |
1751 | goto done; | 1992 | goto done; |
1752 | break; | 1993 | break; |
1994 | case 0x60: /* pusha */ | ||
1995 | emulate_pusha(ctxt); | ||
1996 | break; | ||
1997 | case 0x61: /* popa */ | ||
1998 | rc = emulate_popa(ctxt, ops); | ||
1999 | if (rc != 0) | ||
2000 | goto done; | ||
2001 | break; | ||
1753 | case 0x63: /* movsxd */ | 2002 | case 0x63: /* movsxd */ |
1754 | if (ctxt->mode != X86EMUL_MODE_PROT64) | 2003 | if (ctxt->mode != X86EMUL_MODE_PROT64) |
1755 | goto cannot_emulate; | 2004 | goto cannot_emulate; |
@@ -1761,7 +2010,12 @@ special_insn:
1761 | break; | 2010 | break; |
1762 | case 0x6c: /* insb */ | 2011 | case 0x6c: /* insb */ |
1763 | case 0x6d: /* insw/insd */ | 2012 | case 0x6d: /* insw/insd */ |
1764 | if (kvm_emulate_pio_string(ctxt->vcpu, NULL, | 2013 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], |
2014 | (c->d & ByteOp) ? 1 : c->op_bytes)) { | ||
2015 | kvm_inject_gp(ctxt->vcpu, 0); | ||
2016 | goto done; | ||
2017 | } | ||
2018 | if (kvm_emulate_pio_string(ctxt->vcpu, | ||
1765 | 1, | 2019 | 1, |
1766 | (c->d & ByteOp) ? 1 : c->op_bytes, | 2020 | (c->d & ByteOp) ? 1 : c->op_bytes, |
1767 | c->rep_prefix ? | 2021 | c->rep_prefix ? |
@@ -1777,7 +2031,12 @@ special_insn:
1777 | return 0; | 2031 | return 0; |
1778 | case 0x6e: /* outsb */ | 2032 | case 0x6e: /* outsb */ |
1779 | case 0x6f: /* outsw/outsd */ | 2033 | case 0x6f: /* outsw/outsd */ |
1780 | if (kvm_emulate_pio_string(ctxt->vcpu, NULL, | 2034 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], |
2035 | (c->d & ByteOp) ? 1 : c->op_bytes)) { | ||
2036 | kvm_inject_gp(ctxt->vcpu, 0); | ||
2037 | goto done; | ||
2038 | } | ||
2039 | if (kvm_emulate_pio_string(ctxt->vcpu, | ||
1781 | 0, | 2040 | 0, |
1782 | (c->d & ByteOp) ? 1 : c->op_bytes, | 2041 | (c->d & ByteOp) ? 1 : c->op_bytes, |
1783 | c->rep_prefix ? | 2042 | c->rep_prefix ? |
@@ -1863,25 +2122,19 @@ special_insn:
1863 | break; | 2122 | break; |
1864 | case 0x8e: { /* mov seg, r/m16 */ | 2123 | case 0x8e: { /* mov seg, r/m16 */ |
1865 | uint16_t sel; | 2124 | uint16_t sel; |
1866 | int type_bits; | ||
1867 | int err; | ||
1868 | 2125 | ||
1869 | sel = c->src.val; | 2126 | sel = c->src.val; |
1870 | if (c->modrm_reg == VCPU_SREG_SS) | ||
1871 | toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS); | ||
1872 | 2127 | ||
1873 | if (c->modrm_reg <= 5) { | 2128 | if (c->modrm_reg == VCPU_SREG_CS || |
1874 | type_bits = (c->modrm_reg == 1) ? 9 : 1; | 2129 | c->modrm_reg > VCPU_SREG_GS) { |
1875 | err = kvm_load_segment_descriptor(ctxt->vcpu, sel, | 2130 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); |
1876 | type_bits, c->modrm_reg); | 2131 | goto done; |
1877 | } else { | ||
1878 | printk(KERN_INFO "Invalid segreg in modrm byte 0x%02x\n", | ||
1879 | c->modrm); | ||
1880 | goto cannot_emulate; | ||
1881 | } | 2132 | } |
1882 | 2133 | ||
1883 | if (err < 0) | 2134 | if (c->modrm_reg == VCPU_SREG_SS) |
1884 | goto cannot_emulate; | 2135 | toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS); |
2136 | |||
2137 | rc = kvm_load_segment_descriptor(ctxt->vcpu, sel, c->modrm_reg); | ||
1885 | 2138 | ||
1886 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2139 | c->dst.type = OP_NONE; /* Disable writeback. */ |
1887 | break; | 2140 | break; |
@@ -1910,7 +2163,10 @@ special_insn:
1910 | c->dst.type = OP_REG; | 2163 | c->dst.type = OP_REG; |
1911 | c->dst.ptr = (unsigned long *) &ctxt->eflags; | 2164 | c->dst.ptr = (unsigned long *) &ctxt->eflags; |
1912 | c->dst.bytes = c->op_bytes; | 2165 | c->dst.bytes = c->op_bytes; |
1913 | goto pop_instruction; | 2166 | rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes); |
2167 | if (rc != X86EMUL_CONTINUE) | ||
2168 | goto done; | ||
2169 | break; | ||
1914 | case 0xa0 ... 0xa1: /* mov */ | 2170 | case 0xa0 ... 0xa1: /* mov */ |
1915 | c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; | 2171 | c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; |
1916 | c->dst.val = c->src.val; | 2172 | c->dst.val = c->src.val; |
@@ -1924,11 +2180,12 @@ special_insn:
1924 | c->dst.ptr = (unsigned long *)register_address(c, | 2180 | c->dst.ptr = (unsigned long *)register_address(c, |
1925 | es_base(ctxt), | 2181 | es_base(ctxt), |
1926 | c->regs[VCPU_REGS_RDI]); | 2182 | c->regs[VCPU_REGS_RDI]); |
1927 | if ((rc = ops->read_emulated(register_address(c, | 2183 | rc = ops->read_emulated(register_address(c, |
1928 | seg_override_base(ctxt, c), | 2184 | seg_override_base(ctxt, c), |
1929 | c->regs[VCPU_REGS_RSI]), | 2185 | c->regs[VCPU_REGS_RSI]), |
1930 | &c->dst.val, | 2186 | &c->dst.val, |
1931 | c->dst.bytes, ctxt->vcpu)) != 0) | 2187 | c->dst.bytes, ctxt->vcpu); |
2188 | if (rc != X86EMUL_CONTINUE) | ||
1932 | goto done; | 2189 | goto done; |
1933 | register_address_increment(c, &c->regs[VCPU_REGS_RSI], | 2190 | register_address_increment(c, &c->regs[VCPU_REGS_RSI], |
1934 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | 2191 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes |
@@ -1943,10 +2200,11 @@ special_insn:
1943 | c->src.ptr = (unsigned long *)register_address(c, | 2200 | c->src.ptr = (unsigned long *)register_address(c, |
1944 | seg_override_base(ctxt, c), | 2201 | seg_override_base(ctxt, c), |
1945 | c->regs[VCPU_REGS_RSI]); | 2202 | c->regs[VCPU_REGS_RSI]); |
1946 | if ((rc = ops->read_emulated((unsigned long)c->src.ptr, | 2203 | rc = ops->read_emulated((unsigned long)c->src.ptr, |
1947 | &c->src.val, | 2204 | &c->src.val, |
1948 | c->src.bytes, | 2205 | c->src.bytes, |
1949 | ctxt->vcpu)) != 0) | 2206 | ctxt->vcpu); |
2207 | if (rc != X86EMUL_CONTINUE) | ||
1950 | goto done; | 2208 | goto done; |
1951 | 2209 | ||
1952 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2210 | c->dst.type = OP_NONE; /* Disable writeback. */ |
@@ -1954,10 +2212,11 @@ special_insn:
1954 | c->dst.ptr = (unsigned long *)register_address(c, | 2212 | c->dst.ptr = (unsigned long *)register_address(c, |
1955 | es_base(ctxt), | 2213 | es_base(ctxt), |
1956 | c->regs[VCPU_REGS_RDI]); | 2214 | c->regs[VCPU_REGS_RDI]); |
1957 | if ((rc = ops->read_emulated((unsigned long)c->dst.ptr, | 2215 | rc = ops->read_emulated((unsigned long)c->dst.ptr, |
1958 | &c->dst.val, | 2216 | &c->dst.val, |
1959 | c->dst.bytes, | 2217 | c->dst.bytes, |
1960 | ctxt->vcpu)) != 0) | 2218 | ctxt->vcpu); |
2219 | if (rc != X86EMUL_CONTINUE) | ||
1961 | goto done; | 2220 | goto done; |
1962 | 2221 | ||
1963 | DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr); | 2222 | DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr); |
@@ -1987,12 +2246,13 @@ special_insn:
1987 | c->dst.type = OP_REG; | 2246 | c->dst.type = OP_REG; |
1988 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 2247 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
1989 | c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; | 2248 | c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; |
1990 | if ((rc = ops->read_emulated(register_address(c, | 2249 | rc = ops->read_emulated(register_address(c, |
1991 | seg_override_base(ctxt, c), | 2250 | seg_override_base(ctxt, c), |
1992 | c->regs[VCPU_REGS_RSI]), | 2251 | c->regs[VCPU_REGS_RSI]), |
1993 | &c->dst.val, | 2252 | &c->dst.val, |
1994 | c->dst.bytes, | 2253 | c->dst.bytes, |
1995 | ctxt->vcpu)) != 0) | 2254 | ctxt->vcpu); |
2255 | if (rc != X86EMUL_CONTINUE) | ||
1996 | goto done; | 2256 | goto done; |
1997 | register_address_increment(c, &c->regs[VCPU_REGS_RSI], | 2257 | register_address_increment(c, &c->regs[VCPU_REGS_RSI], |
1998 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | 2258 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes |
@@ -2048,11 +2308,9 @@ special_insn:
2048 | case 0xe9: /* jmp rel */ | 2308 | case 0xe9: /* jmp rel */ |
2049 | goto jmp; | 2309 | goto jmp; |
2050 | case 0xea: /* jmp far */ | 2310 | case 0xea: /* jmp far */ |
2051 | if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val, 9, | 2311 | if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val, |
2052 | VCPU_SREG_CS) < 0) { | 2312 | VCPU_SREG_CS)) |
2053 | DPRINTF("jmp far: Failed to load CS descriptor\n"); | 2313 | goto done; |
2054 | goto cannot_emulate; | ||
2055 | } | ||
2056 | 2314 | ||
2057 | c->eip = c->src.val; | 2315 | c->eip = c->src.val; |
2058 | break; | 2316 | break; |
@@ -2070,7 +2328,13 @@ special_insn:
2070 | case 0xef: /* out (e/r)ax,dx */ | 2328 | case 0xef: /* out (e/r)ax,dx */ |
2071 | port = c->regs[VCPU_REGS_RDX]; | 2329 | port = c->regs[VCPU_REGS_RDX]; |
2072 | io_dir_in = 0; | 2330 | io_dir_in = 0; |
2073 | do_io: if (kvm_emulate_pio(ctxt->vcpu, NULL, io_dir_in, | 2331 | do_io: |
2332 | if (!emulator_io_permited(ctxt, ops, port, | ||
2333 | (c->d & ByteOp) ? 1 : c->op_bytes)) { | ||
2334 | kvm_inject_gp(ctxt->vcpu, 0); | ||
2335 | goto done; | ||
2336 | } | ||
2337 | if (kvm_emulate_pio(ctxt->vcpu, io_dir_in, | ||
2074 | (c->d & ByteOp) ? 1 : c->op_bytes, | 2338 | (c->d & ByteOp) ? 1 : c->op_bytes, |
2075 | port) != 0) { | 2339 | port) != 0) { |
2076 | c->eip = saved_eip; | 2340 | c->eip = saved_eip; |
@@ -2095,13 +2359,21 @@ special_insn:
2095 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2359 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2096 | break; | 2360 | break; |
2097 | case 0xfa: /* cli */ | 2361 | case 0xfa: /* cli */ |
2098 | ctxt->eflags &= ~X86_EFLAGS_IF; | 2362 | if (emulator_bad_iopl(ctxt)) |
2099 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2363 | kvm_inject_gp(ctxt->vcpu, 0); |
2364 | else { | ||
2365 | ctxt->eflags &= ~X86_EFLAGS_IF; | ||
2366 | c->dst.type = OP_NONE; /* Disable writeback. */ | ||
2367 | } | ||
2100 | break; | 2368 | break; |
2101 | case 0xfb: /* sti */ | 2369 | case 0xfb: /* sti */ |
2102 | toggle_interruptibility(ctxt, X86_SHADOW_INT_STI); | 2370 | if (emulator_bad_iopl(ctxt)) |
2103 | ctxt->eflags |= X86_EFLAGS_IF; | 2371 | kvm_inject_gp(ctxt->vcpu, 0); |
2104 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2372 | else { |
2373 | toggle_interruptibility(ctxt, X86_SHADOW_INT_STI); | ||
2374 | ctxt->eflags |= X86_EFLAGS_IF; | ||
2375 | c->dst.type = OP_NONE; /* Disable writeback. */ | ||
2376 | } | ||
2105 | break; | 2377 | break; |
2106 | case 0xfc: /* cld */ | 2378 | case 0xfc: /* cld */ |
2107 | ctxt->eflags &= ~EFLG_DF; | 2379 | ctxt->eflags &= ~EFLG_DF; |
@@ -2204,8 +2476,9 @@ twobyte_insn:
2204 | } | 2476 | } |
2205 | break; | 2477 | break; |
2206 | case 0x05: /* syscall */ | 2478 | case 0x05: /* syscall */ |
2207 | if (emulate_syscall(ctxt) == -1) | 2479 | rc = emulate_syscall(ctxt); |
2208 | goto cannot_emulate; | 2480 | if (rc != X86EMUL_CONTINUE) |
2481 | goto done; | ||
2209 | else | 2482 | else |
2210 | goto writeback; | 2483 | goto writeback; |
2211 | break; | 2484 | break; |
@@ -2276,14 +2549,16 @@ twobyte_insn:
2276 | c->dst.type = OP_NONE; | 2549 | c->dst.type = OP_NONE; |
2277 | break; | 2550 | break; |
2278 | case 0x34: /* sysenter */ | 2551 | case 0x34: /* sysenter */ |
2279 | if (emulate_sysenter(ctxt) == -1) | 2552 | rc = emulate_sysenter(ctxt); |
2280 | goto cannot_emulate; | 2553 | if (rc != X86EMUL_CONTINUE) |
2554 | goto done; | ||
2281 | else | 2555 | else |
2282 | goto writeback; | 2556 | goto writeback; |
2283 | break; | 2557 | break; |
2284 | case 0x35: /* sysexit */ | 2558 | case 0x35: /* sysexit */ |
2285 | if (emulate_sysexit(ctxt) == -1) | 2559 | rc = emulate_sysexit(ctxt); |
2286 | goto cannot_emulate; | 2560 | if (rc != X86EMUL_CONTINUE) |
2561 | goto done; | ||
2287 | else | 2562 | else |
2288 | goto writeback; | 2563 | goto writeback; |
2289 | break; | 2564 | break; |
@@ -2297,6 +2572,14 @@ twobyte_insn:
2297 | jmp_rel(c, c->src.val); | 2572 | jmp_rel(c, c->src.val); |
2298 | c->dst.type = OP_NONE; | 2573 | c->dst.type = OP_NONE; |
2299 | break; | 2574 | break; |
2575 | case 0xa0: /* push fs */ | ||
2576 | emulate_push_sreg(ctxt, VCPU_SREG_FS); | ||
2577 | break; | ||
2578 | case 0xa1: /* pop fs */ | ||
2579 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); | ||
2580 | if (rc != 0) | ||
2581 | goto done; | ||
2582 | break; | ||
2300 | case 0xa3: | 2583 | case 0xa3: |
2301 | bt: /* bt */ | 2584 | bt: /* bt */ |
2302 | c->dst.type = OP_NONE; | 2585 | c->dst.type = OP_NONE; |
@@ -2308,6 +2591,14 @@ twobyte_insn:
2308 | case 0xa5: /* shld cl, r, r/m */ | 2591 | case 0xa5: /* shld cl, r, r/m */ |
2309 | emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); | 2592 | emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); |
2310 | break; | 2593 | break; |
2594 | case 0xa8: /* push gs */ | ||
2595 | emulate_push_sreg(ctxt, VCPU_SREG_GS); | ||
2596 | break; | ||
2597 | case 0xa9: /* pop gs */ | ||
2598 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); | ||
2599 | if (rc != 0) | ||
2600 | goto done; | ||
2601 | break; | ||
2311 | case 0xab: | 2602 | case 0xab: |
2312 | bts: /* bts */ | 2603 | bts: /* bts */ |
2313 | /* only subword offset */ | 2604 | /* only subword offset */ |
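The new 0f a0/a1 and 0f a8/a9 cases teach the emulator push/pop of %fs and %gs. A plausible shape for the push helper, reusing the existing emulate_push() and the kvm_x86_ops segment accessor of this era (illustrative, not the exact committed code):

	static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg)
	{
		struct decode_cache *c = &ctxt->decode;
		struct kvm_segment segment;

		kvm_x86_ops->get_segment(ctxt->vcpu, &segment, seg);
		c->src.val = segment.selector;	/* the selector is what gets pushed */
		emulate_push(ctxt);
	}

The pop side mirrors this — pop the selector off the stack, then load it into the segment register — which is why emulate_pop_sreg() can fail and the callers above check its return value.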
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 144e7f60b5e2..0150affad25d 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
@@ -29,7 +29,10 @@ | |||
29 | * Based on QEMU and Xen. | 29 | * Based on QEMU and Xen. |
30 | */ | 30 | */ |
31 | 31 | ||
32 | #define pr_fmt(fmt) "pit: " fmt | ||
33 | |||
32 | #include <linux/kvm_host.h> | 34 | #include <linux/kvm_host.h> |
35 | #include <linux/slab.h> | ||
33 | 36 | ||
34 | #include "irq.h" | 37 | #include "irq.h" |
35 | #include "i8254.h" | 38 | #include "i8254.h" |
@@ -240,11 +243,11 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian) | |||
240 | { | 243 | { |
241 | struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state, | 244 | struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state, |
242 | irq_ack_notifier); | 245 | irq_ack_notifier); |
243 | spin_lock(&ps->inject_lock); | 246 | raw_spin_lock(&ps->inject_lock); |
244 | if (atomic_dec_return(&ps->pit_timer.pending) < 0) | 247 | if (atomic_dec_return(&ps->pit_timer.pending) < 0) |
245 | atomic_inc(&ps->pit_timer.pending); | 248 | atomic_inc(&ps->pit_timer.pending); |
246 | ps->irq_ack = 1; | 249 | ps->irq_ack = 1; |
247 | spin_unlock(&ps->inject_lock); | 250 | raw_spin_unlock(&ps->inject_lock); |
248 | } | 251 | } |
249 | 252 | ||
250 | void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) | 253 | void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) |
@@ -262,7 +265,7 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) | |||
262 | 265 | ||
263 | static void destroy_pit_timer(struct kvm_timer *pt) | 266 | static void destroy_pit_timer(struct kvm_timer *pt) |
264 | { | 267 | { |
265 | pr_debug("pit: execute del timer!\n"); | 268 | pr_debug("execute del timer!\n"); |
266 | hrtimer_cancel(&pt->timer); | 269 | hrtimer_cancel(&pt->timer); |
267 | } | 270 | } |
268 | 271 | ||
@@ -284,7 +287,7 @@ static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) | |||
284 | 287 | ||
285 | interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); | 288 | interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); |
286 | 289 | ||
287 | pr_debug("pit: create pit timer, interval is %llu nsec\n", interval); | 290 | pr_debug("create pit timer, interval is %llu nsec\n", interval); |
288 | 291 | ||
289 | /* TODO: the new value only takes effect after the timer is retriggered */ | 292 |

290 | hrtimer_cancel(&pt->timer); | 293 | hrtimer_cancel(&pt->timer); |
@@ -309,7 +312,7 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val) | |||
309 | 312 | ||
310 | WARN_ON(!mutex_is_locked(&ps->lock)); | 313 | WARN_ON(!mutex_is_locked(&ps->lock)); |
311 | 314 | ||
312 | pr_debug("pit: load_count val is %d, channel is %d\n", val, channel); | 315 | pr_debug("load_count val is %d, channel is %d\n", val, channel); |
313 | 316 | ||
314 | /* | 317 | /* |
315 | * The largest possible initial count is 0; this is equivalent | 318 | * The largest possible initial count is 0; this is equivalent |
@@ -395,8 +398,8 @@ static int pit_ioport_write(struct kvm_io_device *this, | |||
395 | mutex_lock(&pit_state->lock); | 398 | mutex_lock(&pit_state->lock); |
396 | 399 | ||
397 | if (val != 0) | 400 | if (val != 0) |
398 | pr_debug("pit: write addr is 0x%x, len is %d, val is 0x%x\n", | 401 | pr_debug("write addr is 0x%x, len is %d, val is 0x%x\n", |
399 | (unsigned int)addr, len, val); | 402 | (unsigned int)addr, len, val); |
400 | 403 | ||
401 | if (addr == 3) { | 404 | if (addr == 3) { |
402 | channel = val >> 6; | 405 | channel = val >> 6; |
@@ -465,6 +468,9 @@ static int pit_ioport_read(struct kvm_io_device *this, | |||
465 | return -EOPNOTSUPP; | 468 | return -EOPNOTSUPP; |
466 | 469 | ||
467 | addr &= KVM_PIT_CHANNEL_MASK; | 470 | addr &= KVM_PIT_CHANNEL_MASK; |
471 | if (addr == 3) | ||
472 | return 0; | ||
473 | |||
468 | s = &pit_state->channels[addr]; | 474 | s = &pit_state->channels[addr]; |
469 | 475 | ||
470 | mutex_lock(&pit_state->lock); | 476 | mutex_lock(&pit_state->lock); |
@@ -600,7 +606,7 @@ static const struct kvm_io_device_ops speaker_dev_ops = { | |||
600 | .write = speaker_ioport_write, | 606 | .write = speaker_ioport_write, |
601 | }; | 607 | }; |
602 | 608 | ||
603 | /* Caller must have writers lock on slots_lock */ | 609 | /* Caller must hold slots_lock */ |
604 | struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) | 610 | struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) |
605 | { | 611 | { |
606 | struct kvm_pit *pit; | 612 | struct kvm_pit *pit; |
@@ -619,7 +625,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) | |||
619 | 625 | ||
620 | mutex_init(&pit->pit_state.lock); | 626 | mutex_init(&pit->pit_state.lock); |
621 | mutex_lock(&pit->pit_state.lock); | 627 | mutex_lock(&pit->pit_state.lock); |
622 | spin_lock_init(&pit->pit_state.inject_lock); | 628 | raw_spin_lock_init(&pit->pit_state.inject_lock); |
623 | 629 | ||
624 | kvm->arch.vpit = pit; | 630 | kvm->arch.vpit = pit; |
625 | pit->kvm = kvm; | 631 | pit->kvm = kvm; |
@@ -640,13 +646,13 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) | |||
640 | kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier); | 646 | kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier); |
641 | 647 | ||
642 | kvm_iodevice_init(&pit->dev, &pit_dev_ops); | 648 | kvm_iodevice_init(&pit->dev, &pit_dev_ops); |
643 | ret = __kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev); | 649 | ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &pit->dev); |
644 | if (ret < 0) | 650 | if (ret < 0) |
645 | goto fail; | 651 | goto fail; |
646 | 652 | ||
647 | if (flags & KVM_PIT_SPEAKER_DUMMY) { | 653 | if (flags & KVM_PIT_SPEAKER_DUMMY) { |
648 | kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops); | 654 | kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops); |
649 | ret = __kvm_io_bus_register_dev(&kvm->pio_bus, | 655 | ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, |
650 | &pit->speaker_dev); | 656 | &pit->speaker_dev); |
651 | if (ret < 0) | 657 | if (ret < 0) |
652 | goto fail_unregister; | 658 | goto fail_unregister; |
@@ -655,11 +661,12 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) | |||
655 | return pit; | 661 | return pit; |
656 | 662 | ||
657 | fail_unregister: | 663 | fail_unregister: |
658 | __kvm_io_bus_unregister_dev(&kvm->pio_bus, &pit->dev); | 664 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev); |
659 | 665 | ||
660 | fail: | 666 | fail: |
661 | if (pit->irq_source_id >= 0) | 667 | kvm_unregister_irq_mask_notifier(kvm, 0, &pit->mask_notifier); |
662 | kvm_free_irq_source_id(kvm, pit->irq_source_id); | 668 | kvm_unregister_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier); |
669 | kvm_free_irq_source_id(kvm, pit->irq_source_id); | ||
663 | 670 | ||
664 | kfree(pit); | 671 | kfree(pit); |
665 | return NULL; | 672 | return NULL; |
@@ -688,10 +695,8 @@ static void __inject_pit_timer_intr(struct kvm *kvm) | |||
688 | struct kvm_vcpu *vcpu; | 695 | struct kvm_vcpu *vcpu; |
689 | int i; | 696 | int i; |
690 | 697 | ||
691 | mutex_lock(&kvm->irq_lock); | ||
692 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); | 698 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); |
693 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); | 699 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); |
694 | mutex_unlock(&kvm->irq_lock); | ||
695 | 700 | ||
696 | /* | 701 | /* |
697 | * Provides NMI watchdog support via Virtual Wire mode. | 702 | * Provides NMI watchdog support via Virtual Wire mode. |
@@ -720,12 +725,12 @@ void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu) | |||
720 | /* Try to inject pending interrupts when | 725 | /* Try to inject pending interrupts when |
721 | * the last one has been acked. | 726 |
722 | */ | 727 | */ |
723 | spin_lock(&ps->inject_lock); | 728 | raw_spin_lock(&ps->inject_lock); |
724 | if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) { | 729 | if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) { |
725 | ps->irq_ack = 0; | 730 | ps->irq_ack = 0; |
726 | inject = 1; | 731 | inject = 1; |
727 | } | 732 | } |
728 | spin_unlock(&ps->inject_lock); | 733 | raw_spin_unlock(&ps->inject_lock); |
729 | if (inject) | 734 | if (inject) |
730 | __inject_pit_timer_intr(kvm); | 735 | __inject_pit_timer_intr(kvm); |
731 | } | 736 | } |
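The pr_fmt() define at the top of i8254.c is why every pr_debug() in the file can drop its hand-written "pit: " prefix: the kernel's printk wrappers paste pr_fmt(fmt) into each format string at compile time, so the define must appear before the first header that pulls in <linux/kernel.h>. For example:

	#define pr_fmt(fmt) "pit: " fmt	/* before any #include */

	pr_debug("load_count val is %d, channel is %d\n", val, channel);
	/* logs as: pit: load_count val is 42, channel is 0 (values illustrative) */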
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index d4c1c7ffdc09..900d6b0ba7c2 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h | |||
@@ -27,7 +27,7 @@ struct kvm_kpit_state { | |||
27 | u32 speaker_data_on; | 27 | u32 speaker_data_on; |
28 | struct mutex lock; | 28 | struct mutex lock; |
29 | struct kvm_pit *pit; | 29 | struct kvm_pit *pit; |
30 | spinlock_t inject_lock; | 30 | raw_spinlock_t inject_lock; |
31 | unsigned long irq_ack; | 31 | unsigned long irq_ack; |
32 | struct kvm_irq_ack_notifier irq_ack_notifier; | 32 | struct kvm_irq_ack_notifier irq_ack_notifier; |
33 | }; | 33 | }; |
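The spinlock_t to raw_spinlock_t switch for inject_lock (and for the PIC lock below) matters on PREEMPT_RT, where an ordinary spinlock_t becomes a sleeping lock; these locks are taken from hrtimer and interrupt-injection paths that must not sleep. The usage pattern, sketched with a standalone illustrative lock:

	static DEFINE_RAW_SPINLOCK(inject_lock);

	static void timer_side(void)
	{
		raw_spin_lock(&inject_lock);	/* truly spins, even on PREEMPT_RT */
		/* ... update pit_timer.pending / irq_ack ... */
		raw_spin_unlock(&inject_lock);
	}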
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 01f151682802..a790fa128a9f 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
@@ -26,6 +26,7 @@ | |||
26 | * Port from Qemu. | 26 | * Port from Qemu. |
27 | */ | 27 | */ |
28 | #include <linux/mm.h> | 28 | #include <linux/mm.h> |
29 | #include <linux/slab.h> | ||
29 | #include <linux/bitops.h> | 30 | #include <linux/bitops.h> |
30 | #include "irq.h" | 31 | #include "irq.h" |
31 | 32 | ||
@@ -38,16 +39,25 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq) | |||
38 | s->isr_ack |= (1 << irq); | 39 | s->isr_ack |= (1 << irq); |
39 | if (s != &s->pics_state->pics[0]) | 40 | if (s != &s->pics_state->pics[0]) |
40 | irq += 8; | 41 | irq += 8; |
42 | /* | ||
43 | * We drop the lock while calling ack notifiers because ack notifier | ||
44 | * callbacks for assigned devices call back into the PIC recursively. | ||
45 | * Other interrupts may be delivered to the PIC while the lock is | ||
46 | * dropped, but that should be safe: PIC state is already updated here. | ||
47 | */ | ||
48 | raw_spin_unlock(&s->pics_state->lock); | ||
41 | kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq); | 49 | kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq); |
50 | raw_spin_lock(&s->pics_state->lock); | ||
42 | } | 51 | } |
43 | 52 | ||
44 | void kvm_pic_clear_isr_ack(struct kvm *kvm) | 53 | void kvm_pic_clear_isr_ack(struct kvm *kvm) |
45 | { | 54 | { |
46 | struct kvm_pic *s = pic_irqchip(kvm); | 55 | struct kvm_pic *s = pic_irqchip(kvm); |
47 | spin_lock(&s->lock); | 56 | |
57 | raw_spin_lock(&s->lock); | ||
48 | s->pics[0].isr_ack = 0xff; | 58 | s->pics[0].isr_ack = 0xff; |
49 | s->pics[1].isr_ack = 0xff; | 59 | s->pics[1].isr_ack = 0xff; |
50 | spin_unlock(&s->lock); | 60 | raw_spin_unlock(&s->lock); |
51 | } | 61 | } |
52 | 62 | ||
53 | /* | 63 | /* |
@@ -148,9 +158,9 @@ static void pic_update_irq(struct kvm_pic *s) | |||
148 | 158 | ||
149 | void kvm_pic_update_irq(struct kvm_pic *s) | 159 | void kvm_pic_update_irq(struct kvm_pic *s) |
150 | { | 160 | { |
151 | spin_lock(&s->lock); | 161 | raw_spin_lock(&s->lock); |
152 | pic_update_irq(s); | 162 | pic_update_irq(s); |
153 | spin_unlock(&s->lock); | 163 | raw_spin_unlock(&s->lock); |
154 | } | 164 | } |
155 | 165 | ||
156 | int kvm_pic_set_irq(void *opaque, int irq, int level) | 166 | int kvm_pic_set_irq(void *opaque, int irq, int level) |
@@ -158,14 +168,14 @@ int kvm_pic_set_irq(void *opaque, int irq, int level) | |||
158 | struct kvm_pic *s = opaque; | 168 | struct kvm_pic *s = opaque; |
159 | int ret = -1; | 169 | int ret = -1; |
160 | 170 | ||
161 | spin_lock(&s->lock); | 171 | raw_spin_lock(&s->lock); |
162 | if (irq >= 0 && irq < PIC_NUM_PINS) { | 172 | if (irq >= 0 && irq < PIC_NUM_PINS) { |
163 | ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); | 173 | ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); |
164 | pic_update_irq(s); | 174 | pic_update_irq(s); |
165 | trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, | 175 | trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, |
166 | s->pics[irq >> 3].imr, ret == 0); | 176 | s->pics[irq >> 3].imr, ret == 0); |
167 | } | 177 | } |
168 | spin_unlock(&s->lock); | 178 | raw_spin_unlock(&s->lock); |
169 | 179 | ||
170 | return ret; | 180 | return ret; |
171 | } | 181 | } |
@@ -176,16 +186,18 @@ int kvm_pic_set_irq(void *opaque, int irq, int level) | |||
176 | static inline void pic_intack(struct kvm_kpic_state *s, int irq) | 186 | static inline void pic_intack(struct kvm_kpic_state *s, int irq) |
177 | { | 187 | { |
178 | s->isr |= 1 << irq; | 188 | s->isr |= 1 << irq; |
179 | if (s->auto_eoi) { | ||
180 | if (s->rotate_on_auto_eoi) | ||
181 | s->priority_add = (irq + 1) & 7; | ||
182 | pic_clear_isr(s, irq); | ||
183 | } | ||
184 | /* | 189 | /* |
185 | * We don't clear a level sensitive interrupt here | 190 | * We don't clear a level sensitive interrupt here |
186 | */ | 191 | */ |
187 | if (!(s->elcr & (1 << irq))) | 192 | if (!(s->elcr & (1 << irq))) |
188 | s->irr &= ~(1 << irq); | 193 | s->irr &= ~(1 << irq); |
194 | |||
195 | if (s->auto_eoi) { | ||
196 | if (s->rotate_on_auto_eoi) | ||
197 | s->priority_add = (irq + 1) & 7; | ||
198 | pic_clear_isr(s, irq); | ||
199 | } | ||
200 | |||
189 | } | 201 | } |
190 | 202 | ||
191 | int kvm_pic_read_irq(struct kvm *kvm) | 203 | int kvm_pic_read_irq(struct kvm *kvm) |
@@ -193,7 +205,7 @@ int kvm_pic_read_irq(struct kvm *kvm) | |||
193 | int irq, irq2, intno; | 205 | int irq, irq2, intno; |
194 | struct kvm_pic *s = pic_irqchip(kvm); | 206 | struct kvm_pic *s = pic_irqchip(kvm); |
195 | 207 | ||
196 | spin_lock(&s->lock); | 208 | raw_spin_lock(&s->lock); |
197 | irq = pic_get_irq(&s->pics[0]); | 209 | irq = pic_get_irq(&s->pics[0]); |
198 | if (irq >= 0) { | 210 | if (irq >= 0) { |
199 | pic_intack(&s->pics[0], irq); | 211 | pic_intack(&s->pics[0], irq); |
@@ -218,29 +230,18 @@ int kvm_pic_read_irq(struct kvm *kvm) | |||
218 | intno = s->pics[0].irq_base + irq; | 230 | intno = s->pics[0].irq_base + irq; |
219 | } | 231 | } |
220 | pic_update_irq(s); | 232 | pic_update_irq(s); |
221 | spin_unlock(&s->lock); | 233 | raw_spin_unlock(&s->lock); |
222 | 234 | ||
223 | return intno; | 235 | return intno; |
224 | } | 236 | } |
225 | 237 | ||
226 | void kvm_pic_reset(struct kvm_kpic_state *s) | 238 | void kvm_pic_reset(struct kvm_kpic_state *s) |
227 | { | 239 | { |
228 | int irq, irqbase, n; | 240 | int irq; |
229 | struct kvm *kvm = s->pics_state->irq_request_opaque; | 241 | struct kvm *kvm = s->pics_state->irq_request_opaque; |
230 | struct kvm_vcpu *vcpu0 = kvm->bsp_vcpu; | 242 | struct kvm_vcpu *vcpu0 = kvm->bsp_vcpu; |
243 | u8 irr = s->irr, isr = s->imr; | ||
231 | 244 | ||
232 | if (s == &s->pics_state->pics[0]) | ||
233 | irqbase = 0; | ||
234 | else | ||
235 | irqbase = 8; | ||
236 | |||
237 | for (irq = 0; irq < PIC_NUM_PINS/2; irq++) { | ||
238 | if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0)) | ||
239 | if (s->irr & (1 << irq) || s->isr & (1 << irq)) { | ||
240 | n = irq + irqbase; | ||
241 | kvm_notify_acked_irq(kvm, SELECT_PIC(n), n); | ||
242 | } | ||
243 | } | ||
244 | s->last_irr = 0; | 245 | s->last_irr = 0; |
245 | s->irr = 0; | 246 | s->irr = 0; |
246 | s->imr = 0; | 247 | s->imr = 0; |
@@ -256,6 +257,13 @@ void kvm_pic_reset(struct kvm_kpic_state *s) | |||
256 | s->rotate_on_auto_eoi = 0; | 257 | s->rotate_on_auto_eoi = 0; |
257 | s->special_fully_nested_mode = 0; | 258 | s->special_fully_nested_mode = 0; |
258 | s->init4 = 0; | 259 | s->init4 = 0; |
260 | |||
261 | for (irq = 0; irq < PIC_NUM_PINS/2; irq++) { | ||
262 | if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0)) | ||
263 | if (irr & (1 << irq) || isr & (1 << irq)) { | ||
264 | pic_clear_isr(s, irq); | ||
265 | } | ||
266 | } | ||
259 | } | 267 | } |
260 | 268 | ||
261 | static void pic_ioport_write(void *opaque, u32 addr, u32 val) | 269 | static void pic_ioport_write(void *opaque, u32 addr, u32 val) |
@@ -298,9 +306,9 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val) | |||
298 | priority = get_priority(s, s->isr); | 306 | priority = get_priority(s, s->isr); |
299 | if (priority != 8) { | 307 | if (priority != 8) { |
300 | irq = (priority + s->priority_add) & 7; | 308 | irq = (priority + s->priority_add) & 7; |
301 | pic_clear_isr(s, irq); | ||
302 | if (cmd == 5) | 309 | if (cmd == 5) |
303 | s->priority_add = (irq + 1) & 7; | 310 | s->priority_add = (irq + 1) & 7; |
311 | pic_clear_isr(s, irq); | ||
304 | pic_update_irq(s->pics_state); | 312 | pic_update_irq(s->pics_state); |
305 | } | 313 | } |
306 | break; | 314 | break; |
@@ -436,7 +444,7 @@ static int picdev_write(struct kvm_io_device *this, | |||
436 | printk(KERN_ERR "PIC: non byte write\n"); | 444 | printk(KERN_ERR "PIC: non byte write\n"); |
437 | return 0; | 445 | return 0; |
438 | } | 446 | } |
439 | spin_lock(&s->lock); | 447 | raw_spin_lock(&s->lock); |
440 | switch (addr) { | 448 | switch (addr) { |
441 | case 0x20: | 449 | case 0x20: |
442 | case 0x21: | 450 | case 0x21: |
@@ -449,7 +457,7 @@ static int picdev_write(struct kvm_io_device *this, | |||
449 | elcr_ioport_write(&s->pics[addr & 1], addr, data); | 457 | elcr_ioport_write(&s->pics[addr & 1], addr, data); |
450 | break; | 458 | break; |
451 | } | 459 | } |
452 | spin_unlock(&s->lock); | 460 | raw_spin_unlock(&s->lock); |
453 | return 0; | 461 | return 0; |
454 | } | 462 | } |
455 | 463 | ||
@@ -466,7 +474,7 @@ static int picdev_read(struct kvm_io_device *this, | |||
466 | printk(KERN_ERR "PIC: non byte read\n"); | 474 | printk(KERN_ERR "PIC: non byte read\n"); |
467 | return 0; | 475 | return 0; |
468 | } | 476 | } |
469 | spin_lock(&s->lock); | 477 | raw_spin_lock(&s->lock); |
470 | switch (addr) { | 478 | switch (addr) { |
471 | case 0x20: | 479 | case 0x20: |
472 | case 0x21: | 480 | case 0x21: |
@@ -480,7 +488,7 @@ static int picdev_read(struct kvm_io_device *this, | |||
480 | break; | 488 | break; |
481 | } | 489 | } |
482 | *(unsigned char *)val = data; | 490 | *(unsigned char *)val = data; |
483 | spin_unlock(&s->lock); | 491 | raw_spin_unlock(&s->lock); |
484 | return 0; | 492 | return 0; |
485 | } | 493 | } |
486 | 494 | ||
@@ -514,7 +522,7 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm) | |||
514 | s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL); | 522 | s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL); |
515 | if (!s) | 523 | if (!s) |
516 | return NULL; | 524 | return NULL; |
517 | spin_lock_init(&s->lock); | 525 | raw_spin_lock_init(&s->lock); |
518 | s->kvm = kvm; | 526 | s->kvm = kvm; |
519 | s->pics[0].elcr_mask = 0xf8; | 527 | s->pics[0].elcr_mask = 0xf8; |
520 | s->pics[1].elcr_mask = 0xde; | 528 | s->pics[1].elcr_mask = 0xde; |
@@ -527,7 +535,9 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm) | |||
527 | * Initialize PIO device | 535 | * Initialize PIO device |
528 | */ | 536 | */ |
529 | kvm_iodevice_init(&s->dev, &picdev_ops); | 537 | kvm_iodevice_init(&s->dev, &picdev_ops); |
530 | ret = kvm_io_bus_register_dev(kvm, &kvm->pio_bus, &s->dev); | 538 | mutex_lock(&kvm->slots_lock); |
539 | ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &s->dev); | ||
540 | mutex_unlock(&kvm->slots_lock); | ||
531 | if (ret < 0) { | 541 | if (ret < 0) { |
532 | kfree(s); | 542 | kfree(s); |
533 | return NULL; | 543 | return NULL; |
@@ -535,3 +545,14 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm) | |||
535 | 545 | ||
536 | return s; | 546 | return s; |
537 | } | 547 | } |
548 | |||
549 | void kvm_destroy_pic(struct kvm *kvm) | ||
550 | { | ||
551 | struct kvm_pic *vpic = kvm->arch.vpic; | ||
552 | |||
553 | if (vpic) { | ||
554 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev); | ||
555 | kvm->arch.vpic = NULL; | ||
556 | kfree(vpic); | ||
557 | } | ||
558 | } | ||
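pic_clear_isr() now drops pics_state->lock around kvm_notify_acked_irq() because ack-notifier callbacks for assigned devices call back into the PIC and would otherwise self-deadlock on the new raw spinlock. The pattern is only safe because every piece of PIC state is updated before the lock is released; a generic sketch of the lock-break:

	raw_spin_lock(&s->pics_state->lock);
	s->isr &= ~(1 << irq);			/* finish all state updates ... */
	s->isr_ack |= (1 << irq);
	raw_spin_unlock(&s->pics_state->lock);	/* ... before breaking the lock */
	kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq);
	raw_spin_lock(&s->pics_state->lock);	/* retaken so callers see no change */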
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 7d6058a2fd38..34b15915754d 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h | |||
@@ -62,7 +62,7 @@ struct kvm_kpic_state { | |||
62 | }; | 62 | }; |
63 | 63 | ||
64 | struct kvm_pic { | 64 | struct kvm_pic { |
65 | spinlock_t lock; | 65 | raw_spinlock_t lock; |
66 | unsigned pending_acks; | 66 | unsigned pending_acks; |
67 | struct kvm *kvm; | 67 | struct kvm *kvm; |
68 | struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ | 68 | struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ |
@@ -71,9 +71,11 @@ struct kvm_pic { | |||
71 | int output; /* intr from master PIC */ | 71 | int output; /* intr from master PIC */ |
72 | struct kvm_io_device dev; | 72 | struct kvm_io_device dev; |
73 | void (*ack_notifier)(void *opaque, int irq); | 73 | void (*ack_notifier)(void *opaque, int irq); |
74 | unsigned long irq_states[16]; | ||
74 | }; | 75 | }; |
75 | 76 | ||
76 | struct kvm_pic *kvm_create_pic(struct kvm *kvm); | 77 | struct kvm_pic *kvm_create_pic(struct kvm *kvm); |
78 | void kvm_destroy_pic(struct kvm *kvm); | ||
77 | int kvm_pic_read_irq(struct kvm *kvm); | 79 | int kvm_pic_read_irq(struct kvm *kvm); |
78 | void kvm_pic_update_irq(struct kvm_pic *s); | 80 | void kvm_pic_update_irq(struct kvm_pic *s); |
79 | void kvm_pic_clear_isr_ack(struct kvm *kvm); | 81 | void kvm_pic_clear_isr_ack(struct kvm *kvm); |
@@ -85,7 +87,11 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) | |||
85 | 87 | ||
86 | static inline int irqchip_in_kernel(struct kvm *kvm) | 88 | static inline int irqchip_in_kernel(struct kvm *kvm) |
87 | { | 89 | { |
88 | return pic_irqchip(kvm) != NULL; | 90 | int ret; |
91 | |||
92 | ret = (pic_irqchip(kvm) != NULL); | ||
93 | smp_rmb(); | ||
94 | return ret; | ||
89 | } | 95 | } |
90 | 96 | ||
91 | void kvm_pic_reset(struct kvm_kpic_state *s); | 97 | void kvm_pic_reset(struct kvm_kpic_state *s); |
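The smp_rmb() added to irqchip_in_kernel() orders the NULL test of kvm->arch.vpic before any later reads of irqchip state, so a reader that sees a non-NULL PIC also sees it fully constructed. This presumably pairs with a write barrier on the KVM_CREATE_IRQCHIP path in x86.c, roughly of this shape (an assumed sketch, not the literal creation code):

	struct kvm_pic *vpic = kvm_create_pic(kvm);
	/* ... create the ioapic, install default irq routing ... */
	smp_wmb();			/* initialize everything ... */
	kvm->arch.vpic = vpic;		/* ... before publishing the pointer */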
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index 7bcc5b6a4403..cff851cf5322 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h | |||
@@ -1,6 +1,11 @@ | |||
1 | #ifndef ASM_KVM_CACHE_REGS_H | 1 | #ifndef ASM_KVM_CACHE_REGS_H |
2 | #define ASM_KVM_CACHE_REGS_H | 2 | #define ASM_KVM_CACHE_REGS_H |
3 | 3 | ||
4 | #define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS | ||
5 | #define KVM_POSSIBLE_CR4_GUEST_BITS \ | ||
6 | (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ | ||
7 | | X86_CR4_OSXMMEXCPT | X86_CR4_PGE) | ||
8 | |||
4 | static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, | 9 | static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, |
5 | enum kvm_reg reg) | 10 | enum kvm_reg reg) |
6 | { | 11 | { |
@@ -38,4 +43,30 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index) | |||
38 | return vcpu->arch.pdptrs[index]; | 43 | return vcpu->arch.pdptrs[index]; |
39 | } | 44 | } |
40 | 45 | ||
46 | static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask) | ||
47 | { | ||
48 | ulong tmask = mask & KVM_POSSIBLE_CR0_GUEST_BITS; | ||
49 | if (tmask & vcpu->arch.cr0_guest_owned_bits) | ||
50 | kvm_x86_ops->decache_cr0_guest_bits(vcpu); | ||
51 | return vcpu->arch.cr0 & mask; | ||
52 | } | ||
53 | |||
54 | static inline ulong kvm_read_cr0(struct kvm_vcpu *vcpu) | ||
55 | { | ||
56 | return kvm_read_cr0_bits(vcpu, ~0UL); | ||
57 | } | ||
58 | |||
59 | static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask) | ||
60 | { | ||
61 | ulong tmask = mask & KVM_POSSIBLE_CR4_GUEST_BITS; | ||
62 | if (tmask & vcpu->arch.cr4_guest_owned_bits) | ||
63 | kvm_x86_ops->decache_cr4_guest_bits(vcpu); | ||
64 | return vcpu->arch.cr4 & mask; | ||
65 | } | ||
66 | |||
67 | static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu) | ||
68 | { | ||
69 | return kvm_read_cr4_bits(vcpu, ~0UL); | ||
70 | } | ||
71 | |||
41 | #endif | 72 | #endif |
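The new kvm_read_cr0()/kvm_read_cr4() helpers exist because some control-register bits can now be guest-owned: the guest flips them without a VM exit, so the cached vcpu->arch.cr0/cr4 may be stale for exactly those bits. The helpers decache from the VMCS/VMCB only when a requested bit is actually guest-owned, which keeps the common case free. Usage, sketched:

	/* CR0.TS is in KVM_POSSIBLE_CR0_GUEST_BITS, so this read may decache: */
	if (kvm_read_cr0_bits(vcpu, X86_CR0_TS))
		;	/* guest's FPU state is lazily switched out */

	/* CR0.PG is host-owned, so this never touches hardware state: */
	int paging = !!kvm_read_cr0_bits(vcpu, X86_CR0_PG);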
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 23c217692ea9..1eb7a4ae0c9c 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -26,13 +26,13 @@ | |||
26 | #include <linux/io.h> | 26 | #include <linux/io.h> |
27 | #include <linux/module.h> | 27 | #include <linux/module.h> |
28 | #include <linux/math64.h> | 28 | #include <linux/math64.h> |
29 | #include <linux/slab.h> | ||
29 | #include <asm/processor.h> | 30 | #include <asm/processor.h> |
30 | #include <asm/msr.h> | 31 | #include <asm/msr.h> |
31 | #include <asm/page.h> | 32 | #include <asm/page.h> |
32 | #include <asm/current.h> | 33 | #include <asm/current.h> |
33 | #include <asm/apicdef.h> | 34 | #include <asm/apicdef.h> |
34 | #include <asm/atomic.h> | 35 | #include <asm/atomic.h> |
35 | #include <asm/apicdef.h> | ||
36 | #include "kvm_cache_regs.h" | 36 | #include "kvm_cache_regs.h" |
37 | #include "irq.h" | 37 | #include "irq.h" |
38 | #include "trace.h" | 38 | #include "trace.h" |
@@ -374,6 +374,12 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
374 | if (unlikely(!apic_enabled(apic))) | 374 | if (unlikely(!apic_enabled(apic))) |
375 | break; | 375 | break; |
376 | 376 | ||
377 | if (trig_mode) { | ||
378 | apic_debug("level trig mode for vector %d", vector); | ||
379 | apic_set_vector(vector, apic->regs + APIC_TMR); | ||
380 | } else | ||
381 | apic_clear_vector(vector, apic->regs + APIC_TMR); | ||
382 | |||
377 | result = !apic_test_and_set_irr(vector, apic); | 383 | result = !apic_test_and_set_irr(vector, apic); |
378 | trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, | 384 | trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, |
379 | trig_mode, vector, !result); | 385 | trig_mode, vector, !result); |
@@ -384,11 +390,6 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
384 | break; | 390 | break; |
385 | } | 391 | } |
386 | 392 | ||
387 | if (trig_mode) { | ||
388 | apic_debug("level trig mode for vector %d", vector); | ||
389 | apic_set_vector(vector, apic->regs + APIC_TMR); | ||
390 | } else | ||
391 | apic_clear_vector(vector, apic->regs + APIC_TMR); | ||
392 | kvm_vcpu_kick(vcpu); | 393 | kvm_vcpu_kick(vcpu); |
393 | break; | 394 | break; |
394 | 395 | ||
@@ -471,11 +472,8 @@ static void apic_set_eoi(struct kvm_lapic *apic) | |||
471 | trigger_mode = IOAPIC_LEVEL_TRIG; | 472 | trigger_mode = IOAPIC_LEVEL_TRIG; |
472 | else | 473 | else |
473 | trigger_mode = IOAPIC_EDGE_TRIG; | 474 | trigger_mode = IOAPIC_EDGE_TRIG; |
474 | if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)) { | 475 | if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)) |
475 | mutex_lock(&apic->vcpu->kvm->irq_lock); | ||
476 | kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); | 476 | kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); |
477 | mutex_unlock(&apic->vcpu->kvm->irq_lock); | ||
478 | } | ||
479 | } | 477 | } |
480 | 478 | ||
481 | static void apic_send_ipi(struct kvm_lapic *apic) | 479 | static void apic_send_ipi(struct kvm_lapic *apic) |
@@ -504,9 +502,7 @@ static void apic_send_ipi(struct kvm_lapic *apic) | |||
504 | irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode, | 502 | irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode, |
505 | irq.vector); | 503 | irq.vector); |
506 | 504 | ||
507 | mutex_lock(&apic->vcpu->kvm->irq_lock); | ||
508 | kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq); | 505 | kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq); |
509 | mutex_unlock(&apic->vcpu->kvm->irq_lock); | ||
510 | } | 506 | } |
511 | 507 | ||
512 | static u32 apic_get_tmcct(struct kvm_lapic *apic) | 508 | static u32 apic_get_tmcct(struct kvm_lapic *apic) |
@@ -1156,6 +1152,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu) | |||
1156 | hrtimer_cancel(&apic->lapic_timer.timer); | 1152 | hrtimer_cancel(&apic->lapic_timer.timer); |
1157 | update_divide_count(apic); | 1153 | update_divide_count(apic); |
1158 | start_apic_timer(apic); | 1154 | start_apic_timer(apic); |
1155 | apic->irr_pending = true; | ||
1159 | } | 1156 | } |
1160 | 1157 | ||
1161 | void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) | 1158 | void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) |
@@ -1250,3 +1247,34 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data) | |||
1250 | 1247 | ||
1251 | return 0; | 1248 | return 0; |
1252 | } | 1249 | } |
1250 | |||
1251 | int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data) | ||
1252 | { | ||
1253 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
1254 | |||
1255 | if (!irqchip_in_kernel(vcpu->kvm)) | ||
1256 | return 1; | ||
1257 | |||
1258 | /* if this is ICR write vector before command */ | ||
1259 | if (reg == APIC_ICR) | ||
1260 | apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32)); | ||
1261 | return apic_reg_write(apic, reg, (u32)data); | ||
1262 | } | ||
1263 | |||
1264 | int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data) | ||
1265 | { | ||
1266 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
1267 | u32 low, high = 0; | ||
1268 | |||
1269 | if (!irqchip_in_kernel(vcpu->kvm)) | ||
1270 | return 1; | ||
1271 | |||
1272 | if (apic_reg_read(apic, reg, 4, &low)) | ||
1273 | return 1; | ||
1274 | if (reg == APIC_ICR) | ||
1275 | apic_reg_read(apic, APIC_ICR2, 4, &high); | ||
1276 | |||
1277 | *data = (((u64)high) << 32) | low; | ||
1278 | |||
1279 | return 0; | ||
1280 | } | ||
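In kvm_hv_vapic_msr_write(), the ordering comment is load-bearing: for a 64-bit ICR value arriving through the Hyper-V synthetic MSR, the destination half must be stored to APIC_ICR2 before the low half is written, because the write to APIC_ICR is what actually dispatches the IPI. A hedged caller-side illustration:

	/* Send a fixed IPI, vector 0xf3, to physical APIC ID 1 (xAPIC layout:
	 * destination in bits 56-63 of ICR, i.e. bits 24-31 of ICR2). */
	u64 icr = ((u64)(1 << 24) << 32) | APIC_DM_FIXED | 0xf3;
	kvm_hv_vapic_msr_write(vcpu, APIC_ICR, icr);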
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 40010b09c4aa..f5fe32c5edad 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
@@ -48,4 +48,12 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu); | |||
48 | 48 | ||
49 | int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data); | 49 | int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data); |
50 | int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data); | 50 | int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data); |
51 | |||
52 | int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data); | ||
53 | int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data); | ||
54 | |||
55 | static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu) | ||
56 | { | ||
57 | return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE; | ||
58 | } | ||
51 | #endif | 59 | #endif |
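kvm_hv_vapic_assist_page_enabled() simply tests the enable bit the guest last wrote to the HV_X64_MSR_APIC_ASSIST_PAGE MSR, which x86.c caches in vcpu->arch.hv_vapic. A sketch of a caller, assuming the usual Hyper-V layout of enable in bit 0 and the page frame in the upper bits:

	if (kvm_hv_vapic_assist_page_enabled(vcpu)) {
		gfn_t gfn = vcpu->arch.hv_vapic >>
			    HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT;
		/* ... map gfn and maintain the EOI-assist word ... */
	}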
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 818b92ad82cf..19a8906bcaa2 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -18,6 +18,7 @@ | |||
18 | */ | 18 | */ |
19 | 19 | ||
20 | #include "mmu.h" | 20 | #include "mmu.h" |
21 | #include "x86.h" | ||
21 | #include "kvm_cache_regs.h" | 22 | #include "kvm_cache_regs.h" |
22 | 23 | ||
23 | #include <linux/kvm_host.h> | 24 | #include <linux/kvm_host.h> |
@@ -29,6 +30,8 @@ | |||
29 | #include <linux/swap.h> | 30 | #include <linux/swap.h> |
30 | #include <linux/hugetlb.h> | 31 | #include <linux/hugetlb.h> |
31 | #include <linux/compiler.h> | 32 | #include <linux/compiler.h> |
33 | #include <linux/srcu.h> | ||
34 | #include <linux/slab.h> | ||
32 | 35 | ||
33 | #include <asm/page.h> | 36 | #include <asm/page.h> |
34 | #include <asm/cmpxchg.h> | 37 | #include <asm/cmpxchg.h> |
@@ -136,16 +139,6 @@ module_param(oos_shadow, bool, 0644); | |||
136 | #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \ | 139 | #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \ |
137 | | PT64_NX_MASK) | 140 | | PT64_NX_MASK) |
138 | 141 | ||
139 | #define PFERR_PRESENT_MASK (1U << 0) | ||
140 | #define PFERR_WRITE_MASK (1U << 1) | ||
141 | #define PFERR_USER_MASK (1U << 2) | ||
142 | #define PFERR_RSVD_MASK (1U << 3) | ||
143 | #define PFERR_FETCH_MASK (1U << 4) | ||
144 | |||
145 | #define PT_PDPE_LEVEL 3 | ||
146 | #define PT_DIRECTORY_LEVEL 2 | ||
147 | #define PT_PAGE_TABLE_LEVEL 1 | ||
148 | |||
149 | #define RMAP_EXT 4 | 142 | #define RMAP_EXT 4 |
150 | 143 | ||
151 | #define ACC_EXEC_MASK 1 | 144 | #define ACC_EXEC_MASK 1 |
@@ -153,6 +146,9 @@ module_param(oos_shadow, bool, 0644); | |||
153 | #define ACC_USER_MASK PT_USER_MASK | 146 | #define ACC_USER_MASK PT_USER_MASK |
154 | #define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK) | 147 | #define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK) |
155 | 148 | ||
149 | #include <trace/events/kvm.h> | ||
150 | |||
151 | #undef TRACE_INCLUDE_FILE | ||
156 | #define CREATE_TRACE_POINTS | 152 | #define CREATE_TRACE_POINTS |
157 | #include "mmutrace.h" | 153 | #include "mmutrace.h" |
158 | 154 | ||
@@ -229,7 +225,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); | |||
229 | 225 | ||
230 | static int is_write_protection(struct kvm_vcpu *vcpu) | 226 | static int is_write_protection(struct kvm_vcpu *vcpu) |
231 | { | 227 | { |
232 | return vcpu->arch.cr0 & X86_CR0_WP; | 228 | return kvm_read_cr0_bits(vcpu, X86_CR0_WP); |
233 | } | 229 | } |
234 | 230 | ||
235 | static int is_cpuid_PSE36(void) | 231 | static int is_cpuid_PSE36(void) |
@@ -239,7 +235,7 @@ static int is_cpuid_PSE36(void) | |||
239 | 235 | ||
240 | static int is_nx(struct kvm_vcpu *vcpu) | 236 | static int is_nx(struct kvm_vcpu *vcpu) |
241 | { | 237 | { |
242 | return vcpu->arch.shadow_efer & EFER_NX; | 238 | return vcpu->arch.efer & EFER_NX; |
243 | } | 239 | } |
244 | 240 | ||
245 | static int is_shadow_present_pte(u64 pte) | 241 | static int is_shadow_present_pte(u64 pte) |
@@ -253,7 +249,7 @@ static int is_large_pte(u64 pte) | |||
253 | return pte & PT_PAGE_SIZE_MASK; | 249 | return pte & PT_PAGE_SIZE_MASK; |
254 | } | 250 | } |
255 | 251 | ||
256 | static int is_writeble_pte(unsigned long pte) | 252 | static int is_writable_pte(unsigned long pte) |
257 | { | 253 | { |
258 | return pte & PT_WRITABLE_MASK; | 254 | return pte & PT_WRITABLE_MASK; |
259 | } | 255 | } |
@@ -470,24 +466,10 @@ static int has_wrprotected_page(struct kvm *kvm, | |||
470 | 466 | ||
471 | static int host_mapping_level(struct kvm *kvm, gfn_t gfn) | 467 | static int host_mapping_level(struct kvm *kvm, gfn_t gfn) |
472 | { | 468 | { |
473 | unsigned long page_size = PAGE_SIZE; | 469 | unsigned long page_size; |
474 | struct vm_area_struct *vma; | ||
475 | unsigned long addr; | ||
476 | int i, ret = 0; | 470 | int i, ret = 0; |
477 | 471 | ||
478 | addr = gfn_to_hva(kvm, gfn); | 472 | page_size = kvm_host_page_size(kvm, gfn); |
479 | if (kvm_is_error_hva(addr)) | ||
480 | return page_size; | ||
481 | |||
482 | down_read(¤t->mm->mmap_sem); | ||
483 | vma = find_vma(current->mm, addr); | ||
484 | if (!vma) | ||
485 | goto out; | ||
486 | |||
487 | page_size = vma_kernel_pagesize(vma); | ||
488 | |||
489 | out: | ||
490 | up_read(¤t->mm->mmap_sem); | ||
491 | 473 | ||
492 | for (i = PT_PAGE_TABLE_LEVEL; | 474 | for (i = PT_PAGE_TABLE_LEVEL; |
493 | i < (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES); ++i) { | 475 | i < (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES); ++i) { |
@@ -503,8 +485,7 @@ out: | |||
503 | static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) | 485 | static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) |
504 | { | 486 | { |
505 | struct kvm_memory_slot *slot; | 487 | struct kvm_memory_slot *slot; |
506 | int host_level; | 488 | int host_level, level, max_level; |
507 | int level = PT_PAGE_TABLE_LEVEL; | ||
508 | 489 | ||
509 | slot = gfn_to_memslot(vcpu->kvm, large_gfn); | 490 | slot = gfn_to_memslot(vcpu->kvm, large_gfn); |
510 | if (slot && slot->dirty_bitmap) | 491 | if (slot && slot->dirty_bitmap) |
@@ -515,11 +496,12 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) | |||
515 | if (host_level == PT_PAGE_TABLE_LEVEL) | 496 | if (host_level == PT_PAGE_TABLE_LEVEL) |
516 | return host_level; | 497 | return host_level; |
517 | 498 | ||
518 | for (level = PT_DIRECTORY_LEVEL; level <= host_level; ++level) { | 499 | max_level = kvm_x86_ops->get_lpage_level() < host_level ? |
500 | kvm_x86_ops->get_lpage_level() : host_level; | ||
519 | 501 | ||
502 | for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level) | ||
520 | if (has_wrprotected_page(vcpu->kvm, large_gfn, level)) | 503 | if (has_wrprotected_page(vcpu->kvm, large_gfn, level)) |
521 | break; | 504 | break; |
522 | } | ||
523 | 505 | ||
524 | return level - 1; | 506 | return level - 1; |
525 | } | 507 | } |
@@ -635,7 +617,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) | |||
635 | pfn = spte_to_pfn(*spte); | 617 | pfn = spte_to_pfn(*spte); |
636 | if (*spte & shadow_accessed_mask) | 618 | if (*spte & shadow_accessed_mask) |
637 | kvm_set_pfn_accessed(pfn); | 619 | kvm_set_pfn_accessed(pfn); |
638 | if (is_writeble_pte(*spte)) | 620 | if (is_writable_pte(*spte)) |
639 | kvm_set_pfn_dirty(pfn); | 621 | kvm_set_pfn_dirty(pfn); |
640 | rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level); | 622 | rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level); |
641 | if (!*rmapp) { | 623 | if (!*rmapp) { |
@@ -664,6 +646,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) | |||
664 | prev_desc = desc; | 646 | prev_desc = desc; |
665 | desc = desc->more; | 647 | desc = desc->more; |
666 | } | 648 | } |
649 | pr_err("rmap_remove: %p %llx many->many\n", spte, *spte); | ||
667 | BUG(); | 650 | BUG(); |
668 | } | 651 | } |
669 | } | 652 | } |
@@ -710,7 +693,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
710 | BUG_ON(!spte); | 693 | BUG_ON(!spte); |
711 | BUG_ON(!(*spte & PT_PRESENT_MASK)); | 694 | BUG_ON(!(*spte & PT_PRESENT_MASK)); |
712 | rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); | 695 | rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); |
713 | if (is_writeble_pte(*spte)) { | 696 | if (is_writable_pte(*spte)) { |
714 | __set_spte(spte, *spte & ~PT_WRITABLE_MASK); | 697 | __set_spte(spte, *spte & ~PT_WRITABLE_MASK); |
715 | write_protected = 1; | 698 | write_protected = 1; |
716 | } | 699 | } |
@@ -734,7 +717,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
734 | BUG_ON(!(*spte & PT_PRESENT_MASK)); | 717 | BUG_ON(!(*spte & PT_PRESENT_MASK)); |
735 | BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)); | 718 | BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)); |
736 | pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn); | 719 | pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn); |
737 | if (is_writeble_pte(*spte)) { | 720 | if (is_writable_pte(*spte)) { |
738 | rmap_remove(kvm, spte); | 721 | rmap_remove(kvm, spte); |
739 | --kvm->stat.lpages; | 722 | --kvm->stat.lpages; |
740 | __set_spte(spte, shadow_trap_nonpresent_pte); | 723 | __set_spte(spte, shadow_trap_nonpresent_pte); |
@@ -789,7 +772,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
789 | 772 | ||
790 | new_spte &= ~PT_WRITABLE_MASK; | 773 | new_spte &= ~PT_WRITABLE_MASK; |
791 | new_spte &= ~SPTE_HOST_WRITEABLE; | 774 | new_spte &= ~SPTE_HOST_WRITEABLE; |
792 | if (is_writeble_pte(*spte)) | 775 | if (is_writable_pte(*spte)) |
793 | kvm_set_pfn_dirty(spte_to_pfn(*spte)); | 776 | kvm_set_pfn_dirty(spte_to_pfn(*spte)); |
794 | __set_spte(spte, new_spte); | 777 | __set_spte(spte, new_spte); |
795 | spte = rmap_next(kvm, rmapp, spte); | 778 | spte = rmap_next(kvm, rmapp, spte); |
@@ -807,35 +790,32 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | |||
807 | unsigned long data)) | 790 | unsigned long data)) |
808 | { | 791 | { |
809 | int i, j; | 792 | int i, j; |
793 | int ret; | ||
810 | int retval = 0; | 794 | int retval = 0; |
795 | struct kvm_memslots *slots; | ||
811 | 796 | ||
812 | /* | 797 | slots = rcu_dereference(kvm->memslots); |
813 | * If mmap_sem isn't taken, we can look the memslots with only | 798 | |
814 | * the mmu_lock by skipping over the slots with userspace_addr == 0. | 799 | for (i = 0; i < slots->nmemslots; i++) { |
815 | */ | 800 | struct kvm_memory_slot *memslot = &slots->memslots[i]; |
816 | for (i = 0; i < kvm->nmemslots; i++) { | ||
817 | struct kvm_memory_slot *memslot = &kvm->memslots[i]; | ||
818 | unsigned long start = memslot->userspace_addr; | 801 | unsigned long start = memslot->userspace_addr; |
819 | unsigned long end; | 802 | unsigned long end; |
820 | 803 | ||
821 | /* mmu_lock protects userspace_addr */ | ||
822 | if (!start) | ||
823 | continue; | ||
824 | |||
825 | end = start + (memslot->npages << PAGE_SHIFT); | 804 | end = start + (memslot->npages << PAGE_SHIFT); |
826 | if (hva >= start && hva < end) { | 805 | if (hva >= start && hva < end) { |
827 | gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; | 806 | gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; |
828 | 807 | ||
829 | retval |= handler(kvm, &memslot->rmap[gfn_offset], | 808 | ret = handler(kvm, &memslot->rmap[gfn_offset], data); |
830 | data); | ||
831 | 809 | ||
832 | for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { | 810 | for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { |
833 | int idx = gfn_offset; | 811 | int idx = gfn_offset; |
834 | idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j); | 812 | idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j); |
835 | retval |= handler(kvm, | 813 | ret |= handler(kvm, |
836 | &memslot->lpage_info[j][idx].rmap_pde, | 814 | &memslot->lpage_info[j][idx].rmap_pde, |
837 | data); | 815 | data); |
838 | } | 816 | } |
817 | trace_kvm_age_page(hva, memslot, ret); | ||
818 | retval |= ret; | ||
839 | } | 819 | } |
840 | } | 820 | } |
841 | 821 | ||
@@ -858,9 +838,15 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
858 | u64 *spte; | 838 | u64 *spte; |
859 | int young = 0; | 839 | int young = 0; |
860 | 840 | ||
861 | /* always return old for EPT */ | 841 | /* |
842 | * Emulate the accessed bit for EPT, by checking if this page has | ||
843 | * an EPT mapping, and clearing it if it does. On the next access, | ||
844 | * a new EPT mapping will be established. | ||
845 | * This has some overhead, but not as much as the cost of swapping | ||
846 | * out actively used pages or breaking up actively used hugepages. | ||
847 | */ | ||
862 | if (!shadow_accessed_mask) | 848 | if (!shadow_accessed_mask) |
863 | return 0; | 849 | return kvm_unmap_rmapp(kvm, rmapp, data); |
864 | 850 | ||
865 | spte = rmap_next(kvm, rmapp, NULL); | 851 | spte = rmap_next(kvm, rmapp, NULL); |
866 | while (spte) { | 852 | while (spte) { |
@@ -1504,8 +1490,8 @@ static int mmu_zap_unsync_children(struct kvm *kvm, | |||
1504 | for_each_sp(pages, sp, parents, i) { | 1490 | for_each_sp(pages, sp, parents, i) { |
1505 | kvm_mmu_zap_page(kvm, sp); | 1491 | kvm_mmu_zap_page(kvm, sp); |
1506 | mmu_pages_clear_parents(&parents); | 1492 | mmu_pages_clear_parents(&parents); |
1493 | zapped++; | ||
1507 | } | 1494 | } |
1508 | zapped += pages.nr; | ||
1509 | kvm_mmu_pages_init(parent, &parents, &pages); | 1495 | kvm_mmu_pages_init(parent, &parents, &pages); |
1510 | } | 1496 | } |
1511 | 1497 | ||
@@ -1556,14 +1542,16 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages) | |||
1556 | */ | 1542 | */ |
1557 | 1543 | ||
1558 | if (used_pages > kvm_nr_mmu_pages) { | 1544 | if (used_pages > kvm_nr_mmu_pages) { |
1559 | while (used_pages > kvm_nr_mmu_pages) { | 1545 | while (used_pages > kvm_nr_mmu_pages && |
1546 | !list_empty(&kvm->arch.active_mmu_pages)) { | ||
1560 | struct kvm_mmu_page *page; | 1547 | struct kvm_mmu_page *page; |
1561 | 1548 | ||
1562 | page = container_of(kvm->arch.active_mmu_pages.prev, | 1549 | page = container_of(kvm->arch.active_mmu_pages.prev, |
1563 | struct kvm_mmu_page, link); | 1550 | struct kvm_mmu_page, link); |
1564 | kvm_mmu_zap_page(kvm, page); | 1551 | used_pages -= kvm_mmu_zap_page(kvm, page); |
1565 | used_pages--; | 1552 | used_pages--; |
1566 | } | 1553 | } |
1554 | kvm_nr_mmu_pages = used_pages; | ||
1567 | kvm->arch.n_free_mmu_pages = 0; | 1555 | kvm->arch.n_free_mmu_pages = 0; |
1568 | } | 1556 | } |
1569 | else | 1557 | else |
@@ -1610,14 +1598,15 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) | |||
1610 | && !sp->role.invalid) { | 1598 | && !sp->role.invalid) { |
1611 | pgprintk("%s: zap %lx %x\n", | 1599 | pgprintk("%s: zap %lx %x\n", |
1612 | __func__, gfn, sp->role.word); | 1600 | __func__, gfn, sp->role.word); |
1613 | kvm_mmu_zap_page(kvm, sp); | 1601 | if (kvm_mmu_zap_page(kvm, sp)) |
1602 | nn = bucket->first; | ||
1614 | } | 1603 | } |
1615 | } | 1604 | } |
1616 | } | 1605 | } |
1617 | 1606 | ||
1618 | static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) | 1607 | static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) |
1619 | { | 1608 | { |
1620 | int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn)); | 1609 | int slot = memslot_id(kvm, gfn); |
1621 | struct kvm_mmu_page *sp = page_header(__pa(pte)); | 1610 | struct kvm_mmu_page *sp = page_header(__pa(pte)); |
1622 | 1611 | ||
1623 | __set_bit(slot, sp->slot_bitmap); | 1612 | __set_bit(slot, sp->slot_bitmap); |
@@ -1641,7 +1630,7 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) | |||
1641 | { | 1630 | { |
1642 | struct page *page; | 1631 | struct page *page; |
1643 | 1632 | ||
1644 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva); | 1633 | gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL); |
1645 | 1634 | ||
1646 | if (gpa == UNMAPPED_GVA) | 1635 | if (gpa == UNMAPPED_GVA) |
1647 | return NULL; | 1636 | return NULL; |
@@ -1854,7 +1843,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1854 | * is responsibility of mmu_get_page / kvm_sync_page. | 1843 | * is responsibility of mmu_get_page / kvm_sync_page. |
1855 | * Same reasoning can be applied to dirty page accounting. | 1844 | * Same reasoning can be applied to dirty page accounting. |
1856 | */ | 1845 | */ |
1857 | if (!can_unsync && is_writeble_pte(*sptep)) | 1846 | if (!can_unsync && is_writable_pte(*sptep)) |
1858 | goto set_pte; | 1847 | goto set_pte; |
1859 | 1848 | ||
1860 | if (mmu_need_write_protect(vcpu, gfn, can_unsync)) { | 1849 | if (mmu_need_write_protect(vcpu, gfn, can_unsync)) { |
@@ -1862,7 +1851,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1862 | __func__, gfn); | 1851 | __func__, gfn); |
1863 | ret = 1; | 1852 | ret = 1; |
1864 | pte_access &= ~ACC_WRITE_MASK; | 1853 | pte_access &= ~ACC_WRITE_MASK; |
1865 | if (is_writeble_pte(spte)) | 1854 | if (is_writable_pte(spte)) |
1866 | spte &= ~PT_WRITABLE_MASK; | 1855 | spte &= ~PT_WRITABLE_MASK; |
1867 | } | 1856 | } |
1868 | } | 1857 | } |
@@ -1883,7 +1872,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1883 | bool reset_host_protection) | 1872 | bool reset_host_protection) |
1884 | { | 1873 | { |
1885 | int was_rmapped = 0; | 1874 | int was_rmapped = 0; |
1886 | int was_writeble = is_writeble_pte(*sptep); | 1875 | int was_writable = is_writable_pte(*sptep); |
1887 | int rmap_count; | 1876 | int rmap_count; |
1888 | 1877 | ||
1889 | pgprintk("%s: spte %llx access %x write_fault %d" | 1878 | pgprintk("%s: spte %llx access %x write_fault %d" |
@@ -1934,7 +1923,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1934 | if (rmap_count > RMAP_RECYCLE_THRESHOLD) | 1923 | if (rmap_count > RMAP_RECYCLE_THRESHOLD) |
1935 | rmap_recycle(vcpu, sptep, gfn); | 1924 | rmap_recycle(vcpu, sptep, gfn); |
1936 | } else { | 1925 | } else { |
1937 | if (was_writeble) | 1926 | if (was_writable) |
1938 | kvm_release_pfn_dirty(pfn); | 1927 | kvm_release_pfn_dirty(pfn); |
1939 | else | 1928 | else |
1940 | kvm_release_pfn_clean(pfn); | 1929 | kvm_release_pfn_clean(pfn); |
@@ -2164,8 +2153,11 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) | |||
2164 | spin_unlock(&vcpu->kvm->mmu_lock); | 2153 | spin_unlock(&vcpu->kvm->mmu_lock); |
2165 | } | 2154 | } |
2166 | 2155 | ||
2167 | static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr) | 2156 | static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, |
2157 | u32 access, u32 *error) | ||
2168 | { | 2158 | { |
2159 | if (error) | ||
2160 | *error = 0; | ||
2169 | return vaddr; | 2161 | return vaddr; |
2170 | } | 2162 | } |
2171 | 2163 | ||
@@ -2749,7 +2741,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) | |||
2749 | if (tdp_enabled) | 2741 | if (tdp_enabled) |
2750 | return 0; | 2742 | return 0; |
2751 | 2743 | ||
2752 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva); | 2744 | gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL); |
2753 | 2745 | ||
2754 | spin_lock(&vcpu->kvm->mmu_lock); | 2746 | spin_lock(&vcpu->kvm->mmu_lock); |
2755 | r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); | 2747 | r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); |
@@ -2789,7 +2781,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) | |||
2789 | if (r) | 2781 | if (r) |
2790 | goto out; | 2782 | goto out; |
2791 | 2783 | ||
2792 | er = emulate_instruction(vcpu, vcpu->run, cr2, error_code, 0); | 2784 | er = emulate_instruction(vcpu, cr2, error_code, 0); |
2793 | 2785 | ||
2794 | switch (er) { | 2786 | switch (er) { |
2795 | case EMULATE_DONE: | 2787 | case EMULATE_DONE: |
@@ -2800,6 +2792,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) | |||
2800 | case EMULATE_FAIL: | 2792 | case EMULATE_FAIL: |
2801 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | 2793 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; |
2802 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | 2794 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; |
2795 | vcpu->run->internal.ndata = 0; | ||
2803 | return 0; | 2796 | return 0; |
2804 | default: | 2797 | default: |
2805 | BUG(); | 2798 | BUG(); |
@@ -2848,16 +2841,13 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu) | |||
2848 | */ | 2841 | */ |
2849 | page = alloc_page(GFP_KERNEL | __GFP_DMA32); | 2842 | page = alloc_page(GFP_KERNEL | __GFP_DMA32); |
2850 | if (!page) | 2843 | if (!page) |
2851 | goto error_1; | 2844 | return -ENOMEM; |
2845 | |||
2852 | vcpu->arch.mmu.pae_root = page_address(page); | 2846 | vcpu->arch.mmu.pae_root = page_address(page); |
2853 | for (i = 0; i < 4; ++i) | 2847 | for (i = 0; i < 4; ++i) |
2854 | vcpu->arch.mmu.pae_root[i] = INVALID_PAGE; | 2848 | vcpu->arch.mmu.pae_root[i] = INVALID_PAGE; |
2855 | 2849 | ||
2856 | return 0; | 2850 | return 0; |
2857 | |||
2858 | error_1: | ||
2859 | free_mmu_pages(vcpu); | ||
2860 | return -ENOMEM; | ||
2861 | } | 2851 | } |
2862 | 2852 | ||
2863 | int kvm_mmu_create(struct kvm_vcpu *vcpu) | 2853 | int kvm_mmu_create(struct kvm_vcpu *vcpu) |
@@ -2937,10 +2927,9 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) | |||
2937 | spin_lock(&kvm_lock); | 2927 | spin_lock(&kvm_lock); |
2938 | 2928 | ||
2939 | list_for_each_entry(kvm, &vm_list, vm_list) { | 2929 | list_for_each_entry(kvm, &vm_list, vm_list) { |
2940 | int npages; | 2930 | int npages, idx; |
2941 | 2931 | ||
2942 | if (!down_read_trylock(&kvm->slots_lock)) | 2932 | idx = srcu_read_lock(&kvm->srcu); |
2943 | continue; | ||
2944 | spin_lock(&kvm->mmu_lock); | 2933 | spin_lock(&kvm->mmu_lock); |
2945 | npages = kvm->arch.n_alloc_mmu_pages - | 2934 | npages = kvm->arch.n_alloc_mmu_pages - |
2946 | kvm->arch.n_free_mmu_pages; | 2935 | kvm->arch.n_free_mmu_pages; |
@@ -2953,7 +2942,7 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) | |||
2953 | nr_to_scan--; | 2942 | nr_to_scan--; |
2954 | 2943 | ||
2955 | spin_unlock(&kvm->mmu_lock); | 2944 | spin_unlock(&kvm->mmu_lock); |
2956 | up_read(&kvm->slots_lock); | 2945 | srcu_read_unlock(&kvm->srcu, idx); |
2957 | } | 2946 | } |
2958 | if (kvm_freed) | 2947 | if (kvm_freed) |
2959 | list_move_tail(&kvm_freed->vm_list, &vm_list); | 2948 | list_move_tail(&kvm_freed->vm_list, &vm_list); |
@@ -3020,9 +3009,11 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm) | |||
3020 | int i; | 3009 | int i; |
3021 | unsigned int nr_mmu_pages; | 3010 | unsigned int nr_mmu_pages; |
3022 | unsigned int nr_pages = 0; | 3011 | unsigned int nr_pages = 0; |
3012 | struct kvm_memslots *slots; | ||
3023 | 3013 | ||
3024 | for (i = 0; i < kvm->nmemslots; i++) | 3014 | slots = rcu_dereference(kvm->memslots); |
3025 | nr_pages += kvm->memslots[i].npages; | 3015 | for (i = 0; i < slots->nmemslots; i++) |
3016 | nr_pages += slots->memslots[i].npages; | ||
3026 | 3017 | ||
3027 | nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000; | 3018 | nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000; |
3028 | nr_mmu_pages = max(nr_mmu_pages, | 3019 | nr_mmu_pages = max(nr_mmu_pages, |
@@ -3247,7 +3238,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte, | |||
3247 | if (is_shadow_present_pte(ent) && !is_last_spte(ent, level)) | 3238 | if (is_shadow_present_pte(ent) && !is_last_spte(ent, level)) |
3248 | audit_mappings_page(vcpu, ent, va, level - 1); | 3239 | audit_mappings_page(vcpu, ent, va, level - 1); |
3249 | else { | 3240 | else { |
3250 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va); | 3241 | gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, va, NULL); |
3251 | gfn_t gfn = gpa >> PAGE_SHIFT; | 3242 | gfn_t gfn = gpa >> PAGE_SHIFT; |
3252 | pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn); | 3243 | pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn); |
3253 | hpa_t hpa = (hpa_t)pfn << PAGE_SHIFT; | 3244 | hpa_t hpa = (hpa_t)pfn << PAGE_SHIFT; |
@@ -3292,10 +3283,12 @@ static void audit_mappings(struct kvm_vcpu *vcpu) | |||
3292 | static int count_rmaps(struct kvm_vcpu *vcpu) | 3283 | static int count_rmaps(struct kvm_vcpu *vcpu) |
3293 | { | 3284 | { |
3294 | int nmaps = 0; | 3285 | int nmaps = 0; |
3295 | int i, j, k; | 3286 | int i, j, k, idx; |
3296 | 3287 | ||
3288 | idx = srcu_read_lock(&kvm->srcu); | ||
3289 | slots = rcu_dereference(kvm->memslots); | ||
3297 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { | 3290 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { |
3298 | struct kvm_memory_slot *m = &vcpu->kvm->memslots[i]; | 3291 | struct kvm_memory_slot *m = &slots->memslots[i]; |
3299 | struct kvm_rmap_desc *d; | 3292 | struct kvm_rmap_desc *d; |
3300 | 3293 | ||
3301 | for (j = 0; j < m->npages; ++j) { | 3294 | for (j = 0; j < m->npages; ++j) { |
@@ -3318,6 +3311,7 @@ static int count_rmaps(struct kvm_vcpu *vcpu) | |||
3318 | } | 3311 | } |
3319 | } | 3312 | } |
3320 | } | 3313 | } |
3314 | srcu_read_unlock(&kvm->srcu, idx); | ||
3321 | return nmaps; | 3315 | return nmaps; |
3322 | } | 3316 | } |
3323 | 3317 | ||
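The recurring reader pattern in the mmu.c hunks — srcu_read_lock(), rcu_dereference(kvm->memslots), walk, srcu_read_unlock() — is the SRCU conversion of the memslot array: slots_lock no longer protects readers, and updaters publish a new kvm_memslots and synchronize. The canonical reader, spelled out (walk_slot() is a hypothetical stand-in for the per-slot work):

	int i, idx;
	struct kvm_memslots *slots;

	idx = srcu_read_lock(&kvm->srcu);
	slots = rcu_dereference(kvm->memslots);
	for (i = 0; i < slots->nmemslots; i++)
		walk_slot(&slots->memslots[i]);	/* hypothetical helper */
	srcu_read_unlock(&kvm->srcu, idx);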
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 61a1b3884b49..be66759321a5 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define __KVM_X86_MMU_H | 2 | #define __KVM_X86_MMU_H |
3 | 3 | ||
4 | #include <linux/kvm_host.h> | 4 | #include <linux/kvm_host.h> |
5 | #include "kvm_cache_regs.h" | ||
5 | 6 | ||
6 | #define PT64_PT_BITS 9 | 7 | #define PT64_PT_BITS 9 |
7 | #define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS) | 8 | #define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS) |
@@ -37,6 +38,16 @@ | |||
37 | #define PT32_ROOT_LEVEL 2 | 38 | #define PT32_ROOT_LEVEL 2 |
38 | #define PT32E_ROOT_LEVEL 3 | 39 | #define PT32E_ROOT_LEVEL 3 |
39 | 40 | ||
41 | #define PT_PDPE_LEVEL 3 | ||
42 | #define PT_DIRECTORY_LEVEL 2 | ||
43 | #define PT_PAGE_TABLE_LEVEL 1 | ||
44 | |||
45 | #define PFERR_PRESENT_MASK (1U << 0) | ||
46 | #define PFERR_WRITE_MASK (1U << 1) | ||
47 | #define PFERR_USER_MASK (1U << 2) | ||
48 | #define PFERR_RSVD_MASK (1U << 3) | ||
49 | #define PFERR_FETCH_MASK (1U << 4) | ||
50 | |||
40 | int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]); | 51 | int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]); |
41 | 52 | ||
42 | static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) | 53 | static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) |
@@ -53,30 +64,6 @@ static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu) | |||
53 | return kvm_mmu_load(vcpu); | 64 | return kvm_mmu_load(vcpu); |
54 | } | 65 | } |
55 | 66 | ||
56 | static inline int is_long_mode(struct kvm_vcpu *vcpu) | ||
57 | { | ||
58 | #ifdef CONFIG_X86_64 | ||
59 | return vcpu->arch.shadow_efer & EFER_LMA; | ||
60 | #else | ||
61 | return 0; | ||
62 | #endif | ||
63 | } | ||
64 | |||
65 | static inline int is_pae(struct kvm_vcpu *vcpu) | ||
66 | { | ||
67 | return vcpu->arch.cr4 & X86_CR4_PAE; | ||
68 | } | ||
69 | |||
70 | static inline int is_pse(struct kvm_vcpu *vcpu) | ||
71 | { | ||
72 | return vcpu->arch.cr4 & X86_CR4_PSE; | ||
73 | } | ||
74 | |||
75 | static inline int is_paging(struct kvm_vcpu *vcpu) | ||
76 | { | ||
77 | return vcpu->arch.cr0 & X86_CR0_PG; | ||
78 | } | ||
79 | |||
80 | static inline int is_present_gpte(unsigned long pte) | 67 | static inline int is_present_gpte(unsigned long pte) |
81 | { | 68 | { |
82 | return pte & PT_PRESENT_MASK; | 69 | return pte & PT_PRESENT_MASK; |
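Moving the PFERR_* masks from mmu.c into mmu.h lets emulate.c and x86.c build and decode page-fault error codes with the same names. The bits follow the hardware #PF error-code layout, so decoding is direct:

	bool present = error_code & PFERR_PRESENT_MASK;	/* protection fault, not a miss */
	bool write   = error_code & PFERR_WRITE_MASK;	/* write access */
	bool user    = error_code & PFERR_USER_MASK;	/* access from CPL 3 */
	bool rsvd    = error_code & PFERR_RSVD_MASK;	/* reserved PTE bit set */
	bool fetch   = error_code & PFERR_FETCH_MASK;	/* instruction fetch (NX) */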
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 72558f8ff3f5..81eab9a50e6a 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -150,7 +150,9 @@ walk: | |||
150 | walker->table_gfn[walker->level - 1] = table_gfn; | 150 | walker->table_gfn[walker->level - 1] = table_gfn; |
151 | walker->pte_gpa[walker->level - 1] = pte_gpa; | 151 | walker->pte_gpa[walker->level - 1] = pte_gpa; |
152 | 152 | ||
153 | kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte)); | 153 | if (kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte))) |
154 | goto not_present; | ||
155 | |||
154 | trace_kvm_mmu_paging_element(pte, walker->level); | 156 | trace_kvm_mmu_paging_element(pte, walker->level); |
155 | 157 | ||
156 | if (!is_present_gpte(pte)) | 158 | if (!is_present_gpte(pte)) |
@@ -160,7 +162,7 @@ walk: | |||
160 | if (rsvd_fault) | 162 | if (rsvd_fault) |
161 | goto access_error; | 163 | goto access_error; |
162 | 164 | ||
163 | if (write_fault && !is_writeble_pte(pte)) | 165 | if (write_fault && !is_writable_pte(pte)) |
164 | if (user_fault || is_write_protection(vcpu)) | 166 | if (user_fault || is_write_protection(vcpu)) |
165 | goto access_error; | 167 | goto access_error; |
166 | 168 | ||
@@ -455,8 +457,6 @@ out_unlock: | |||
455 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | 457 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) |
456 | { | 458 | { |
457 | struct kvm_shadow_walk_iterator iterator; | 459 | struct kvm_shadow_walk_iterator iterator; |
458 | pt_element_t gpte; | ||
459 | gpa_t pte_gpa = -1; | ||
460 | int level; | 460 | int level; |
461 | u64 *sptep; | 461 | u64 *sptep; |
462 | int need_flush = 0; | 462 | int need_flush = 0; |
@@ -467,14 +467,9 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
467 | level = iterator.level; | 467 | level = iterator.level; |
468 | sptep = iterator.sptep; | 468 | sptep = iterator.sptep; |
469 | 469 | ||
470 | /* FIXME: properly handle invlpg on large guest pages */ | ||
471 | if (level == PT_PAGE_TABLE_LEVEL || | 470 | if (level == PT_PAGE_TABLE_LEVEL || |
472 | ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) || | 471 | ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) || |
473 | ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) { | 472 | ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) { |
474 | struct kvm_mmu_page *sp = page_header(__pa(sptep)); | ||
475 | |||
476 | pte_gpa = (sp->gfn << PAGE_SHIFT); | ||
477 | pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); | ||
478 | 473 | ||
479 | if (is_shadow_present_pte(*sptep)) { | 474 | if (is_shadow_present_pte(*sptep)) { |
480 | rmap_remove(vcpu->kvm, sptep); | 475 | rmap_remove(vcpu->kvm, sptep); |
@@ -493,32 +488,25 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
493 | if (need_flush) | 488 | if (need_flush) |
494 | kvm_flush_remote_tlbs(vcpu->kvm); | 489 | kvm_flush_remote_tlbs(vcpu->kvm); |
495 | spin_unlock(&vcpu->kvm->mmu_lock); | 490 | spin_unlock(&vcpu->kvm->mmu_lock); |
496 | |||
497 | if (pte_gpa == -1) | ||
498 | return; | ||
499 | if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte, | ||
500 | sizeof(pt_element_t))) | ||
501 | return; | ||
502 | if (is_present_gpte(gpte) && (gpte & PT_ACCESSED_MASK)) { | ||
503 | if (mmu_topup_memory_caches(vcpu)) | ||
504 | return; | ||
505 | kvm_mmu_pte_write(vcpu, pte_gpa, (const u8 *)&gpte, | ||
506 | sizeof(pt_element_t), 0); | ||
507 | } | ||
508 | } | 491 | } |
509 | 492 | ||
510 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) | 493 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, |
494 | u32 *error) | ||
511 | { | 495 | { |
512 | struct guest_walker walker; | 496 | struct guest_walker walker; |
513 | gpa_t gpa = UNMAPPED_GVA; | 497 | gpa_t gpa = UNMAPPED_GVA; |
514 | int r; | 498 | int r; |
515 | 499 | ||
516 | r = FNAME(walk_addr)(&walker, vcpu, vaddr, 0, 0, 0); | 500 | r = FNAME(walk_addr)(&walker, vcpu, vaddr, |
501 | !!(access & PFERR_WRITE_MASK), | ||
502 | !!(access & PFERR_USER_MASK), | ||
503 | !!(access & PFERR_FETCH_MASK)); | ||
517 | 504 | ||
518 | if (r) { | 505 | if (r) { |
519 | gpa = gfn_to_gpa(walker.gfn); | 506 | gpa = gfn_to_gpa(walker.gfn); |
520 | gpa |= vaddr & ~PAGE_MASK; | 507 | gpa |= vaddr & ~PAGE_MASK; |
521 | } | 508 | } else if (error) |
509 | *error = walker.error_code; | ||
522 | 510 | ||
523 | return gpa; | 511 | return gpa; |
524 | } | 512 | } |
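The new gva_to_gpa() contract takes the access type as PFERR_* bits and, on a failed walk, hands back the architectural error code so the caller can re-inject it. A sketch of a caller under that contract, assuming UNMAPPED_GVA still signals failure and the mmu callback carries this four-argument signature:

	static int write_probe(struct kvm_vcpu *vcpu, gva_t gva)
	{
		u32 error;
		gpa_t gpa;

		gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva, PFERR_WRITE_MASK,
						&error);
		if (gpa == UNMAPPED_GVA) {
			kvm_inject_page_fault(vcpu, gva, error);
			return -EFAULT;
		}

		return 0;
	}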
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index c17404add91f..737361fcd503 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/highmem.h> | 26 | #include <linux/highmem.h> |
27 | #include <linux/sched.h> | 27 | #include <linux/sched.h> |
28 | #include <linux/ftrace_event.h> | 28 | #include <linux/ftrace_event.h> |
29 | #include <linux/slab.h> | ||
29 | 30 | ||
30 | #include <asm/desc.h> | 31 | #include <asm/desc.h> |
31 | 32 | ||
@@ -46,6 +47,7 @@ MODULE_LICENSE("GPL"); | |||
46 | #define SVM_FEATURE_NPT (1 << 0) | 47 | #define SVM_FEATURE_NPT (1 << 0) |
47 | #define SVM_FEATURE_LBRV (1 << 1) | 48 | #define SVM_FEATURE_LBRV (1 << 1) |
48 | #define SVM_FEATURE_SVML (1 << 2) | 49 | #define SVM_FEATURE_SVML (1 << 2) |
50 | #define SVM_FEATURE_PAUSE_FILTER (1 << 10) | ||
49 | 51 | ||
50 | #define NESTED_EXIT_HOST 0 /* Exit handled on host level */ | 52 | #define NESTED_EXIT_HOST 0 /* Exit handled on host level */ |
51 | #define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */ | 53 | #define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */ |
@@ -53,15 +55,6 @@ MODULE_LICENSE("GPL"); | |||
53 | 55 | ||
54 | #define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) | 56 | #define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) |
55 | 57 | ||
56 | /* Turn on to get debugging output*/ | ||
57 | /* #define NESTED_DEBUG */ | ||
58 | |||
59 | #ifdef NESTED_DEBUG | ||
60 | #define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args) | ||
61 | #else | ||
62 | #define nsvm_printk(fmt, args...) do {} while(0) | ||
63 | #endif | ||
64 | |||
65 | static const u32 host_save_user_msrs[] = { | 58 | static const u32 host_save_user_msrs[] = { |
66 | #ifdef CONFIG_X86_64 | 59 | #ifdef CONFIG_X86_64 |
67 | MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE, | 60 | MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE, |
@@ -85,6 +78,9 @@ struct nested_state { | |||
85 | /* gpa pointers to the real vectors */ | 78 | /* gpa pointers to the real vectors */ |
86 | u64 vmcb_msrpm; | 79 | u64 vmcb_msrpm; |
87 | 80 | ||
81 | /* A VMEXIT is required but not yet emulated */ | ||
82 | bool exit_required; | ||
83 | |||
88 | /* cache for intercepts of the guest */ | 84 | /* cache for intercepts of the guest */ |
89 | u16 intercept_cr_read; | 85 | u16 intercept_cr_read; |
90 | u16 intercept_cr_write; | 86 | u16 intercept_cr_write; |
@@ -112,6 +108,8 @@ struct vcpu_svm { | |||
112 | u32 *msrpm; | 108 | u32 *msrpm; |
113 | 109 | ||
114 | struct nested_state nested; | 110 | struct nested_state nested; |
111 | |||
112 | bool nmi_singlestep; | ||
115 | }; | 113 | }; |
116 | 114 | ||
117 | /* enable NPT for AMD64 and X86 with PAE */ | 115 | /* enable NPT for AMD64 and X86 with PAE */ |
@@ -234,7 +232,7 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
234 | efer &= ~EFER_LME; | 232 | efer &= ~EFER_LME; |
235 | 233 | ||
236 | to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; | 234 | to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; |
237 | vcpu->arch.shadow_efer = efer; | 235 | vcpu->arch.efer = efer; |
238 | } | 236 | } |
239 | 237 | ||
240 | static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | 238 | static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, |
@@ -286,7 +284,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
286 | struct vcpu_svm *svm = to_svm(vcpu); | 284 | struct vcpu_svm *svm = to_svm(vcpu); |
287 | 285 | ||
288 | if (!svm->next_rip) { | 286 | if (!svm->next_rip) { |
289 | if (emulate_instruction(vcpu, vcpu->run, 0, 0, EMULTYPE_SKIP) != | 287 | if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) != |
290 | EMULATE_DONE) | 288 | EMULATE_DONE) |
291 | printk(KERN_DEBUG "%s: NOP\n", __func__); | 289 | printk(KERN_DEBUG "%s: NOP\n", __func__); |
292 | return; | 290 | return; |
@@ -316,75 +314,79 @@ static void svm_hardware_disable(void *garbage) | |||
316 | cpu_svm_disable(); | 314 | cpu_svm_disable(); |
317 | } | 315 | } |
318 | 316 | ||
319 | static void svm_hardware_enable(void *garbage) | 317 | static int svm_hardware_enable(void *garbage) |
320 | { | 318 | { |
321 | 319 | ||
322 | struct svm_cpu_data *svm_data; | 320 | struct svm_cpu_data *sd; |
323 | uint64_t efer; | 321 | uint64_t efer; |
324 | struct descriptor_table gdt_descr; | 322 | struct descriptor_table gdt_descr; |
325 | struct desc_struct *gdt; | 323 | struct desc_struct *gdt; |
326 | int me = raw_smp_processor_id(); | 324 | int me = raw_smp_processor_id(); |
327 | 325 | ||
326 | rdmsrl(MSR_EFER, efer); | ||
327 | if (efer & EFER_SVME) | ||
328 | return -EBUSY; | ||
329 | |||
328 | if (!has_svm()) { | 330 | if (!has_svm()) { |
329 | printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me); | 331 | printk(KERN_ERR "svm_hardware_enable: err EOPNOTSUPP on %d\n", |
330 | return; | 332 | me); |
333 | return -EINVAL; | ||
331 | } | 334 | } |
332 | svm_data = per_cpu(svm_data, me); | 335 | sd = per_cpu(svm_data, me); |
333 | 336 | ||
334 | if (!svm_data) { | 337 | if (!sd) { |
335 | printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n", | 338 | printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n", |
336 | me); | 339 | me); |
337 | return; | 340 | return -EINVAL; |
338 | } | 341 | } |
339 | 342 | ||
340 | svm_data->asid_generation = 1; | 343 | sd->asid_generation = 1; |
341 | svm_data->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; | 344 | sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; |
342 | svm_data->next_asid = svm_data->max_asid + 1; | 345 | sd->next_asid = sd->max_asid + 1; |
343 | 346 | ||
344 | kvm_get_gdt(&gdt_descr); | 347 | kvm_get_gdt(&gdt_descr); |
345 | gdt = (struct desc_struct *)gdt_descr.base; | 348 | gdt = (struct desc_struct *)gdt_descr.base; |
346 | svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); | 349 | sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); |
347 | 350 | ||
348 | rdmsrl(MSR_EFER, efer); | ||
349 | wrmsrl(MSR_EFER, efer | EFER_SVME); | 351 | wrmsrl(MSR_EFER, efer | EFER_SVME); |
350 | 352 | ||
351 | wrmsrl(MSR_VM_HSAVE_PA, | 353 | wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT); |
352 | page_to_pfn(svm_data->save_area) << PAGE_SHIFT); | 354 | |
355 | return 0; | ||
353 | } | 356 | } |
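svm_hardware_enable() returning an error (for example -EBUSY when EFER.SVME is already set by another hypervisor) lets bring-up abort instead of limping on with some CPUs unarmed. A sketch of how a caller might aggregate the per-CPU results; enable_failed and the two helpers are illustrative, not how generic KVM actually wires this up:

	static atomic_t enable_failed = ATOMIC_INIT(0);

	static void svm_enable_one(void *unused)
	{
		if (svm_hardware_enable(NULL))
			atomic_inc(&enable_failed);
	}

	static int svm_enable_all(void)
	{
		on_each_cpu(svm_enable_one, NULL, 1);
		return atomic_read(&enable_failed) ? -EBUSY : 0;
	}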
354 | 357 | ||
355 | static void svm_cpu_uninit(int cpu) | 358 | static void svm_cpu_uninit(int cpu) |
356 | { | 359 | { |
357 | struct svm_cpu_data *svm_data | 360 | struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id()); |
358 | = per_cpu(svm_data, raw_smp_processor_id()); | ||
359 | 361 | ||
360 | if (!svm_data) | 362 | if (!sd) |
361 | return; | 363 | return; |
362 | 364 | ||
363 | per_cpu(svm_data, raw_smp_processor_id()) = NULL; | 365 | per_cpu(svm_data, raw_smp_processor_id()) = NULL; |
364 | __free_page(svm_data->save_area); | 366 | __free_page(sd->save_area); |
365 | kfree(svm_data); | 367 | kfree(sd); |
366 | } | 368 | } |
367 | 369 | ||
368 | static int svm_cpu_init(int cpu) | 370 | static int svm_cpu_init(int cpu) |
369 | { | 371 | { |
370 | struct svm_cpu_data *svm_data; | 372 | struct svm_cpu_data *sd; |
371 | int r; | 373 | int r; |
372 | 374 | ||
373 | svm_data = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL); | 375 | sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL); |
374 | if (!svm_data) | 376 | if (!sd) |
375 | return -ENOMEM; | 377 | return -ENOMEM; |
376 | svm_data->cpu = cpu; | 378 | sd->cpu = cpu; |
377 | svm_data->save_area = alloc_page(GFP_KERNEL); | 379 | sd->save_area = alloc_page(GFP_KERNEL); |
378 | r = -ENOMEM; | 380 | r = -ENOMEM; |
379 | if (!svm_data->save_area) | 381 | if (!sd->save_area) |
380 | goto err_1; | 382 | goto err_1; |
381 | 383 | ||
382 | per_cpu(svm_data, cpu) = svm_data; | 384 | per_cpu(svm_data, cpu) = sd; |
383 | 385 | ||
384 | return 0; | 386 | return 0; |
385 | 387 | ||
386 | err_1: | 388 | err_1: |
387 | kfree(svm_data); | 389 | kfree(sd); |
388 | return r; | 390 | return r; |
389 | 391 | ||
390 | } | 392 | } |
@@ -476,7 +478,7 @@ static __init int svm_hardware_setup(void) | |||
476 | kvm_enable_efer_bits(EFER_SVME); | 478 | kvm_enable_efer_bits(EFER_SVME); |
477 | } | 479 | } |
478 | 480 | ||
479 | for_each_online_cpu(cpu) { | 481 | for_each_possible_cpu(cpu) { |
480 | r = svm_cpu_init(cpu); | 482 | r = svm_cpu_init(cpu); |
481 | if (r) | 483 | if (r) |
482 | goto err; | 484 | goto err; |
@@ -510,7 +512,7 @@ static __exit void svm_hardware_unsetup(void) | |||
510 | { | 512 | { |
511 | int cpu; | 513 | int cpu; |
512 | 514 | ||
513 | for_each_online_cpu(cpu) | 515 | for_each_possible_cpu(cpu) |
514 | svm_cpu_uninit(cpu); | 516 | svm_cpu_uninit(cpu); |
515 | 517 | ||
516 | __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER); | 518 | __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER); |
@@ -539,6 +541,8 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
539 | struct vmcb_control_area *control = &svm->vmcb->control; | 541 | struct vmcb_control_area *control = &svm->vmcb->control; |
540 | struct vmcb_save_area *save = &svm->vmcb->save; | 542 | struct vmcb_save_area *save = &svm->vmcb->save; |
541 | 543 | ||
544 | svm->vcpu.fpu_active = 1; | ||
545 | |||
542 | control->intercept_cr_read = INTERCEPT_CR0_MASK | | 546 | control->intercept_cr_read = INTERCEPT_CR0_MASK | |
543 | INTERCEPT_CR3_MASK | | 547 | INTERCEPT_CR3_MASK | |
544 | INTERCEPT_CR4_MASK; | 548 | INTERCEPT_CR4_MASK; |
@@ -551,13 +555,19 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
551 | control->intercept_dr_read = INTERCEPT_DR0_MASK | | 555 | control->intercept_dr_read = INTERCEPT_DR0_MASK | |
552 | INTERCEPT_DR1_MASK | | 556 | INTERCEPT_DR1_MASK | |
553 | INTERCEPT_DR2_MASK | | 557 | INTERCEPT_DR2_MASK | |
554 | INTERCEPT_DR3_MASK; | 558 | INTERCEPT_DR3_MASK | |
559 | INTERCEPT_DR4_MASK | | ||
560 | INTERCEPT_DR5_MASK | | ||
561 | INTERCEPT_DR6_MASK | | ||
562 | INTERCEPT_DR7_MASK; | ||
555 | 563 | ||
556 | control->intercept_dr_write = INTERCEPT_DR0_MASK | | 564 | control->intercept_dr_write = INTERCEPT_DR0_MASK | |
557 | INTERCEPT_DR1_MASK | | 565 | INTERCEPT_DR1_MASK | |
558 | INTERCEPT_DR2_MASK | | 566 | INTERCEPT_DR2_MASK | |
559 | INTERCEPT_DR3_MASK | | 567 | INTERCEPT_DR3_MASK | |
568 | INTERCEPT_DR4_MASK | | ||
560 | INTERCEPT_DR5_MASK | | 569 | INTERCEPT_DR5_MASK | |
570 | INTERCEPT_DR6_MASK | | ||
561 | INTERCEPT_DR7_MASK; | 571 | INTERCEPT_DR7_MASK; |
562 | 572 | ||
563 | control->intercept_exceptions = (1 << PF_VECTOR) | | 573 | control->intercept_exceptions = (1 << PF_VECTOR) | |
@@ -568,6 +578,7 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
568 | control->intercept = (1ULL << INTERCEPT_INTR) | | 578 | control->intercept = (1ULL << INTERCEPT_INTR) | |
569 | (1ULL << INTERCEPT_NMI) | | 579 | (1ULL << INTERCEPT_NMI) | |
570 | (1ULL << INTERCEPT_SMI) | | 580 | (1ULL << INTERCEPT_SMI) | |
581 | (1ULL << INTERCEPT_SELECTIVE_CR0) | | ||
571 | (1ULL << INTERCEPT_CPUID) | | 582 | (1ULL << INTERCEPT_CPUID) | |
572 | (1ULL << INTERCEPT_INVD) | | 583 | (1ULL << INTERCEPT_INVD) | |
573 | (1ULL << INTERCEPT_HLT) | | 584 | (1ULL << INTERCEPT_HLT) | |
@@ -625,11 +636,12 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
625 | save->rip = 0x0000fff0; | 636 | save->rip = 0x0000fff0; |
626 | svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; | 637 | svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; |
627 | 638 | ||
628 | /* | 639 | /* This is the guest-visible cr0 value. |
629 | * cr0 val on cpu init should be 0x60000010, we enable cpu | 640 | * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. |
630 | * cache by default. the orderly way is to enable cache in bios. | ||
631 | */ | 641 | */ |
632 | save->cr0 = 0x00000010 | X86_CR0_PG | X86_CR0_WP; | 642 | svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; |
643 | kvm_set_cr0(&svm->vcpu, svm->vcpu.arch.cr0); | ||
644 | |||
633 | save->cr4 = X86_CR4_PAE; | 645 | save->cr4 = X86_CR4_PAE; |
634 | /* rdx = ?? */ | 646 | /* rdx = ?? */ |
635 | 647 | ||
@@ -639,13 +651,9 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
639 | control->intercept &= ~((1ULL << INTERCEPT_TASK_SWITCH) | | 651 | control->intercept &= ~((1ULL << INTERCEPT_TASK_SWITCH) | |
640 | (1ULL << INTERCEPT_INVLPG)); | 652 | (1ULL << INTERCEPT_INVLPG)); |
641 | control->intercept_exceptions &= ~(1 << PF_VECTOR); | 653 | control->intercept_exceptions &= ~(1 << PF_VECTOR); |
642 | control->intercept_cr_read &= ~(INTERCEPT_CR0_MASK| | 654 | control->intercept_cr_read &= ~INTERCEPT_CR3_MASK; |
643 | INTERCEPT_CR3_MASK); | 655 | control->intercept_cr_write &= ~INTERCEPT_CR3_MASK; |
644 | control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK| | ||
645 | INTERCEPT_CR3_MASK); | ||
646 | save->g_pat = 0x0007040600070406ULL; | 656 | save->g_pat = 0x0007040600070406ULL; |
647 | /* enable caching because the QEMU Bios doesn't enable it */ | ||
648 | save->cr0 = X86_CR0_ET; | ||
649 | save->cr3 = 0; | 657 | save->cr3 = 0; |
650 | save->cr4 = 0; | 658 | save->cr4 = 0; |
651 | } | 659 | } |
@@ -654,6 +662,11 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
654 | svm->nested.vmcb = 0; | 662 | svm->nested.vmcb = 0; |
655 | svm->vcpu.arch.hflags = 0; | 663 | svm->vcpu.arch.hflags = 0; |
656 | 664 | ||
665 | if (svm_has(SVM_FEATURE_PAUSE_FILTER)) { | ||
666 | control->pause_filter_count = 3000; | ||
667 | control->intercept |= (1ULL << INTERCEPT_PAUSE); | ||
668 | } | ||
669 | |||
657 | enable_gif(svm); | 670 | enable_gif(svm); |
658 | } | 671 | } |
659 | 672 | ||
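PAUSE filtering counts guest PAUSE executions in hardware and only exits once pause_filter_count expires, so a spinning vCPU traps to the host roughly every 3000 iterations instead of on every PAUSE. The feature test is a CPUID bit; a sketch of what svm_has() plausibly resolves to here, assuming SVM_CPUID_FUNC is 0x8000000a:

	static bool cpu_has_pause_filter(void)
	{
		/* CPUID 0x8000000a EDX bit 10: PAUSE intercept filter */
		return cpuid_edx(SVM_CPUID_FUNC) & SVM_FEATURE_PAUSE_FILTER;
	}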
@@ -693,29 +706,28 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | |||
693 | if (err) | 706 | if (err) |
694 | goto free_svm; | 707 | goto free_svm; |
695 | 708 | ||
709 | err = -ENOMEM; | ||
696 | page = alloc_page(GFP_KERNEL); | 710 | page = alloc_page(GFP_KERNEL); |
697 | if (!page) { | 711 | if (!page) |
698 | err = -ENOMEM; | ||
699 | goto uninit; | 712 | goto uninit; |
700 | } | ||
701 | 713 | ||
702 | err = -ENOMEM; | ||
703 | msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); | 714 | msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); |
704 | if (!msrpm_pages) | 715 | if (!msrpm_pages) |
705 | goto uninit; | 716 | goto free_page1; |
706 | 717 | ||
707 | nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); | 718 | nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); |
708 | if (!nested_msrpm_pages) | 719 | if (!nested_msrpm_pages) |
709 | goto uninit; | 720 | goto free_page2; |
710 | |||
711 | svm->msrpm = page_address(msrpm_pages); | ||
712 | svm_vcpu_init_msrpm(svm->msrpm); | ||
713 | 721 | ||
714 | hsave_page = alloc_page(GFP_KERNEL); | 722 | hsave_page = alloc_page(GFP_KERNEL); |
715 | if (!hsave_page) | 723 | if (!hsave_page) |
716 | goto uninit; | 724 | goto free_page3; |
725 | |||
717 | svm->nested.hsave = page_address(hsave_page); | 726 | svm->nested.hsave = page_address(hsave_page); |
718 | 727 | ||
728 | svm->msrpm = page_address(msrpm_pages); | ||
729 | svm_vcpu_init_msrpm(svm->msrpm); | ||
730 | |||
719 | svm->nested.msrpm = page_address(nested_msrpm_pages); | 731 | svm->nested.msrpm = page_address(nested_msrpm_pages); |
720 | 732 | ||
721 | svm->vmcb = page_address(page); | 733 | svm->vmcb = page_address(page); |
@@ -725,13 +737,18 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | |||
725 | init_vmcb(svm); | 737 | init_vmcb(svm); |
726 | 738 | ||
727 | fx_init(&svm->vcpu); | 739 | fx_init(&svm->vcpu); |
728 | svm->vcpu.fpu_active = 1; | ||
729 | svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; | 740 | svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; |
730 | if (kvm_vcpu_is_bsp(&svm->vcpu)) | 741 | if (kvm_vcpu_is_bsp(&svm->vcpu)) |
731 | svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; | 742 | svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; |
732 | 743 | ||
733 | return &svm->vcpu; | 744 | return &svm->vcpu; |
734 | 745 | ||
746 | free_page3: | ||
747 | __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER); | ||
748 | free_page2: | ||
749 | __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER); | ||
750 | free_page1: | ||
751 | __free_page(page); | ||
735 | uninit: | 752 | uninit: |
736 | kvm_vcpu_uninit(&svm->vcpu); | 753 | kvm_vcpu_uninit(&svm->vcpu); |
737 | free_svm: | 754 | free_svm: |
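The reordered labels restore strict LIFO unwinding: each failure point frees exactly what was already allocated, nothing more. The idiom in isolation, with placeholder allocations (sizes and names are illustrative only; on success, ownership would pass to the caller):

	static int setup_three(void)
	{
		void *a, *b, *c;

		a = kzalloc(64, GFP_KERNEL);
		if (!a)
			goto out;
		b = kzalloc(64, GFP_KERNEL);
		if (!b)
			goto err_a;
		c = kzalloc(64, GFP_KERNEL);
		if (!c)
			goto err_b;
		return 0;

	err_b:
		kfree(b);
	err_a:
		kfree(a);
	out:
		return -ENOMEM;
	}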
@@ -758,17 +775,18 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
758 | int i; | 775 | int i; |
759 | 776 | ||
760 | if (unlikely(cpu != vcpu->cpu)) { | 777 | if (unlikely(cpu != vcpu->cpu)) { |
761 | u64 tsc_this, delta; | 778 | u64 delta; |
762 | 779 | ||
763 | /* | 780 | if (check_tsc_unstable()) { |
764 | * Make sure that the guest sees a monotonically | 781 | /* |
765 | * increasing TSC. | 782 | * Make sure that the guest sees a monotonically |
766 | */ | 783 | * increasing TSC. |
767 | rdtscll(tsc_this); | 784 | */ |
768 | delta = vcpu->arch.host_tsc - tsc_this; | 785 | delta = vcpu->arch.host_tsc - native_read_tsc(); |
769 | svm->vmcb->control.tsc_offset += delta; | 786 | svm->vmcb->control.tsc_offset += delta; |
770 | if (is_nested(svm)) | 787 | if (is_nested(svm)) |
771 | svm->nested.hsave->control.tsc_offset += delta; | 788 | svm->nested.hsave->control.tsc_offset += delta; |
789 | } | ||
772 | vcpu->cpu = cpu; | 790 | vcpu->cpu = cpu; |
773 | kvm_migrate_timers(vcpu); | 791 | kvm_migrate_timers(vcpu); |
774 | svm->asid_generation = 0; | 792 | svm->asid_generation = 0; |
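Because the guest sees host_tsc + tsc_offset, migrating a vCPU to a CPU whose TSC lags the old one would make guest time jump backwards; the hunk above now applies the compensation only when check_tsc_unstable() says the host TSCs can actually diverge. The offset math in isolation (hypothetical helper, assuming host_tsc was sampled on the old CPU in svm_vcpu_put()):

	static u64 migrate_tsc_offset(u64 tsc_offset, u64 old_host_tsc,
				      u64 new_host_tsc)
	{
		/* guest_tsc = host_tsc + offset; keep guest_tsc monotonic */
		return tsc_offset + (old_host_tsc - new_host_tsc);
	}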
@@ -787,7 +805,7 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) | |||
787 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) | 805 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) |
788 | wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); | 806 | wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); |
789 | 807 | ||
790 | rdtscll(vcpu->arch.host_tsc); | 808 | vcpu->arch.host_tsc = native_read_tsc(); |
791 | } | 809 | } |
792 | 810 | ||
793 | static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) | 811 | static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) |
@@ -950,42 +968,59 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | |||
950 | svm->vmcb->save.gdtr.base = dt->base ; | 968 | svm->vmcb->save.gdtr.base = dt->base ; |
951 | } | 969 | } |
952 | 970 | ||
971 | static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) | ||
972 | { | ||
973 | } | ||
974 | |||
953 | static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) | 975 | static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) |
954 | { | 976 | { |
955 | } | 977 | } |
956 | 978 | ||
979 | static void update_cr0_intercept(struct vcpu_svm *svm) | ||
980 | { | ||
981 | ulong gcr0 = svm->vcpu.arch.cr0; | ||
982 | u64 *hcr0 = &svm->vmcb->save.cr0; | ||
983 | |||
984 | if (!svm->vcpu.fpu_active) | ||
985 | *hcr0 |= SVM_CR0_SELECTIVE_MASK; | ||
986 | else | ||
987 | *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK) | ||
988 | | (gcr0 & SVM_CR0_SELECTIVE_MASK); | ||
989 | |||
990 | |||
991 | if (gcr0 == *hcr0 && svm->vcpu.fpu_active) { | ||
992 | svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; | ||
993 | svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; | ||
994 | } else { | ||
995 | svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK; | ||
996 | svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR0_MASK; | ||
997 | } | ||
998 | } | ||
999 | |||
957 | static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | 1000 | static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) |
958 | { | 1001 | { |
959 | struct vcpu_svm *svm = to_svm(vcpu); | 1002 | struct vcpu_svm *svm = to_svm(vcpu); |
960 | 1003 | ||
961 | #ifdef CONFIG_X86_64 | 1004 | #ifdef CONFIG_X86_64 |
962 | if (vcpu->arch.shadow_efer & EFER_LME) { | 1005 | if (vcpu->arch.efer & EFER_LME) { |
963 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { | 1006 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { |
964 | vcpu->arch.shadow_efer |= EFER_LMA; | 1007 | vcpu->arch.efer |= EFER_LMA; |
965 | svm->vmcb->save.efer |= EFER_LMA | EFER_LME; | 1008 | svm->vmcb->save.efer |= EFER_LMA | EFER_LME; |
966 | } | 1009 | } |
967 | 1010 | ||
968 | if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) { | 1011 | if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) { |
969 | vcpu->arch.shadow_efer &= ~EFER_LMA; | 1012 | vcpu->arch.efer &= ~EFER_LMA; |
970 | svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME); | 1013 | svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME); |
971 | } | 1014 | } |
972 | } | 1015 | } |
973 | #endif | 1016 | #endif |
974 | if (npt_enabled) | 1017 | vcpu->arch.cr0 = cr0; |
975 | goto set; | ||
976 | 1018 | ||
977 | if ((vcpu->arch.cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) { | 1019 | if (!npt_enabled) |
978 | svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); | 1020 | cr0 |= X86_CR0_PG | X86_CR0_WP; |
979 | vcpu->fpu_active = 1; | ||
980 | } | ||
981 | 1021 | ||
982 | vcpu->arch.cr0 = cr0; | 1022 | if (!vcpu->fpu_active) |
983 | cr0 |= X86_CR0_PG | X86_CR0_WP; | ||
984 | if (!vcpu->fpu_active) { | ||
985 | svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR); | ||
986 | cr0 |= X86_CR0_TS; | 1023 | cr0 |= X86_CR0_TS; |
987 | } | ||
988 | set: | ||
989 | /* | 1024 | /* |
990 | * re-enable caching here because the QEMU bios | 1025 | * re-enable caching here because the QEMU bios |
991 | * does not do it - this results in some delay at | 1026 | * does not do it - this results in some delay at |
@@ -993,6 +1028,7 @@ set: | |||
993 | */ | 1028 | */ |
994 | cr0 &= ~(X86_CR0_CD | X86_CR0_NW); | 1029 | cr0 &= ~(X86_CR0_CD | X86_CR0_NW); |
995 | svm->vmcb->save.cr0 = cr0; | 1030 | svm->vmcb->save.cr0 = cr0; |
1031 | update_cr0_intercept(svm); | ||
996 | } | 1032 | } |
997 | 1033 | ||
998 | static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 1034 | static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) |
@@ -1045,7 +1081,7 @@ static void update_db_intercept(struct kvm_vcpu *vcpu) | |||
1045 | svm->vmcb->control.intercept_exceptions &= | 1081 | svm->vmcb->control.intercept_exceptions &= |
1046 | ~((1 << DB_VECTOR) | (1 << BP_VECTOR)); | 1082 | ~((1 << DB_VECTOR) | (1 << BP_VECTOR)); |
1047 | 1083 | ||
1048 | if (vcpu->arch.singlestep) | 1084 | if (svm->nmi_singlestep) |
1049 | svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR); | 1085 | svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR); |
1050 | 1086 | ||
1051 | if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { | 1087 | if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { |
@@ -1060,26 +1096,16 @@ static void update_db_intercept(struct kvm_vcpu *vcpu) | |||
1060 | vcpu->guest_debug = 0; | 1096 | vcpu->guest_debug = 0; |
1061 | } | 1097 | } |
1062 | 1098 | ||
1063 | static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) | 1099 | static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) |
1064 | { | 1100 | { |
1065 | int old_debug = vcpu->guest_debug; | ||
1066 | struct vcpu_svm *svm = to_svm(vcpu); | 1101 | struct vcpu_svm *svm = to_svm(vcpu); |
1067 | 1102 | ||
1068 | vcpu->guest_debug = dbg->control; | ||
1069 | |||
1070 | update_db_intercept(vcpu); | ||
1071 | |||
1072 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) | 1103 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) |
1073 | svm->vmcb->save.dr7 = dbg->arch.debugreg[7]; | 1104 | svm->vmcb->save.dr7 = dbg->arch.debugreg[7]; |
1074 | else | 1105 | else |
1075 | svm->vmcb->save.dr7 = vcpu->arch.dr7; | 1106 | svm->vmcb->save.dr7 = vcpu->arch.dr7; |
1076 | 1107 | ||
1077 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | 1108 | update_db_intercept(vcpu); |
1078 | svm->vmcb->save.rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF; | ||
1079 | else if (old_debug & KVM_GUESTDBG_SINGLESTEP) | ||
1080 | svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); | ||
1081 | |||
1082 | return 0; | ||
1083 | } | 1109 | } |
1084 | 1110 | ||
1085 | static void load_host_msrs(struct kvm_vcpu *vcpu) | 1111 | static void load_host_msrs(struct kvm_vcpu *vcpu) |
@@ -1096,91 +1122,85 @@ static void save_host_msrs(struct kvm_vcpu *vcpu) | |||
1096 | #endif | 1122 | #endif |
1097 | } | 1123 | } |
1098 | 1124 | ||
1099 | static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data) | 1125 | static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) |
1100 | { | 1126 | { |
1101 | if (svm_data->next_asid > svm_data->max_asid) { | 1127 | if (sd->next_asid > sd->max_asid) { |
1102 | ++svm_data->asid_generation; | 1128 | ++sd->asid_generation; |
1103 | svm_data->next_asid = 1; | 1129 | sd->next_asid = 1; |
1104 | svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID; | 1130 | svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID; |
1105 | } | 1131 | } |
1106 | 1132 | ||
1107 | svm->asid_generation = svm_data->asid_generation; | 1133 | svm->asid_generation = sd->asid_generation; |
1108 | svm->vmcb->control.asid = svm_data->next_asid++; | 1134 | svm->vmcb->control.asid = sd->next_asid++; |
1109 | } | 1135 | } |
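The ASID scheme tags each vCPU with the per-CPU pool generation: when new_asid() exhausts the pool it bumps the generation and asks hardware to flush all ASIDs, which invalidates every outstanding tag. A vCPU whose cached generation is stale must therefore take a fresh ASID before its next VMRUN; a sketch of the check the run path performs (pre_svm_run() in the real code does essentially this):

	static void ensure_valid_asid(struct vcpu_svm *svm,
				      struct svm_cpu_data *sd)
	{
		if (svm->asid_generation != sd->asid_generation)
			new_asid(svm, sd);
	}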
1110 | 1136 | ||
1111 | static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr) | 1137 | static int svm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *dest) |
1112 | { | 1138 | { |
1113 | struct vcpu_svm *svm = to_svm(vcpu); | 1139 | struct vcpu_svm *svm = to_svm(vcpu); |
1114 | unsigned long val; | ||
1115 | 1140 | ||
1116 | switch (dr) { | 1141 | switch (dr) { |
1117 | case 0 ... 3: | 1142 | case 0 ... 3: |
1118 | val = vcpu->arch.db[dr]; | 1143 | *dest = vcpu->arch.db[dr]; |
1119 | break; | 1144 | break; |
1145 | case 4: | ||
1146 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | ||
1147 | return EMULATE_FAIL; /* will re-inject UD */ | ||
1148 | /* fall through */ | ||
1120 | case 6: | 1149 | case 6: |
1121 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) | 1150 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) |
1122 | val = vcpu->arch.dr6; | 1151 | *dest = vcpu->arch.dr6; |
1123 | else | 1152 | else |
1124 | val = svm->vmcb->save.dr6; | 1153 | *dest = svm->vmcb->save.dr6; |
1125 | break; | 1154 | break; |
1155 | case 5: | ||
1156 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | ||
1157 | return EMULATE_FAIL; /* will re-inject UD */ | ||
1158 | /* fall through */ | ||
1126 | case 7: | 1159 | case 7: |
1127 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) | 1160 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) |
1128 | val = vcpu->arch.dr7; | 1161 | *dest = vcpu->arch.dr7; |
1129 | else | 1162 | else |
1130 | val = svm->vmcb->save.dr7; | 1163 | *dest = svm->vmcb->save.dr7; |
1131 | break; | 1164 | break; |
1132 | default: | ||
1133 | val = 0; | ||
1134 | } | 1165 | } |
1135 | 1166 | ||
1136 | return val; | 1167 | return EMULATE_DONE; |
1137 | } | 1168 | } |
1138 | 1169 | ||
1139 | static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, | 1170 | static int svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value) |
1140 | int *exception) | ||
1141 | { | 1171 | { |
1142 | struct vcpu_svm *svm = to_svm(vcpu); | 1172 | struct vcpu_svm *svm = to_svm(vcpu); |
1143 | 1173 | ||
1144 | *exception = 0; | ||
1145 | |||
1146 | switch (dr) { | 1174 | switch (dr) { |
1147 | case 0 ... 3: | 1175 | case 0 ... 3: |
1148 | vcpu->arch.db[dr] = value; | 1176 | vcpu->arch.db[dr] = value; |
1149 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) | 1177 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) |
1150 | vcpu->arch.eff_db[dr] = value; | 1178 | vcpu->arch.eff_db[dr] = value; |
1151 | return; | 1179 | break; |
1152 | case 4 ... 5: | 1180 | case 4: |
1153 | if (vcpu->arch.cr4 & X86_CR4_DE) | 1181 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) |
1154 | *exception = UD_VECTOR; | 1182 | return EMULATE_FAIL; /* will re-inject UD */ |
1155 | return; | 1183 | /* fall through */ |
1156 | case 6: | 1184 | case 6: |
1157 | if (value & 0xffffffff00000000ULL) { | ||
1158 | *exception = GP_VECTOR; | ||
1159 | return; | ||
1160 | } | ||
1161 | vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1; | 1185 | vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1; |
1162 | return; | 1186 | break; |
1187 | case 5: | ||
1188 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | ||
1189 | return EMULATE_FAIL; /* will re-inject UD */ | ||
1190 | /* fall through */ | ||
1163 | case 7: | 1191 | case 7: |
1164 | if (value & 0xffffffff00000000ULL) { | ||
1165 | *exception = GP_VECTOR; | ||
1166 | return; | ||
1167 | } | ||
1168 | vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1; | 1192 | vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1; |
1169 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { | 1193 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { |
1170 | svm->vmcb->save.dr7 = vcpu->arch.dr7; | 1194 | svm->vmcb->save.dr7 = vcpu->arch.dr7; |
1171 | vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK); | 1195 | vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK); |
1172 | } | 1196 | } |
1173 | return; | 1197 | break; |
1174 | default: | ||
1175 | /* FIXME: Possible case? */ | ||
1176 | printk(KERN_DEBUG "%s: unexpected dr %u\n", | ||
1177 | __func__, dr); | ||
1178 | *exception = UD_VECTOR; | ||
1179 | return; | ||
1180 | } | 1198 | } |
1199 | |||
1200 | return EMULATE_DONE; | ||
1181 | } | 1201 | } |
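Both accessors now follow the architectural DR4/DR5 rule: with CR4.DE clear they alias DR6/DR7, and with CR4.DE set access to them is undefined and must raise #UD (reported as EMULATE_FAIL so the caller re-injects). The rule as a standalone helper, hypothetical and not in the patch:

	static int resolve_dr_alias(struct kvm_vcpu *vcpu, int dr)
	{
		if (dr != 4 && dr != 5)
			return dr;
		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
			return -1;	/* undefined: caller injects #UD */
		return dr + 2;		/* DR4 -> DR6, DR5 -> DR7 */
	}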
1182 | 1202 | ||
1183 | static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1203 | static int pf_interception(struct vcpu_svm *svm) |
1184 | { | 1204 | { |
1185 | u64 fault_address; | 1205 | u64 fault_address; |
1186 | u32 error_code; | 1206 | u32 error_code; |
@@ -1194,17 +1214,19 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1194 | return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); | 1214 | return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); |
1195 | } | 1215 | } |
1196 | 1216 | ||
1197 | static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1217 | static int db_interception(struct vcpu_svm *svm) |
1198 | { | 1218 | { |
1219 | struct kvm_run *kvm_run = svm->vcpu.run; | ||
1220 | |||
1199 | if (!(svm->vcpu.guest_debug & | 1221 | if (!(svm->vcpu.guest_debug & |
1200 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) && | 1222 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) && |
1201 | !svm->vcpu.arch.singlestep) { | 1223 | !svm->nmi_singlestep) { |
1202 | kvm_queue_exception(&svm->vcpu, DB_VECTOR); | 1224 | kvm_queue_exception(&svm->vcpu, DB_VECTOR); |
1203 | return 1; | 1225 | return 1; |
1204 | } | 1226 | } |
1205 | 1227 | ||
1206 | if (svm->vcpu.arch.singlestep) { | 1228 | if (svm->nmi_singlestep) { |
1207 | svm->vcpu.arch.singlestep = false; | 1229 | svm->nmi_singlestep = false; |
1208 | if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) | 1230 | if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) |
1209 | svm->vmcb->save.rflags &= | 1231 | svm->vmcb->save.rflags &= |
1210 | ~(X86_EFLAGS_TF | X86_EFLAGS_RF); | 1232 | ~(X86_EFLAGS_TF | X86_EFLAGS_RF); |
@@ -1223,35 +1245,41 @@ static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1223 | return 1; | 1245 | return 1; |
1224 | } | 1246 | } |
1225 | 1247 | ||
1226 | static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1248 | static int bp_interception(struct vcpu_svm *svm) |
1227 | { | 1249 | { |
1250 | struct kvm_run *kvm_run = svm->vcpu.run; | ||
1251 | |||
1228 | kvm_run->exit_reason = KVM_EXIT_DEBUG; | 1252 | kvm_run->exit_reason = KVM_EXIT_DEBUG; |
1229 | kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip; | 1253 | kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip; |
1230 | kvm_run->debug.arch.exception = BP_VECTOR; | 1254 | kvm_run->debug.arch.exception = BP_VECTOR; |
1231 | return 0; | 1255 | return 0; |
1232 | } | 1256 | } |
1233 | 1257 | ||
1234 | static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1258 | static int ud_interception(struct vcpu_svm *svm) |
1235 | { | 1259 | { |
1236 | int er; | 1260 | int er; |
1237 | 1261 | ||
1238 | er = emulate_instruction(&svm->vcpu, kvm_run, 0, 0, EMULTYPE_TRAP_UD); | 1262 | er = emulate_instruction(&svm->vcpu, 0, 0, EMULTYPE_TRAP_UD); |
1239 | if (er != EMULATE_DONE) | 1263 | if (er != EMULATE_DONE) |
1240 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); | 1264 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); |
1241 | return 1; | 1265 | return 1; |
1242 | } | 1266 | } |
1243 | 1267 | ||
1244 | static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1268 | static void svm_fpu_activate(struct kvm_vcpu *vcpu) |
1245 | { | 1269 | { |
1270 | struct vcpu_svm *svm = to_svm(vcpu); | ||
1246 | svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); | 1271 | svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); |
1247 | if (!(svm->vcpu.arch.cr0 & X86_CR0_TS)) | ||
1248 | svm->vmcb->save.cr0 &= ~X86_CR0_TS; | ||
1249 | svm->vcpu.fpu_active = 1; | 1272 | svm->vcpu.fpu_active = 1; |
1273 | update_cr0_intercept(svm); | ||
1274 | } | ||
1250 | 1275 | ||
1276 | static int nm_interception(struct vcpu_svm *svm) | ||
1277 | { | ||
1278 | svm_fpu_activate(&svm->vcpu); | ||
1251 | return 1; | 1279 | return 1; |
1252 | } | 1280 | } |
1253 | 1281 | ||
1254 | static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1282 | static int mc_interception(struct vcpu_svm *svm) |
1255 | { | 1283 | { |
1256 | /* | 1284 | /* |
1257 | * On an #MC intercept the MCE handler is not called automatically in | 1285 | * On an #MC intercept the MCE handler is not called automatically in |
@@ -1264,8 +1292,10 @@ static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1264 | return 1; | 1292 | return 1; |
1265 | } | 1293 | } |
1266 | 1294 | ||
1267 | static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1295 | static int shutdown_interception(struct vcpu_svm *svm) |
1268 | { | 1296 | { |
1297 | struct kvm_run *kvm_run = svm->vcpu.run; | ||
1298 | |||
1269 | /* | 1299 | /* |
1270 | * VMCB is undefined after a SHUTDOWN intercept | 1300 | * VMCB is undefined after a SHUTDOWN intercept |
1271 | * so reinitialize it. | 1301 | * so reinitialize it. |
@@ -1277,7 +1307,7 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1277 | return 0; | 1307 | return 0; |
1278 | } | 1308 | } |
1279 | 1309 | ||
1280 | static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1310 | static int io_interception(struct vcpu_svm *svm) |
1281 | { | 1311 | { |
1282 | u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ | 1312 | u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ |
1283 | int size, in, string; | 1313 | int size, in, string; |
@@ -1291,7 +1321,7 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1291 | 1321 | ||
1292 | if (string) { | 1322 | if (string) { |
1293 | if (emulate_instruction(&svm->vcpu, | 1323 | if (emulate_instruction(&svm->vcpu, |
1294 | kvm_run, 0, 0, 0) == EMULATE_DO_MMIO) | 1324 | 0, 0, 0) == EMULATE_DO_MMIO) |
1295 | return 0; | 1325 | return 0; |
1296 | return 1; | 1326 | return 1; |
1297 | } | 1327 | } |
@@ -1301,33 +1331,33 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1301 | size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; | 1331 | size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; |
1302 | 1332 | ||
1303 | skip_emulated_instruction(&svm->vcpu); | 1333 | skip_emulated_instruction(&svm->vcpu); |
1304 | return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port); | 1334 | return kvm_emulate_pio(&svm->vcpu, in, size, port); |
1305 | } | 1335 | } |
1306 | 1336 | ||
1307 | static int nmi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1337 | static int nmi_interception(struct vcpu_svm *svm) |
1308 | { | 1338 | { |
1309 | return 1; | 1339 | return 1; |
1310 | } | 1340 | } |
1311 | 1341 | ||
1312 | static int intr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1342 | static int intr_interception(struct vcpu_svm *svm) |
1313 | { | 1343 | { |
1314 | ++svm->vcpu.stat.irq_exits; | 1344 | ++svm->vcpu.stat.irq_exits; |
1315 | return 1; | 1345 | return 1; |
1316 | } | 1346 | } |
1317 | 1347 | ||
1318 | static int nop_on_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1348 | static int nop_on_interception(struct vcpu_svm *svm) |
1319 | { | 1349 | { |
1320 | return 1; | 1350 | return 1; |
1321 | } | 1351 | } |
1322 | 1352 | ||
1323 | static int halt_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1353 | static int halt_interception(struct vcpu_svm *svm) |
1324 | { | 1354 | { |
1325 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 1; | 1355 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 1; |
1326 | skip_emulated_instruction(&svm->vcpu); | 1356 | skip_emulated_instruction(&svm->vcpu); |
1327 | return kvm_emulate_halt(&svm->vcpu); | 1357 | return kvm_emulate_halt(&svm->vcpu); |
1328 | } | 1358 | } |
1329 | 1359 | ||
1330 | static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1360 | static int vmmcall_interception(struct vcpu_svm *svm) |
1331 | { | 1361 | { |
1332 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; | 1362 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; |
1333 | skip_emulated_instruction(&svm->vcpu); | 1363 | skip_emulated_instruction(&svm->vcpu); |
@@ -1337,7 +1367,7 @@ static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1337 | 1367 | ||
1338 | static int nested_svm_check_permissions(struct vcpu_svm *svm) | 1368 | static int nested_svm_check_permissions(struct vcpu_svm *svm) |
1339 | { | 1369 | { |
1340 | if (!(svm->vcpu.arch.shadow_efer & EFER_SVME) | 1370 | if (!(svm->vcpu.arch.efer & EFER_SVME) |
1341 | || !is_paging(&svm->vcpu)) { | 1371 | || !is_paging(&svm->vcpu)) { |
1342 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); | 1372 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); |
1343 | return 1; | 1373 | return 1; |
@@ -1378,8 +1408,15 @@ static inline int nested_svm_intr(struct vcpu_svm *svm) | |||
1378 | 1408 | ||
1379 | svm->vmcb->control.exit_code = SVM_EXIT_INTR; | 1409 | svm->vmcb->control.exit_code = SVM_EXIT_INTR; |
1380 | 1410 | ||
1381 | if (nested_svm_exit_handled(svm)) { | 1411 | if (svm->nested.intercept & 1ULL) { |
1382 | nsvm_printk("VMexit -> INTR\n"); | 1412 | /* |
1413 | * The #vmexit can't be emulated here directly because this | ||
1414 | * code path runs with irqs and preemption disabled. A | ||
1415 | * #vmexit emulation might sleep. Only signal request for | ||
1416 | * the #vmexit here. | ||
1417 | */ | ||
1418 | svm->nested.exit_required = true; | ||
1419 | trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip); | ||
1383 | return 1; | 1420 | return 1; |
1384 | } | 1421 | } |
1385 | 1422 | ||
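Since nested_svm_intr() runs with irqs and preemption disabled, it can only flag the pending #vmexit; the emulation itself may sleep. A sketch of the consumer side, assuming the exit handler runs in a sleepable context before ordinary exit dispatch:

	static int handle_required_nested_vmexit(struct vcpu_svm *svm)
	{
		if (!svm->nested.exit_required)
			return 0;

		nested_svm_vmexit(svm);		/* may sleep: safe here */
		svm->nested.exit_required = false;

		return 1;
	}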
@@ -1390,10 +1427,7 @@ static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, enum km_type idx) | |||
1390 | { | 1427 | { |
1391 | struct page *page; | 1428 | struct page *page; |
1392 | 1429 | ||
1393 | down_read(¤t->mm->mmap_sem); | ||
1394 | page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT); | 1430 | page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT); |
1395 | up_read(¤t->mm->mmap_sem); | ||
1396 | |||
1397 | if (is_error_page(page)) | 1431 | if (is_error_page(page)) |
1398 | goto error; | 1432 | goto error; |
1399 | 1433 | ||
@@ -1532,14 +1566,12 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm) | |||
1532 | } | 1566 | } |
1533 | default: { | 1567 | default: { |
1534 | u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR); | 1568 | u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR); |
1535 | nsvm_printk("exit code: 0x%x\n", exit_code); | ||
1536 | if (svm->nested.intercept & exit_bits) | 1569 | if (svm->nested.intercept & exit_bits) |
1537 | vmexit = NESTED_EXIT_DONE; | 1570 | vmexit = NESTED_EXIT_DONE; |
1538 | } | 1571 | } |
1539 | } | 1572 | } |
1540 | 1573 | ||
1541 | if (vmexit == NESTED_EXIT_DONE) { | 1574 | if (vmexit == NESTED_EXIT_DONE) { |
1542 | nsvm_printk("#VMEXIT reason=%04x\n", exit_code); | ||
1543 | nested_svm_vmexit(svm); | 1575 | nested_svm_vmexit(svm); |
1544 | } | 1576 | } |
1545 | 1577 | ||
@@ -1584,6 +1616,12 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
1584 | struct vmcb *hsave = svm->nested.hsave; | 1616 | struct vmcb *hsave = svm->nested.hsave; |
1585 | struct vmcb *vmcb = svm->vmcb; | 1617 | struct vmcb *vmcb = svm->vmcb; |
1586 | 1618 | ||
1619 | trace_kvm_nested_vmexit_inject(vmcb->control.exit_code, | ||
1620 | vmcb->control.exit_info_1, | ||
1621 | vmcb->control.exit_info_2, | ||
1622 | vmcb->control.exit_int_info, | ||
1623 | vmcb->control.exit_int_info_err); | ||
1624 | |||
1587 | nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0); | 1625 | nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0); |
1588 | if (!nested_vmcb) | 1626 | if (!nested_vmcb) |
1589 | return 1; | 1627 | return 1; |
@@ -1617,6 +1655,22 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
1617 | nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2; | 1655 | nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2; |
1618 | nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info; | 1656 | nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info; |
1619 | nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err; | 1657 | nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err; |
1658 | |||
1659 | /* | ||
1660 | * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have | ||
1661 | * to make sure that we do not lose injected events. So check event_inj | ||
1662 | * here and copy it to exit_int_info if it is valid. | ||
1663 | * Exit_int_info and event_inj can't be both valid because the case | ||
1664 | * below only happens on a VMRUN instruction intercept which has | ||
1665 | * no valid exit_int_info set. | ||
1666 | */ | ||
1667 | if (vmcb->control.event_inj & SVM_EVTINJ_VALID) { | ||
1668 | struct vmcb_control_area *nc = &nested_vmcb->control; | ||
1669 | |||
1670 | nc->exit_int_info = vmcb->control.event_inj; | ||
1671 | nc->exit_int_info_err = vmcb->control.event_inj_err; | ||
1672 | } | ||
1673 | |||
1620 | nested_vmcb->control.tlb_ctl = 0; | 1674 | nested_vmcb->control.tlb_ctl = 0; |
1621 | nested_vmcb->control.event_inj = 0; | 1675 | nested_vmcb->control.event_inj = 0; |
1622 | nested_vmcb->control.event_inj_err = 0; | 1676 | nested_vmcb->control.event_inj_err = 0; |
@@ -1628,10 +1682,6 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
1628 | /* Restore the original control entries */ | 1682 | /* Restore the original control entries */ |
1629 | copy_vmcb_control_area(vmcb, hsave); | 1683 | copy_vmcb_control_area(vmcb, hsave); |
1630 | 1684 | ||
1631 | /* Kill any pending exceptions */ | ||
1632 | if (svm->vcpu.arch.exception.pending == true) | ||
1633 | nsvm_printk("WARNING: Pending Exception\n"); | ||
1634 | |||
1635 | kvm_clear_exception_queue(&svm->vcpu); | 1685 | kvm_clear_exception_queue(&svm->vcpu); |
1636 | kvm_clear_interrupt_queue(&svm->vcpu); | 1686 | kvm_clear_interrupt_queue(&svm->vcpu); |
1637 | 1687 | ||
@@ -1702,6 +1752,12 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
1702 | /* nested_vmcb is our indicator if nested SVM is activated */ | 1752 | /* nested_vmcb is our indicator if nested SVM is activated */ |
1703 | svm->nested.vmcb = svm->vmcb->save.rax; | 1753 | svm->nested.vmcb = svm->vmcb->save.rax; |
1704 | 1754 | ||
1755 | trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, svm->nested.vmcb, | ||
1756 | nested_vmcb->save.rip, | ||
1757 | nested_vmcb->control.int_ctl, | ||
1758 | nested_vmcb->control.event_inj, | ||
1759 | nested_vmcb->control.nested_ctl); | ||
1760 | |||
1705 | /* Clear internal status */ | 1761 | /* Clear internal status */ |
1706 | kvm_clear_exception_queue(&svm->vcpu); | 1762 | kvm_clear_exception_queue(&svm->vcpu); |
1707 | kvm_clear_interrupt_queue(&svm->vcpu); | 1763 | kvm_clear_interrupt_queue(&svm->vcpu); |
@@ -1714,8 +1770,8 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
1714 | hsave->save.ds = vmcb->save.ds; | 1770 | hsave->save.ds = vmcb->save.ds; |
1715 | hsave->save.gdtr = vmcb->save.gdtr; | 1771 | hsave->save.gdtr = vmcb->save.gdtr; |
1716 | hsave->save.idtr = vmcb->save.idtr; | 1772 | hsave->save.idtr = vmcb->save.idtr; |
1717 | hsave->save.efer = svm->vcpu.arch.shadow_efer; | 1773 | hsave->save.efer = svm->vcpu.arch.efer; |
1718 | hsave->save.cr0 = svm->vcpu.arch.cr0; | 1774 | hsave->save.cr0 = kvm_read_cr0(&svm->vcpu); |
1719 | hsave->save.cr4 = svm->vcpu.arch.cr4; | 1775 | hsave->save.cr4 = svm->vcpu.arch.cr4; |
1720 | hsave->save.rflags = vmcb->save.rflags; | 1776 | hsave->save.rflags = vmcb->save.rflags; |
1721 | hsave->save.rip = svm->next_rip; | 1777 | hsave->save.rip = svm->next_rip; |
@@ -1789,28 +1845,15 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
1789 | svm->nested.intercept = nested_vmcb->control.intercept; | 1845 | svm->nested.intercept = nested_vmcb->control.intercept; |
1790 | 1846 | ||
1791 | force_new_asid(&svm->vcpu); | 1847 | force_new_asid(&svm->vcpu); |
1792 | svm->vmcb->control.exit_int_info = nested_vmcb->control.exit_int_info; | ||
1793 | svm->vmcb->control.exit_int_info_err = nested_vmcb->control.exit_int_info_err; | ||
1794 | svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK; | 1848 | svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK; |
1795 | if (nested_vmcb->control.int_ctl & V_IRQ_MASK) { | ||
1796 | nsvm_printk("nSVM Injecting Interrupt: 0x%x\n", | ||
1797 | nested_vmcb->control.int_ctl); | ||
1798 | } | ||
1799 | if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK) | 1849 | if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK) |
1800 | svm->vcpu.arch.hflags |= HF_VINTR_MASK; | 1850 | svm->vcpu.arch.hflags |= HF_VINTR_MASK; |
1801 | else | 1851 | else |
1802 | svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; | 1852 | svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; |
1803 | 1853 | ||
1804 | nsvm_printk("nSVM exit_int_info: 0x%x | int_state: 0x%x\n", | ||
1805 | nested_vmcb->control.exit_int_info, | ||
1806 | nested_vmcb->control.int_state); | ||
1807 | |||
1808 | svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; | 1854 | svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; |
1809 | svm->vmcb->control.int_state = nested_vmcb->control.int_state; | 1855 | svm->vmcb->control.int_state = nested_vmcb->control.int_state; |
1810 | svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset; | 1856 | svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset; |
1811 | if (nested_vmcb->control.event_inj & SVM_EVTINJ_VALID) | ||
1812 | nsvm_printk("Injecting Event: 0x%x\n", | ||
1813 | nested_vmcb->control.event_inj); | ||
1814 | svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; | 1857 | svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; |
1815 | svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; | 1858 | svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; |
1816 | 1859 | ||
@@ -1837,7 +1880,7 @@ static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb) | |||
1837 | to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip; | 1880 | to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip; |
1838 | } | 1881 | } |
1839 | 1882 | ||
1840 | static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1883 | static int vmload_interception(struct vcpu_svm *svm) |
1841 | { | 1884 | { |
1842 | struct vmcb *nested_vmcb; | 1885 | struct vmcb *nested_vmcb; |
1843 | 1886 | ||
@@ -1857,7 +1900,7 @@ static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1857 | return 1; | 1900 | return 1; |
1858 | } | 1901 | } |
1859 | 1902 | ||
1860 | static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1903 | static int vmsave_interception(struct vcpu_svm *svm) |
1861 | { | 1904 | { |
1862 | struct vmcb *nested_vmcb; | 1905 | struct vmcb *nested_vmcb; |
1863 | 1906 | ||
@@ -1877,10 +1920,8 @@ static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1877 | return 1; | 1920 | return 1; |
1878 | } | 1921 | } |
1879 | 1922 | ||
1880 | static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1923 | static int vmrun_interception(struct vcpu_svm *svm) |
1881 | { | 1924 | { |
1882 | nsvm_printk("VMrun\n"); | ||
1883 | |||
1884 | if (nested_svm_check_permissions(svm)) | 1925 | if (nested_svm_check_permissions(svm)) |
1885 | return 1; | 1926 | return 1; |
1886 | 1927 | ||
@@ -1907,7 +1948,7 @@ failed: | |||
1907 | return 1; | 1948 | return 1; |
1908 | } | 1949 | } |
1909 | 1950 | ||
1910 | static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1951 | static int stgi_interception(struct vcpu_svm *svm) |
1911 | { | 1952 | { |
1912 | if (nested_svm_check_permissions(svm)) | 1953 | if (nested_svm_check_permissions(svm)) |
1913 | return 1; | 1954 | return 1; |
@@ -1920,7 +1961,7 @@ static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1920 | return 1; | 1961 | return 1; |
1921 | } | 1962 | } |
1922 | 1963 | ||
1923 | static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1964 | static int clgi_interception(struct vcpu_svm *svm) |
1924 | { | 1965 | { |
1925 | if (nested_svm_check_permissions(svm)) | 1966 | if (nested_svm_check_permissions(svm)) |
1926 | return 1; | 1967 | return 1; |
@@ -1937,10 +1978,12 @@ static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1937 | return 1; | 1978 | return 1; |
1938 | } | 1979 | } |
1939 | 1980 | ||
1940 | static int invlpga_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1981 | static int invlpga_interception(struct vcpu_svm *svm) |
1941 | { | 1982 | { |
1942 | struct kvm_vcpu *vcpu = &svm->vcpu; | 1983 | struct kvm_vcpu *vcpu = &svm->vcpu; |
1943 | nsvm_printk("INVLPGA\n"); | 1984 | |
1985 | trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX], | ||
1986 | vcpu->arch.regs[VCPU_REGS_RAX]); | ||
1944 | 1987 | ||
1945 | /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */ | 1988 | /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */ |
1946 | kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]); | 1989 | kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]); |
@@ -1950,15 +1993,21 @@ static int invlpga_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1950 | return 1; | 1993 | return 1; |
1951 | } | 1994 | } |
1952 | 1995 | ||
1953 | static int invalid_op_interception(struct vcpu_svm *svm, | 1996 | static int skinit_interception(struct vcpu_svm *svm) |
1954 | struct kvm_run *kvm_run) | 1997 | { |
1998 | trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]); | ||
1999 | |||
2000 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); | ||
2001 | return 1; | ||
2002 | } | ||
2003 | |||
2004 | static int invalid_op_interception(struct vcpu_svm *svm) | ||
1955 | { | 2005 | { |
1956 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); | 2006 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); |
1957 | return 1; | 2007 | return 1; |
1958 | } | 2008 | } |
1959 | 2009 | ||
1960 | static int task_switch_interception(struct vcpu_svm *svm, | 2010 | static int task_switch_interception(struct vcpu_svm *svm) |
1961 | struct kvm_run *kvm_run) | ||
1962 | { | 2011 | { |
1963 | u16 tss_selector; | 2012 | u16 tss_selector; |
1964 | int reason; | 2013 | int reason; |
@@ -2008,41 +2057,42 @@ static int task_switch_interception(struct vcpu_svm *svm, | |||
2008 | return kvm_task_switch(&svm->vcpu, tss_selector, reason); | 2057 | return kvm_task_switch(&svm->vcpu, tss_selector, reason); |
2009 | } | 2058 | } |
2010 | 2059 | ||
2011 | static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 2060 | static int cpuid_interception(struct vcpu_svm *svm) |
2012 | { | 2061 | { |
2013 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; | 2062 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; |
2014 | kvm_emulate_cpuid(&svm->vcpu); | 2063 | kvm_emulate_cpuid(&svm->vcpu); |
2015 | return 1; | 2064 | return 1; |
2016 | } | 2065 | } |
2017 | 2066 | ||
2018 | static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 2067 | static int iret_interception(struct vcpu_svm *svm) |
2019 | { | 2068 | { |
2020 | ++svm->vcpu.stat.nmi_window_exits; | 2069 | ++svm->vcpu.stat.nmi_window_exits; |
2021 | svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET); | 2070 | svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_IRET); |
2022 | svm->vcpu.arch.hflags |= HF_IRET_MASK; | 2071 | svm->vcpu.arch.hflags |= HF_IRET_MASK; |
2023 | return 1; | 2072 | return 1; |
2024 | } | 2073 | } |
2025 | 2074 | ||
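The 1UL to 1ULL change above (mirrored in the NMI-mask helpers later in this patch) is a 32-bit host fix, not cosmetics: vmcb->control.intercept is a u64, and on a 32-bit kernel the complement of a 32-bit mask zero-extends before the AND, silently clearing every intercept bit in the upper half of the word. A minimal self-contained illustration; bit index 20 here is only a stand-in for INTERCEPT_IRET, whose actual value is not shown in this hunk:

    #include <stdint.h>

    /* With a 32-bit unsigned long, ~(1UL << n) is a 32-bit value; ANDing
     * it into a u64 zero-extends and wipes intercept bits 32..63. */
    uint64_t clear_iret_buggy(uint64_t intercept)
    {
            return intercept & (uint32_t)~(1UL << 20);  /* loses bits 32..63 */
    }

    uint64_t clear_iret_fixed(uint64_t intercept)
    {
            return intercept & ~(1ULL << 20);           /* full-width mask */
    }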
2026 | static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 2075 | static int invlpg_interception(struct vcpu_svm *svm) |
2027 | { | 2076 | { |
2028 | if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) != EMULATE_DONE) | 2077 | if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE) |
2029 | pr_unimpl(&svm->vcpu, "%s: failed\n", __func__); | 2078 | pr_unimpl(&svm->vcpu, "%s: failed\n", __func__); |
2030 | return 1; | 2079 | return 1; |
2031 | } | 2080 | } |
2032 | 2081 | ||
2033 | static int emulate_on_interception(struct vcpu_svm *svm, | 2082 | static int emulate_on_interception(struct vcpu_svm *svm) |
2034 | struct kvm_run *kvm_run) | ||
2035 | { | 2083 | { |
2036 | if (emulate_instruction(&svm->vcpu, NULL, 0, 0, 0) != EMULATE_DONE) | 2084 | if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE) |
2037 | pr_unimpl(&svm->vcpu, "%s: failed\n", __func__); | 2085 | pr_unimpl(&svm->vcpu, "%s: failed\n", __func__); |
2038 | return 1; | 2086 | return 1; |
2039 | } | 2087 | } |
2040 | 2088 | ||
2041 | static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 2089 | static int cr8_write_interception(struct vcpu_svm *svm) |
2042 | { | 2090 | { |
2091 | struct kvm_run *kvm_run = svm->vcpu.run; | ||
2092 | |||
2043 | u8 cr8_prev = kvm_get_cr8(&svm->vcpu); | 2093 | u8 cr8_prev = kvm_get_cr8(&svm->vcpu); |
2044 | /* instruction emulation calls kvm_set_cr8() */ | 2094 | /* instruction emulation calls kvm_set_cr8() */ |
2045 | emulate_instruction(&svm->vcpu, NULL, 0, 0, 0); | 2095 | emulate_instruction(&svm->vcpu, 0, 0, 0); |
2046 | if (irqchip_in_kernel(svm->vcpu.kvm)) { | 2096 | if (irqchip_in_kernel(svm->vcpu.kvm)) { |
2047 | svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK; | 2097 | svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK; |
2048 | return 1; | 2098 | return 1; |
@@ -2128,14 +2178,15 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) | |||
2128 | return 0; | 2178 | return 0; |
2129 | } | 2179 | } |
2130 | 2180 | ||
2131 | static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 2181 | static int rdmsr_interception(struct vcpu_svm *svm) |
2132 | { | 2182 | { |
2133 | u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; | 2183 | u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; |
2134 | u64 data; | 2184 | u64 data; |
2135 | 2185 | ||
2136 | if (svm_get_msr(&svm->vcpu, ecx, &data)) | 2186 | if (svm_get_msr(&svm->vcpu, ecx, &data)) { |
2187 | trace_kvm_msr_read_ex(ecx); | ||
2137 | kvm_inject_gp(&svm->vcpu, 0); | 2188 | kvm_inject_gp(&svm->vcpu, 0); |
2138 | else { | 2189 | } else { |
2139 | trace_kvm_msr_read(ecx, data); | 2190 | trace_kvm_msr_read(ecx, data); |
2140 | 2191 | ||
2141 | svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff; | 2192 | svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff; |
@@ -2221,33 +2272,36 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) | |||
2221 | return 0; | 2272 | return 0; |
2222 | } | 2273 | } |
2223 | 2274 | ||
2224 | static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 2275 | static int wrmsr_interception(struct vcpu_svm *svm) |
2225 | { | 2276 | { |
2226 | u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; | 2277 | u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; |
2227 | u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u) | 2278 | u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u) |
2228 | | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32); | 2279 | | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32); |
2229 | 2280 | ||
2230 | trace_kvm_msr_write(ecx, data); | ||
2231 | 2281 | ||
2232 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; | 2282 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; |
2233 | if (svm_set_msr(&svm->vcpu, ecx, data)) | 2283 | if (svm_set_msr(&svm->vcpu, ecx, data)) { |
2284 | trace_kvm_msr_write_ex(ecx, data); | ||
2234 | kvm_inject_gp(&svm->vcpu, 0); | 2285 | kvm_inject_gp(&svm->vcpu, 0); |
2235 | else | 2286 | } else { |
2287 | trace_kvm_msr_write(ecx, data); | ||
2236 | skip_emulated_instruction(&svm->vcpu); | 2288 | skip_emulated_instruction(&svm->vcpu); |
2289 | } | ||
2237 | return 1; | 2290 | return 1; |
2238 | } | 2291 | } |
2239 | 2292 | ||
2240 | static int msr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 2293 | static int msr_interception(struct vcpu_svm *svm) |
2241 | { | 2294 | { |
2242 | if (svm->vmcb->control.exit_info_1) | 2295 | if (svm->vmcb->control.exit_info_1) |
2243 | return wrmsr_interception(svm, kvm_run); | 2296 | return wrmsr_interception(svm); |
2244 | else | 2297 | else |
2245 | return rdmsr_interception(svm, kvm_run); | 2298 | return rdmsr_interception(svm); |
2246 | } | 2299 | } |
2247 | 2300 | ||
2248 | static int interrupt_window_interception(struct vcpu_svm *svm, | 2301 | static int interrupt_window_interception(struct vcpu_svm *svm) |
2249 | struct kvm_run *kvm_run) | ||
2250 | { | 2302 | { |
2303 | struct kvm_run *kvm_run = svm->vcpu.run; | ||
2304 | |||
2251 | svm_clear_vintr(svm); | 2305 | svm_clear_vintr(svm); |
2252 | svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; | 2306 | svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; |
2253 | /* | 2307 | /* |
@@ -2265,13 +2319,18 @@ static int interrupt_window_interception(struct vcpu_svm *svm, | |||
2265 | return 1; | 2319 | return 1; |
2266 | } | 2320 | } |
2267 | 2321 | ||
2268 | static int (*svm_exit_handlers[])(struct vcpu_svm *svm, | 2322 | static int pause_interception(struct vcpu_svm *svm) |
2269 | struct kvm_run *kvm_run) = { | 2323 | { |
2324 | kvm_vcpu_on_spin(&(svm->vcpu)); | ||
2325 | return 1; | ||
2326 | } | ||
2327 | |||
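pause_interception() is the SVM consumer of the generic kvm_vcpu_on_spin() helper, whose body lives in virt/kvm/kvm_main.c and is not part of these hunks. As a rough sketch of its behavior at this point in the series (details recalled, not quoted, and may differ), it simply naps the vcpu briefly so the lock holder can make progress:

    /* Sketch: park the spinning vcpu for up to 100us instead of letting
     * it burn its timeslice inside a contended guest spinlock. */
    void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu)
    {
            ktime_t expires;
            DEFINE_WAIT(wait);

            prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);

            /* Sleep for 100 us, and hope the lock holder gets scheduled. */
            expires = ktime_add_ns(ktime_get(), 100000UL);
            schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);

            finish_wait(&vcpu->wq, &wait);
    }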
2328 | static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | ||
2270 | [SVM_EXIT_READ_CR0] = emulate_on_interception, | 2329 | [SVM_EXIT_READ_CR0] = emulate_on_interception, |
2271 | [SVM_EXIT_READ_CR3] = emulate_on_interception, | 2330 | [SVM_EXIT_READ_CR3] = emulate_on_interception, |
2272 | [SVM_EXIT_READ_CR4] = emulate_on_interception, | 2331 | [SVM_EXIT_READ_CR4] = emulate_on_interception, |
2273 | [SVM_EXIT_READ_CR8] = emulate_on_interception, | 2332 | [SVM_EXIT_READ_CR8] = emulate_on_interception, |
2274 | /* for now: */ | 2333 | [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, |
2275 | [SVM_EXIT_WRITE_CR0] = emulate_on_interception, | 2334 | [SVM_EXIT_WRITE_CR0] = emulate_on_interception, |
2276 | [SVM_EXIT_WRITE_CR3] = emulate_on_interception, | 2335 | [SVM_EXIT_WRITE_CR3] = emulate_on_interception, |
2277 | [SVM_EXIT_WRITE_CR4] = emulate_on_interception, | 2336 | [SVM_EXIT_WRITE_CR4] = emulate_on_interception, |
@@ -2280,11 +2339,17 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm, | |||
2280 | [SVM_EXIT_READ_DR1] = emulate_on_interception, | 2339 | [SVM_EXIT_READ_DR1] = emulate_on_interception, |
2281 | [SVM_EXIT_READ_DR2] = emulate_on_interception, | 2340 | [SVM_EXIT_READ_DR2] = emulate_on_interception, |
2282 | [SVM_EXIT_READ_DR3] = emulate_on_interception, | 2341 | [SVM_EXIT_READ_DR3] = emulate_on_interception, |
2342 | [SVM_EXIT_READ_DR4] = emulate_on_interception, | ||
2343 | [SVM_EXIT_READ_DR5] = emulate_on_interception, | ||
2344 | [SVM_EXIT_READ_DR6] = emulate_on_interception, | ||
2345 | [SVM_EXIT_READ_DR7] = emulate_on_interception, | ||
2283 | [SVM_EXIT_WRITE_DR0] = emulate_on_interception, | 2346 | [SVM_EXIT_WRITE_DR0] = emulate_on_interception, |
2284 | [SVM_EXIT_WRITE_DR1] = emulate_on_interception, | 2347 | [SVM_EXIT_WRITE_DR1] = emulate_on_interception, |
2285 | [SVM_EXIT_WRITE_DR2] = emulate_on_interception, | 2348 | [SVM_EXIT_WRITE_DR2] = emulate_on_interception, |
2286 | [SVM_EXIT_WRITE_DR3] = emulate_on_interception, | 2349 | [SVM_EXIT_WRITE_DR3] = emulate_on_interception, |
2350 | [SVM_EXIT_WRITE_DR4] = emulate_on_interception, | ||
2287 | [SVM_EXIT_WRITE_DR5] = emulate_on_interception, | 2351 | [SVM_EXIT_WRITE_DR5] = emulate_on_interception, |
2352 | [SVM_EXIT_WRITE_DR6] = emulate_on_interception, | ||
2288 | [SVM_EXIT_WRITE_DR7] = emulate_on_interception, | 2353 | [SVM_EXIT_WRITE_DR7] = emulate_on_interception, |
2289 | [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, | 2354 | [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, |
2290 | [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, | 2355 | [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, |
@@ -2301,6 +2366,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm, | |||
2301 | [SVM_EXIT_CPUID] = cpuid_interception, | 2366 | [SVM_EXIT_CPUID] = cpuid_interception, |
2302 | [SVM_EXIT_IRET] = iret_interception, | 2367 | [SVM_EXIT_IRET] = iret_interception, |
2303 | [SVM_EXIT_INVD] = emulate_on_interception, | 2368 | [SVM_EXIT_INVD] = emulate_on_interception, |
2369 | [SVM_EXIT_PAUSE] = pause_interception, | ||
2304 | [SVM_EXIT_HLT] = halt_interception, | 2370 | [SVM_EXIT_HLT] = halt_interception, |
2305 | [SVM_EXIT_INVLPG] = invlpg_interception, | 2371 | [SVM_EXIT_INVLPG] = invlpg_interception, |
2306 | [SVM_EXIT_INVLPGA] = invlpga_interception, | 2372 | [SVM_EXIT_INVLPGA] = invlpga_interception, |
@@ -2314,26 +2380,36 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm, | |||
2314 | [SVM_EXIT_VMSAVE] = vmsave_interception, | 2380 | [SVM_EXIT_VMSAVE] = vmsave_interception, |
2315 | [SVM_EXIT_STGI] = stgi_interception, | 2381 | [SVM_EXIT_STGI] = stgi_interception, |
2316 | [SVM_EXIT_CLGI] = clgi_interception, | 2382 | [SVM_EXIT_CLGI] = clgi_interception, |
2317 | [SVM_EXIT_SKINIT] = invalid_op_interception, | 2383 | [SVM_EXIT_SKINIT] = skinit_interception, |
2318 | [SVM_EXIT_WBINVD] = emulate_on_interception, | 2384 | [SVM_EXIT_WBINVD] = emulate_on_interception, |
2319 | [SVM_EXIT_MONITOR] = invalid_op_interception, | 2385 | [SVM_EXIT_MONITOR] = invalid_op_interception, |
2320 | [SVM_EXIT_MWAIT] = invalid_op_interception, | 2386 | [SVM_EXIT_MWAIT] = invalid_op_interception, |
2321 | [SVM_EXIT_NPF] = pf_interception, | 2387 | [SVM_EXIT_NPF] = pf_interception, |
2322 | }; | 2388 | }; |
2323 | 2389 | ||
2324 | static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | 2390 | static int handle_exit(struct kvm_vcpu *vcpu) |
2325 | { | 2391 | { |
2326 | struct vcpu_svm *svm = to_svm(vcpu); | 2392 | struct vcpu_svm *svm = to_svm(vcpu); |
2393 | struct kvm_run *kvm_run = vcpu->run; | ||
2327 | u32 exit_code = svm->vmcb->control.exit_code; | 2394 | u32 exit_code = svm->vmcb->control.exit_code; |
2328 | 2395 | ||
2329 | trace_kvm_exit(exit_code, svm->vmcb->save.rip); | 2396 | trace_kvm_exit(exit_code, svm->vmcb->save.rip); |
2330 | 2397 | ||
2398 | if (unlikely(svm->nested.exit_required)) { | ||
2399 | nested_svm_vmexit(svm); | ||
2400 | svm->nested.exit_required = false; | ||
2401 | |||
2402 | return 1; | ||
2403 | } | ||
2404 | |||
2331 | if (is_nested(svm)) { | 2405 | if (is_nested(svm)) { |
2332 | int vmexit; | 2406 | int vmexit; |
2333 | 2407 | ||
2334 | nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n", | 2408 | trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code, |
2335 | exit_code, svm->vmcb->control.exit_info_1, | 2409 | svm->vmcb->control.exit_info_1, |
2336 | svm->vmcb->control.exit_info_2, svm->vmcb->save.rip); | 2410 | svm->vmcb->control.exit_info_2, |
2411 | svm->vmcb->control.exit_int_info, | ||
2412 | svm->vmcb->control.exit_int_info_err); | ||
2337 | 2413 | ||
2338 | vmexit = nested_svm_exit_special(svm); | 2414 | vmexit = nested_svm_exit_special(svm); |
2339 | 2415 | ||
@@ -2346,20 +2422,10 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
2346 | 2422 | ||
2347 | svm_complete_interrupts(svm); | 2423 | svm_complete_interrupts(svm); |
2348 | 2424 | ||
2349 | if (npt_enabled) { | 2425 | if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR0_MASK)) |
2350 | int mmu_reload = 0; | ||
2351 | if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) { | ||
2352 | svm_set_cr0(vcpu, svm->vmcb->save.cr0); | ||
2353 | mmu_reload = 1; | ||
2354 | } | ||
2355 | vcpu->arch.cr0 = svm->vmcb->save.cr0; | 2426 | vcpu->arch.cr0 = svm->vmcb->save.cr0; |
2427 | if (npt_enabled) | ||
2356 | vcpu->arch.cr3 = svm->vmcb->save.cr3; | 2428 | vcpu->arch.cr3 = svm->vmcb->save.cr3; |
2357 | if (mmu_reload) { | ||
2358 | kvm_mmu_reset_context(vcpu); | ||
2359 | kvm_mmu_load(vcpu); | ||
2360 | } | ||
2361 | } | ||
2362 | |||
2363 | 2429 | ||
2364 | if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { | 2430 | if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { |
2365 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; | 2431 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; |
@@ -2383,15 +2449,15 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
2383 | return 0; | 2449 | return 0; |
2384 | } | 2450 | } |
2385 | 2451 | ||
2386 | return svm_exit_handlers[exit_code](svm, kvm_run); | 2452 | return svm_exit_handlers[exit_code](svm); |
2387 | } | 2453 | } |
2388 | 2454 | ||
2389 | static void reload_tss(struct kvm_vcpu *vcpu) | 2455 | static void reload_tss(struct kvm_vcpu *vcpu) |
2390 | { | 2456 | { |
2391 | int cpu = raw_smp_processor_id(); | 2457 | int cpu = raw_smp_processor_id(); |
2392 | 2458 | ||
2393 | struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu); | 2459 | struct svm_cpu_data *sd = per_cpu(svm_data, cpu); |
2394 | svm_data->tss_desc->type = 9; /* available 32/64-bit TSS */ | 2460 | sd->tss_desc->type = 9; /* available 32/64-bit TSS */ |
2395 | load_TR_desc(); | 2461 | load_TR_desc(); |
2396 | } | 2462 | } |
2397 | 2463 | ||
@@ -2399,12 +2465,12 @@ static void pre_svm_run(struct vcpu_svm *svm) | |||
2399 | { | 2465 | { |
2400 | int cpu = raw_smp_processor_id(); | 2466 | int cpu = raw_smp_processor_id(); |
2401 | 2467 | ||
2402 | struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu); | 2468 | struct svm_cpu_data *sd = per_cpu(svm_data, cpu); |
2403 | 2469 | ||
2404 | svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; | 2470 | svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; |
2405 | /* FIXME: handle wraparound of asid_generation */ | 2471 | /* FIXME: handle wraparound of asid_generation */ |
2406 | if (svm->asid_generation != svm_data->asid_generation) | 2472 | if (svm->asid_generation != sd->asid_generation) |
2407 | new_asid(svm, svm_data); | 2473 | new_asid(svm, sd); |
2408 | } | 2474 | } |
2409 | 2475 | ||
2410 | static void svm_inject_nmi(struct kvm_vcpu *vcpu) | 2476 | static void svm_inject_nmi(struct kvm_vcpu *vcpu) |
@@ -2413,7 +2479,7 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu) | |||
2413 | 2479 | ||
2414 | svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; | 2480 | svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; |
2415 | vcpu->arch.hflags |= HF_NMI_MASK; | 2481 | vcpu->arch.hflags |= HF_NMI_MASK; |
2416 | svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET); | 2482 | svm->vmcb->control.intercept |= (1ULL << INTERCEPT_IRET); |
2417 | ++vcpu->stat.nmi_injections; | 2483 | ++vcpu->stat.nmi_injections; |
2418 | } | 2484 | } |
2419 | 2485 | ||
@@ -2460,20 +2526,47 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu) | |||
2460 | !(svm->vcpu.arch.hflags & HF_NMI_MASK); | 2526 | !(svm->vcpu.arch.hflags & HF_NMI_MASK); |
2461 | } | 2527 | } |
2462 | 2528 | ||
2529 | static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu) | ||
2530 | { | ||
2531 | struct vcpu_svm *svm = to_svm(vcpu); | ||
2532 | |||
2533 | return !!(svm->vcpu.arch.hflags & HF_NMI_MASK); | ||
2534 | } | ||
2535 | |||
2536 | static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) | ||
2537 | { | ||
2538 | struct vcpu_svm *svm = to_svm(vcpu); | ||
2539 | |||
2540 | if (masked) { | ||
2541 | svm->vcpu.arch.hflags |= HF_NMI_MASK; | ||
2542 | svm->vmcb->control.intercept |= (1ULL << INTERCEPT_IRET); | ||
2543 | } else { | ||
2544 | svm->vcpu.arch.hflags &= ~HF_NMI_MASK; | ||
2545 | svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_IRET); | ||
2546 | } | ||
2547 | } | ||
2548 | |||
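svm_get_nmi_mask()/svm_set_nmi_mask() back the two new kvm_x86_ops callbacks wired up at the bottom of this file, so common code can save and restore NMI blocking across migration without touching vendor state. A hypothetical consumer (struct and field names are assumptions, not part of this diff) would look like:

    /* Hypothetical caller: round-trip the NMI-masked bit through a
     * userspace-visible events structure via the new callbacks. */
    static void sync_nmi_mask(struct kvm_vcpu *vcpu,
                              struct kvm_vcpu_events *events, bool set)
    {
            if (set)
                    kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
            else
                    events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
    }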
2463 | static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) | 2549 | static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) |
2464 | { | 2550 | { |
2465 | struct vcpu_svm *svm = to_svm(vcpu); | 2551 | struct vcpu_svm *svm = to_svm(vcpu); |
2466 | struct vmcb *vmcb = svm->vmcb; | 2552 | struct vmcb *vmcb = svm->vmcb; |
2467 | return (vmcb->save.rflags & X86_EFLAGS_IF) && | 2553 | int ret; |
2468 | !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) && | 2554 | |
2469 | gif_set(svm) && | 2555 | if (!gif_set(svm) || |
2470 | !(is_nested(svm) && (svm->vcpu.arch.hflags & HF_VINTR_MASK)); | 2556 | (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)) |
2557 | return 0; | ||
2558 | |||
2559 | ret = !!(vmcb->save.rflags & X86_EFLAGS_IF); | ||
2560 | |||
2561 | if (is_nested(svm)) | ||
2562 | return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK); | ||
2563 | |||
2564 | return ret; | ||
2471 | } | 2565 | } |
2472 | 2566 | ||
2473 | static void enable_irq_window(struct kvm_vcpu *vcpu) | 2567 | static void enable_irq_window(struct kvm_vcpu *vcpu) |
2474 | { | 2568 | { |
2475 | struct vcpu_svm *svm = to_svm(vcpu); | 2569 | struct vcpu_svm *svm = to_svm(vcpu); |
2476 | nsvm_printk("Trying to open IRQ window\n"); | ||
2477 | 2570 | ||
2478 | nested_svm_intr(svm); | 2571 | nested_svm_intr(svm); |
2479 | 2572 | ||
@@ -2498,7 +2591,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) | |||
2498 | /* Something prevents NMI from being injected. Single step over | 2591 | /* Something prevents NMI from being injected. Single step over |
2499 | possible problem (IRET or exception injection or interrupt | 2592 | possible problem (IRET or exception injection or interrupt |
2500 | shadow) */ | 2593 | shadow) */ |
2501 | vcpu->arch.singlestep = true; | 2594 | svm->nmi_singlestep = true; |
2502 | svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); | 2595 | svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); |
2503 | update_db_intercept(vcpu); | 2596 | update_db_intercept(vcpu); |
2504 | } | 2597 | } |
@@ -2588,13 +2681,20 @@ static void svm_complete_interrupts(struct vcpu_svm *svm) | |||
2588 | #define R "e" | 2681 | #define R "e" |
2589 | #endif | 2682 | #endif |
2590 | 2683 | ||
2591 | static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2684 | static void svm_vcpu_run(struct kvm_vcpu *vcpu) |
2592 | { | 2685 | { |
2593 | struct vcpu_svm *svm = to_svm(vcpu); | 2686 | struct vcpu_svm *svm = to_svm(vcpu); |
2594 | u16 fs_selector; | 2687 | u16 fs_selector; |
2595 | u16 gs_selector; | 2688 | u16 gs_selector; |
2596 | u16 ldt_selector; | 2689 | u16 ldt_selector; |
2597 | 2690 | ||
2691 | /* | ||
2692 | * A vmexit emulation is required before the vcpu can be executed | ||
2693 | * again. | ||
2694 | */ | ||
2695 | if (unlikely(svm->nested.exit_required)) | ||
2696 | return; | ||
2697 | |||
2598 | svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; | 2698 | svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; |
2599 | svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; | 2699 | svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; |
2600 | svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; | 2700 | svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; |
@@ -2727,12 +2827,6 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) | |||
2727 | 2827 | ||
2728 | svm->vmcb->save.cr3 = root; | 2828 | svm->vmcb->save.cr3 = root; |
2729 | force_new_asid(vcpu); | 2829 | force_new_asid(vcpu); |
2730 | |||
2731 | if (vcpu->fpu_active) { | ||
2732 | svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR); | ||
2733 | svm->vmcb->save.cr0 |= X86_CR0_TS; | ||
2734 | vcpu->fpu_active = 0; | ||
2735 | } | ||
2736 | } | 2830 | } |
2737 | 2831 | ||
2738 | static int is_disabled(void) | 2832 | static int is_disabled(void) |
@@ -2781,6 +2875,10 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) | |||
2781 | return 0; | 2875 | return 0; |
2782 | } | 2876 | } |
2783 | 2877 | ||
2878 | static void svm_cpuid_update(struct kvm_vcpu *vcpu) | ||
2879 | { | ||
2880 | } | ||
2881 | |||
2784 | static const struct trace_print_flags svm_exit_reasons_str[] = { | 2882 | static const struct trace_print_flags svm_exit_reasons_str[] = { |
2785 | { SVM_EXIT_READ_CR0, "read_cr0" }, | 2883 | { SVM_EXIT_READ_CR0, "read_cr0" }, |
2786 | { SVM_EXIT_READ_CR3, "read_cr3" }, | 2884 | { SVM_EXIT_READ_CR3, "read_cr3" }, |
@@ -2834,9 +2932,22 @@ static const struct trace_print_flags svm_exit_reasons_str[] = { | |||
2834 | { -1, NULL } | 2932 | { -1, NULL } |
2835 | }; | 2933 | }; |
2836 | 2934 | ||
2837 | static bool svm_gb_page_enable(void) | 2935 | static int svm_get_lpage_level(void) |
2838 | { | 2936 | { |
2839 | return true; | 2937 | return PT_PDPE_LEVEL; |
2938 | } | ||
2939 | |||
2940 | static bool svm_rdtscp_supported(void) | ||
2941 | { | ||
2942 | return false; | ||
2943 | } | ||
2944 | |||
2945 | static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) | ||
2946 | { | ||
2947 | struct vcpu_svm *svm = to_svm(vcpu); | ||
2948 | |||
2949 | update_cr0_intercept(svm); | ||
2950 | svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR; | ||
2840 | } | 2951 | } |
2841 | 2952 | ||
2842 | static struct kvm_x86_ops svm_x86_ops = { | 2953 | static struct kvm_x86_ops svm_x86_ops = { |
@@ -2865,6 +2976,7 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
2865 | .set_segment = svm_set_segment, | 2976 | .set_segment = svm_set_segment, |
2866 | .get_cpl = svm_get_cpl, | 2977 | .get_cpl = svm_get_cpl, |
2867 | .get_cs_db_l_bits = kvm_get_cs_db_l_bits, | 2978 | .get_cs_db_l_bits = kvm_get_cs_db_l_bits, |
2979 | .decache_cr0_guest_bits = svm_decache_cr0_guest_bits, | ||
2868 | .decache_cr4_guest_bits = svm_decache_cr4_guest_bits, | 2980 | .decache_cr4_guest_bits = svm_decache_cr4_guest_bits, |
2869 | .set_cr0 = svm_set_cr0, | 2981 | .set_cr0 = svm_set_cr0, |
2870 | .set_cr3 = svm_set_cr3, | 2982 | .set_cr3 = svm_set_cr3, |
@@ -2879,6 +2991,8 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
2879 | .cache_reg = svm_cache_reg, | 2991 | .cache_reg = svm_cache_reg, |
2880 | .get_rflags = svm_get_rflags, | 2992 | .get_rflags = svm_get_rflags, |
2881 | .set_rflags = svm_set_rflags, | 2993 | .set_rflags = svm_set_rflags, |
2994 | .fpu_activate = svm_fpu_activate, | ||
2995 | .fpu_deactivate = svm_fpu_deactivate, | ||
2882 | 2996 | ||
2883 | .tlb_flush = svm_flush_tlb, | 2997 | .tlb_flush = svm_flush_tlb, |
2884 | 2998 | ||
@@ -2893,6 +3007,8 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
2893 | .queue_exception = svm_queue_exception, | 3007 | .queue_exception = svm_queue_exception, |
2894 | .interrupt_allowed = svm_interrupt_allowed, | 3008 | .interrupt_allowed = svm_interrupt_allowed, |
2895 | .nmi_allowed = svm_nmi_allowed, | 3009 | .nmi_allowed = svm_nmi_allowed, |
3010 | .get_nmi_mask = svm_get_nmi_mask, | ||
3011 | .set_nmi_mask = svm_set_nmi_mask, | ||
2896 | .enable_nmi_window = enable_nmi_window, | 3012 | .enable_nmi_window = enable_nmi_window, |
2897 | .enable_irq_window = enable_irq_window, | 3013 | .enable_irq_window = enable_irq_window, |
2898 | .update_cr8_intercept = update_cr8_intercept, | 3014 | .update_cr8_intercept = update_cr8_intercept, |
@@ -2902,7 +3018,11 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
2902 | .get_mt_mask = svm_get_mt_mask, | 3018 | .get_mt_mask = svm_get_mt_mask, |
2903 | 3019 | ||
2904 | .exit_reasons_str = svm_exit_reasons_str, | 3020 | .exit_reasons_str = svm_exit_reasons_str, |
2905 | .gb_page_enable = svm_gb_page_enable, | 3021 | .get_lpage_level = svm_get_lpage_level, |
3022 | |||
3023 | .cpuid_update = svm_cpuid_update, | ||
3024 | |||
3025 | .rdtscp_supported = svm_rdtscp_supported, | ||
2906 | }; | 3026 | }; |
2907 | 3027 | ||
2908 | static int __init svm_init(void) | 3028 | static int __init svm_init(void) |
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 0d480e77eacf..6ad30a29f044 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h | |||
@@ -56,6 +56,38 @@ TRACE_EVENT(kvm_hypercall, | |||
56 | ); | 56 | ); |
57 | 57 | ||
58 | /* | 58 | /* |
59 | * Tracepoint for Hyper-V hypercall. | ||
60 | */ | ||
61 | TRACE_EVENT(kvm_hv_hypercall, | ||
62 | TP_PROTO(__u16 code, bool fast, __u16 rep_cnt, __u16 rep_idx, | ||
63 | __u64 ingpa, __u64 outgpa), | ||
64 | TP_ARGS(code, fast, rep_cnt, rep_idx, ingpa, outgpa), | ||
65 | |||
66 | TP_STRUCT__entry( | ||
67 | __field( __u16, code ) | ||
68 | __field( bool, fast ) | ||
69 | __field( __u16, rep_cnt ) | ||
70 | __field( __u16, rep_idx ) | ||
71 | __field( __u64, ingpa ) | ||
72 | __field( __u64, outgpa ) | ||
73 | ), | ||
74 | |||
75 | TP_fast_assign( | ||
76 | __entry->code = code; | ||
77 | __entry->fast = fast; | ||
78 | __entry->rep_cnt = rep_cnt; | ||
79 | __entry->rep_idx = rep_idx; | ||
80 | __entry->ingpa = ingpa; | ||
81 | __entry->outgpa = outgpa; | ||
82 | ), | ||
83 | |||
84 | TP_printk("code 0x%x %s cnt 0x%x idx 0x%x in 0x%llx out 0x%llx", | ||
85 | __entry->code, __entry->fast ? "fast" : "slow", | ||
86 | __entry->rep_cnt, __entry->rep_idx, __entry->ingpa, | ||
87 | __entry->outgpa) | ||
88 | ); | ||
89 | |||
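Per the TP_printk format above, a traced Hyper-V hypercall renders along these lines (all values hypothetical):

    kvm_hv_hypercall: code 0x8 fast cnt 0x1 idx 0x0 in 0x3f000 out 0x3f800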
90 | /* | ||
59 | * Tracepoint for PIO. | 91 | * Tracepoint for PIO. |
60 | */ | 92 | */ |
61 | TRACE_EVENT(kvm_pio, | 93 | TRACE_EVENT(kvm_pio, |
@@ -214,28 +246,33 @@ TRACE_EVENT(kvm_page_fault, | |||
214 | * Tracepoint for guest MSR access. | 246 | * Tracepoint for guest MSR access. |
215 | */ | 247 | */ |
216 | TRACE_EVENT(kvm_msr, | 248 | TRACE_EVENT(kvm_msr, |
217 | TP_PROTO(unsigned int rw, unsigned int ecx, unsigned long data), | 249 | TP_PROTO(unsigned write, u32 ecx, u64 data, bool exception), |
218 | TP_ARGS(rw, ecx, data), | 250 | TP_ARGS(write, ecx, data, exception), |
219 | 251 | ||
220 | TP_STRUCT__entry( | 252 | TP_STRUCT__entry( |
221 | __field( unsigned int, rw ) | 253 | __field( unsigned, write ) |
222 | __field( unsigned int, ecx ) | 254 | __field( u32, ecx ) |
223 | __field( unsigned long, data ) | 255 | __field( u64, data ) |
256 | __field( u8, exception ) | ||
224 | ), | 257 | ), |
225 | 258 | ||
226 | TP_fast_assign( | 259 | TP_fast_assign( |
227 | __entry->rw = rw; | 260 | __entry->write = write; |
228 | __entry->ecx = ecx; | 261 | __entry->ecx = ecx; |
229 | __entry->data = data; | 262 | __entry->data = data; |
263 | __entry->exception = exception; | ||
230 | ), | 264 | ), |
231 | 265 | ||
232 | TP_printk("msr_%s %x = 0x%lx", | 266 | TP_printk("msr_%s %x = 0x%llx%s", |
233 | __entry->rw ? "write" : "read", | 267 | __entry->write ? "write" : "read", |
234 | __entry->ecx, __entry->data) | 268 | __entry->ecx, __entry->data, |
269 | __entry->exception ? " (#GP)" : "") | ||
235 | ); | 270 | ); |
236 | 271 | ||
237 | #define trace_kvm_msr_read(ecx, data) trace_kvm_msr(0, ecx, data) | 272 | #define trace_kvm_msr_read(ecx, data) trace_kvm_msr(0, ecx, data, false) |
238 | #define trace_kvm_msr_write(ecx, data) trace_kvm_msr(1, ecx, data) | 273 | #define trace_kvm_msr_write(ecx, data) trace_kvm_msr(1, ecx, data, false) |
274 | #define trace_kvm_msr_read_ex(ecx) trace_kvm_msr(0, ecx, 0, true) | ||
275 | #define trace_kvm_msr_write_ex(ecx, data) trace_kvm_msr(1, ecx, data, true) | ||
239 | 276 | ||
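The new _ex variants make rejected MSR accesses visible in the trace instead of vanishing into a bare #GP injection. With the format string above, a faulting read of a hypothetical MSR 0x17a would render roughly as:

    kvm_msr: msr_read 17a = 0x0 (#GP)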
240 | /* | 277 | /* |
241 | * Tracepoint for guest CR access. | 278 | * Tracepoint for guest CR access. |
@@ -349,6 +386,171 @@ TRACE_EVENT(kvm_apic_accept_irq, | |||
349 | __entry->coalesced ? " (coalesced)" : "") | 386 | __entry->coalesced ? " (coalesced)" : "") |
350 | ); | 387 | ); |
351 | 388 | ||
389 | /* | ||
390 | * Tracepoint for nested VMRUN | ||
391 | */ | ||
392 | TRACE_EVENT(kvm_nested_vmrun, | ||
393 | TP_PROTO(__u64 rip, __u64 vmcb, __u64 nested_rip, __u32 int_ctl, | ||
394 | __u32 event_inj, bool npt), | ||
395 | TP_ARGS(rip, vmcb, nested_rip, int_ctl, event_inj, npt), | ||
396 | |||
397 | TP_STRUCT__entry( | ||
398 | __field( __u64, rip ) | ||
399 | __field( __u64, vmcb ) | ||
400 | __field( __u64, nested_rip ) | ||
401 | __field( __u32, int_ctl ) | ||
402 | __field( __u32, event_inj ) | ||
403 | __field( bool, npt ) | ||
404 | ), | ||
405 | |||
406 | TP_fast_assign( | ||
407 | __entry->rip = rip; | ||
408 | __entry->vmcb = vmcb; | ||
409 | __entry->nested_rip = nested_rip; | ||
410 | __entry->int_ctl = int_ctl; | ||
411 | __entry->event_inj = event_inj; | ||
412 | __entry->npt = npt; | ||
413 | ), | ||
414 | |||
415 | TP_printk("rip: 0x%016llx vmcb: 0x%016llx nrip: 0x%016llx int_ctl: 0x%08x " | ||
416 | "event_inj: 0x%08x npt: %s\n", | ||
417 | __entry->rip, __entry->vmcb, __entry->nested_rip, | ||
418 | __entry->int_ctl, __entry->event_inj, | ||
419 | __entry->npt ? "on" : "off") | ||
420 | ); | ||
421 | |||
422 | /* | ||
423 | * Tracepoint for #VMEXIT while nested | ||
424 | */ | ||
425 | TRACE_EVENT(kvm_nested_vmexit, | ||
426 | TP_PROTO(__u64 rip, __u32 exit_code, | ||
427 | __u64 exit_info1, __u64 exit_info2, | ||
428 | __u32 exit_int_info, __u32 exit_int_info_err), | ||
429 | TP_ARGS(rip, exit_code, exit_info1, exit_info2, | ||
430 | exit_int_info, exit_int_info_err), | ||
431 | |||
432 | TP_STRUCT__entry( | ||
433 | __field( __u64, rip ) | ||
434 | __field( __u32, exit_code ) | ||
435 | __field( __u64, exit_info1 ) | ||
436 | __field( __u64, exit_info2 ) | ||
437 | __field( __u32, exit_int_info ) | ||
438 | __field( __u32, exit_int_info_err ) | ||
439 | ), | ||
440 | |||
441 | TP_fast_assign( | ||
442 | __entry->rip = rip; | ||
443 | __entry->exit_code = exit_code; | ||
444 | __entry->exit_info1 = exit_info1; | ||
445 | __entry->exit_info2 = exit_info2; | ||
446 | __entry->exit_int_info = exit_int_info; | ||
447 | __entry->exit_int_info_err = exit_int_info_err; | ||
448 | ), | ||
449 | TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx " | ||
450 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n", | ||
451 | __entry->rip, | ||
452 | ftrace_print_symbols_seq(p, __entry->exit_code, | ||
453 | kvm_x86_ops->exit_reasons_str), | ||
454 | __entry->exit_info1, __entry->exit_info2, | ||
455 | __entry->exit_int_info, __entry->exit_int_info_err) | ||
456 | ); | ||
457 | |||
458 | /* | ||
459 | * Tracepoint for #VMEXIT reinjected to the guest | ||
460 | */ | ||
461 | TRACE_EVENT(kvm_nested_vmexit_inject, | ||
462 | TP_PROTO(__u32 exit_code, | ||
463 | __u64 exit_info1, __u64 exit_info2, | ||
464 | __u32 exit_int_info, __u32 exit_int_info_err), | ||
465 | TP_ARGS(exit_code, exit_info1, exit_info2, | ||
466 | exit_int_info, exit_int_info_err), | ||
467 | |||
468 | TP_STRUCT__entry( | ||
469 | __field( __u32, exit_code ) | ||
470 | __field( __u64, exit_info1 ) | ||
471 | __field( __u64, exit_info2 ) | ||
472 | __field( __u32, exit_int_info ) | ||
473 | __field( __u32, exit_int_info_err ) | ||
474 | ), | ||
475 | |||
476 | TP_fast_assign( | ||
477 | __entry->exit_code = exit_code; | ||
478 | __entry->exit_info1 = exit_info1; | ||
479 | __entry->exit_info2 = exit_info2; | ||
480 | __entry->exit_int_info = exit_int_info; | ||
481 | __entry->exit_int_info_err = exit_int_info_err; | ||
482 | ), | ||
483 | |||
484 | TP_printk("reason: %s ext_inf1: 0x%016llx " | ||
485 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n", | ||
486 | ftrace_print_symbols_seq(p, __entry->exit_code, | ||
487 | kvm_x86_ops->exit_reasons_str), | ||
488 | __entry->exit_info1, __entry->exit_info2, | ||
489 | __entry->exit_int_info, __entry->exit_int_info_err) | ||
490 | ); | ||
491 | |||
492 | /* | ||
493 | * Tracepoint for nested #vmexit because of interrupt pending | ||
494 | */ | ||
495 | TRACE_EVENT(kvm_nested_intr_vmexit, | ||
496 | TP_PROTO(__u64 rip), | ||
497 | TP_ARGS(rip), | ||
498 | |||
499 | TP_STRUCT__entry( | ||
500 | __field( __u64, rip ) | ||
501 | ), | ||
502 | |||
503 | TP_fast_assign( | ||
504 | __entry->rip = rip; | ||
505 | ), | ||
506 | |||
507 | TP_printk("rip: 0x%016llx\n", __entry->rip) | ||
508 | ); | ||
509 | |||
510 | /* | ||
511 | * Tracepoint for the INVLPGA instruction | ||
512 | */ | ||
513 | TRACE_EVENT(kvm_invlpga, | ||
514 | TP_PROTO(__u64 rip, int asid, u64 address), | ||
515 | TP_ARGS(rip, asid, address), | ||
516 | |||
517 | TP_STRUCT__entry( | ||
518 | __field( __u64, rip ) | ||
519 | __field( int, asid ) | ||
520 | __field( __u64, address ) | ||
521 | ), | ||
522 | |||
523 | TP_fast_assign( | ||
524 | __entry->rip = rip; | ||
525 | __entry->asid = asid; | ||
526 | __entry->address = address; | ||
527 | ), | ||
528 | |||
529 | TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx\n", | ||
530 | __entry->rip, __entry->asid, __entry->address) | ||
531 | ); | ||
532 | |||
533 | /* | ||
534 | * Tracepoint for the SKINIT instruction | ||
535 | */ | ||
536 | TRACE_EVENT(kvm_skinit, | ||
537 | TP_PROTO(__u64 rip, __u32 slb), | ||
538 | TP_ARGS(rip, slb), | ||
539 | |||
540 | TP_STRUCT__entry( | ||
541 | __field( __u64, rip ) | ||
542 | __field( __u32, slb ) | ||
543 | ), | ||
544 | |||
545 | TP_fast_assign( | ||
546 | __entry->rip = rip; | ||
547 | __entry->slb = slb; | ||
548 | ), | ||
549 | |||
550 | TP_printk("rip: 0x%016llx slb: 0x%08x\n", | ||
551 | __entry->rip, __entry->slb) | ||
552 | ); | ||
553 | |||
352 | #endif /* _TRACE_KVM_H */ | 554 | #endif /* _TRACE_KVM_H */ |
353 | 555 | ||
354 | /* This part must be outside protection */ | 556 | /* This part must be outside protection */ |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ed53b42caba1..2f8db0ec8ae4 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/sched.h> | 26 | #include <linux/sched.h> |
27 | #include <linux/moduleparam.h> | 27 | #include <linux/moduleparam.h> |
28 | #include <linux/ftrace_event.h> | 28 | #include <linux/ftrace_event.h> |
29 | #include <linux/slab.h> | ||
29 | #include "kvm_cache_regs.h" | 30 | #include "kvm_cache_regs.h" |
30 | #include "x86.h" | 31 | #include "x86.h" |
31 | 32 | ||
@@ -61,12 +62,54 @@ module_param_named(unrestricted_guest, | |||
61 | static int __read_mostly emulate_invalid_guest_state = 0; | 62 | static int __read_mostly emulate_invalid_guest_state = 0; |
62 | module_param(emulate_invalid_guest_state, bool, S_IRUGO); | 63 | module_param(emulate_invalid_guest_state, bool, S_IRUGO); |
63 | 64 | ||
65 | #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ | ||
66 | (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD) | ||
67 | #define KVM_GUEST_CR0_MASK \ | ||
68 | (KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) | ||
69 | #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST \ | ||
70 | (X86_CR0_WP | X86_CR0_NE) | ||
71 | #define KVM_VM_CR0_ALWAYS_ON \ | ||
72 | (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) | ||
73 | #define KVM_CR4_GUEST_OWNED_BITS \ | ||
74 | (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ | ||
75 | | X86_CR4_OSXMMEXCPT) | ||
76 | |||
77 | #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) | ||
78 | #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) | ||
79 | |||
80 | #define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM)) | ||
81 | |||
82 | /* | ||
83 | * These two parameters configure the controls for Pause-Loop Exiting: | ||
84 | * ple_gap: upper bound on the amount of time between two successive | ||
85 | * executions of PAUSE in a loop. Also indicates whether PLE is enabled. | ||
86 | * In testing, this time is usually smaller than 41 cycles. | ||
87 | * ple_window: upper bound on the amount of time a guest is allowed to execute | ||
88 | * in a PAUSE loop. Tests indicate that most spinlocks are held for | ||
89 | * less than 2^12 cycles. | ||
90 | * Time is measured with a counter that runs at the same rate as the TSC; | ||
91 | * refer to SDM volume 3b, sections 21.6.13 and 22.1.3. | ||
92 | */ | ||
93 | #define KVM_VMX_DEFAULT_PLE_GAP 41 | ||
94 | #define KVM_VMX_DEFAULT_PLE_WINDOW 4096 | ||
95 | static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP; | ||
96 | module_param(ple_gap, int, S_IRUGO); | ||
97 | |||
98 | static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; | ||
99 | module_param(ple_window, int, S_IRUGO); | ||
100 | |||
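The two knobs are consumed during VMCS setup elsewhere in this patch (that hunk is not shown here). A minimal sketch of that wiring, assuming the PLE_GAP/PLE_WINDOW VMCS field names from the SDM and the usual vmcs_write32() accessor:

    /* Sketch: program Pause-Loop Exiting when the secondary exec control
     * is available; ple_gap == 0 doubles as the "PLE disabled" setting. */
    if (ple_gap) {
            vmcs_write32(PLE_GAP, ple_gap);
            vmcs_write32(PLE_WINDOW, ple_window);
    }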
64 | struct vmcs { | 101 | struct vmcs { |
65 | u32 revision_id; | 102 | u32 revision_id; |
66 | u32 abort; | 103 | u32 abort; |
67 | char data[0]; | 104 | char data[0]; |
68 | }; | 105 | }; |
69 | 106 | ||
107 | struct shared_msr_entry { | ||
108 | unsigned index; | ||
109 | u64 data; | ||
110 | u64 mask; | ||
111 | }; | ||
112 | |||
70 | struct vcpu_vmx { | 113 | struct vcpu_vmx { |
71 | struct kvm_vcpu vcpu; | 114 | struct kvm_vcpu vcpu; |
72 | struct list_head local_vcpus_link; | 115 | struct list_head local_vcpus_link; |
@@ -74,13 +117,12 @@ struct vcpu_vmx { | |||
74 | int launched; | 117 | int launched; |
75 | u8 fail; | 118 | u8 fail; |
76 | u32 idt_vectoring_info; | 119 | u32 idt_vectoring_info; |
77 | struct kvm_msr_entry *guest_msrs; | 120 | struct shared_msr_entry *guest_msrs; |
78 | struct kvm_msr_entry *host_msrs; | ||
79 | int nmsrs; | 121 | int nmsrs; |
80 | int save_nmsrs; | 122 | int save_nmsrs; |
81 | int msr_offset_efer; | ||
82 | #ifdef CONFIG_X86_64 | 123 | #ifdef CONFIG_X86_64 |
83 | int msr_offset_kernel_gs_base; | 124 | u64 msr_host_kernel_gs_base; |
125 | u64 msr_guest_kernel_gs_base; | ||
84 | #endif | 126 | #endif |
85 | struct vmcs *vmcs; | 127 | struct vmcs *vmcs; |
86 | struct { | 128 | struct { |
@@ -88,11 +130,10 @@ struct vcpu_vmx { | |||
88 | u16 fs_sel, gs_sel, ldt_sel; | 130 | u16 fs_sel, gs_sel, ldt_sel; |
89 | int gs_ldt_reload_needed; | 131 | int gs_ldt_reload_needed; |
90 | int fs_reload_needed; | 132 | int fs_reload_needed; |
91 | int guest_efer_loaded; | ||
92 | } host_state; | 133 | } host_state; |
93 | struct { | 134 | struct { |
94 | int vm86_active; | 135 | int vm86_active; |
95 | u8 save_iopl; | 136 | ulong save_rflags; |
96 | struct kvm_save_segment { | 137 | struct kvm_save_segment { |
97 | u16 selector; | 138 | u16 selector; |
98 | unsigned long base; | 139 | unsigned long base; |
@@ -107,13 +148,14 @@ struct vcpu_vmx { | |||
107 | } rmode; | 148 | } rmode; |
108 | int vpid; | 149 | int vpid; |
109 | bool emulation_required; | 150 | bool emulation_required; |
110 | enum emulation_result invalid_state_emulation_result; | ||
111 | 151 | ||
112 | /* Support for vnmi-less CPUs */ | 152 | /* Support for vnmi-less CPUs */ |
113 | int soft_vnmi_blocked; | 153 | int soft_vnmi_blocked; |
114 | ktime_t entry_time; | 154 | ktime_t entry_time; |
115 | s64 vnmi_blocked_time; | 155 | s64 vnmi_blocked_time; |
116 | u32 exit_reason; | 156 | u32 exit_reason; |
157 | |||
158 | bool rdtscp_enabled; | ||
117 | }; | 159 | }; |
118 | 160 | ||
119 | static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) | 161 | static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) |
@@ -176,6 +218,8 @@ static struct kvm_vmx_segment_field { | |||
176 | VMX_SEGMENT_FIELD(LDTR), | 218 | VMX_SEGMENT_FIELD(LDTR), |
177 | }; | 219 | }; |
178 | 220 | ||
221 | static u64 host_efer; | ||
222 | |||
179 | static void ept_save_pdptrs(struct kvm_vcpu *vcpu); | 223 | static void ept_save_pdptrs(struct kvm_vcpu *vcpu); |
180 | 224 | ||
181 | /* | 225 | /* |
@@ -184,28 +228,12 @@ static void ept_save_pdptrs(struct kvm_vcpu *vcpu); | |||
184 | */ | 228 | */ |
185 | static const u32 vmx_msr_index[] = { | 229 | static const u32 vmx_msr_index[] = { |
186 | #ifdef CONFIG_X86_64 | 230 | #ifdef CONFIG_X86_64 |
187 | MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, MSR_KERNEL_GS_BASE, | 231 | MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, |
188 | #endif | 232 | #endif |
189 | MSR_EFER, MSR_K6_STAR, | 233 | MSR_EFER, MSR_TSC_AUX, MSR_K6_STAR, |
190 | }; | 234 | }; |
191 | #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) | 235 | #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) |
192 | 236 | ||
193 | static void load_msrs(struct kvm_msr_entry *e, int n) | ||
194 | { | ||
195 | int i; | ||
196 | |||
197 | for (i = 0; i < n; ++i) | ||
198 | wrmsrl(e[i].index, e[i].data); | ||
199 | } | ||
200 | |||
201 | static void save_msrs(struct kvm_msr_entry *e, int n) | ||
202 | { | ||
203 | int i; | ||
204 | |||
205 | for (i = 0; i < n; ++i) | ||
206 | rdmsrl(e[i].index, e[i].data); | ||
207 | } | ||
208 | |||
209 | static inline int is_page_fault(u32 intr_info) | 237 | static inline int is_page_fault(u32 intr_info) |
210 | { | 238 | { |
211 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | 239 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | |
@@ -293,6 +321,11 @@ static inline bool cpu_has_vmx_ept_2m_page(void) | |||
293 | return !!(vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT); | 321 | return !!(vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT); |
294 | } | 322 | } |
295 | 323 | ||
324 | static inline bool cpu_has_vmx_ept_1g_page(void) | ||
325 | { | ||
326 | return !!(vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT); | ||
327 | } | ||
328 | |||
296 | static inline int cpu_has_vmx_invept_individual_addr(void) | 329 | static inline int cpu_has_vmx_invept_individual_addr(void) |
297 | { | 330 | { |
298 | return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT); | 331 | return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT); |
@@ -320,11 +353,15 @@ static inline int cpu_has_vmx_unrestricted_guest(void) | |||
320 | SECONDARY_EXEC_UNRESTRICTED_GUEST; | 353 | SECONDARY_EXEC_UNRESTRICTED_GUEST; |
321 | } | 354 | } |
322 | 355 | ||
356 | static inline int cpu_has_vmx_ple(void) | ||
357 | { | ||
358 | return vmcs_config.cpu_based_2nd_exec_ctrl & | ||
359 | SECONDARY_EXEC_PAUSE_LOOP_EXITING; | ||
360 | } | ||
361 | |||
323 | static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) | 362 | static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) |
324 | { | 363 | { |
325 | return flexpriority_enabled && | 364 | return flexpriority_enabled && irqchip_in_kernel(kvm); |
326 | (cpu_has_vmx_virtualize_apic_accesses()) && | ||
327 | (irqchip_in_kernel(kvm)); | ||
328 | } | 365 | } |
329 | 366 | ||
330 | static inline int cpu_has_vmx_vpid(void) | 367 | static inline int cpu_has_vmx_vpid(void) |
@@ -333,6 +370,12 @@ static inline int cpu_has_vmx_vpid(void) | |||
333 | SECONDARY_EXEC_ENABLE_VPID; | 370 | SECONDARY_EXEC_ENABLE_VPID; |
334 | } | 371 | } |
335 | 372 | ||
373 | static inline int cpu_has_vmx_rdtscp(void) | ||
374 | { | ||
375 | return vmcs_config.cpu_based_2nd_exec_ctrl & | ||
376 | SECONDARY_EXEC_RDTSCP; | ||
377 | } | ||
378 | |||
336 | static inline int cpu_has_virtual_nmis(void) | 379 | static inline int cpu_has_virtual_nmis(void) |
337 | { | 380 | { |
338 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; | 381 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; |
@@ -348,7 +391,7 @@ static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) | |||
348 | int i; | 391 | int i; |
349 | 392 | ||
350 | for (i = 0; i < vmx->nmsrs; ++i) | 393 | for (i = 0; i < vmx->nmsrs; ++i) |
351 | if (vmx->guest_msrs[i].index == msr) | 394 | if (vmx_msr_index[vmx->guest_msrs[i].index] == msr) |
352 | return i; | 395 | return i; |
353 | return -1; | 396 | return -1; |
354 | } | 397 | } |
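Note the indirection this hunk introduces: shared_msr_entry.index no longer holds the MSR number itself but a slot into the global vmx_msr_index[] table, which doubles as the registration order for the shared-MSR machinery. Any code that needs the actual MSR number must now go through the table:

    /* With the new layout, the MSR number behind guest_msrs[i] is: */
    u32 msr_num = vmx_msr_index[vmx->guest_msrs[i].index];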
@@ -379,7 +422,7 @@ static inline void __invept(int ext, u64 eptp, gpa_t gpa) | |||
379 | : : "a" (&operand), "c" (ext) : "cc", "memory"); | 422 | : : "a" (&operand), "c" (ext) : "cc", "memory"); |
380 | } | 423 | } |
381 | 424 | ||
382 | static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) | 425 | static struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) |
383 | { | 426 | { |
384 | int i; | 427 | int i; |
385 | 428 | ||
@@ -537,22 +580,18 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) | |||
537 | { | 580 | { |
538 | u32 eb; | 581 | u32 eb; |
539 | 582 | ||
540 | eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR); | 583 | eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | |
541 | if (!vcpu->fpu_active) | 584 | (1u << NM_VECTOR) | (1u << DB_VECTOR); |
542 | eb |= 1u << NM_VECTOR; | 585 | if ((vcpu->guest_debug & |
543 | /* | 586 | (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == |
544 | * Unconditionally intercept #DB so we can maintain dr6 without | 587 | (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) |
545 | * reading it every exit. | 588 | eb |= 1u << BP_VECTOR; |
546 | */ | ||
547 | eb |= 1u << DB_VECTOR; | ||
548 | if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { | ||
549 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) | ||
550 | eb |= 1u << BP_VECTOR; | ||
551 | } | ||
552 | if (to_vmx(vcpu)->rmode.vm86_active) | 589 | if (to_vmx(vcpu)->rmode.vm86_active) |
553 | eb = ~0; | 590 | eb = ~0; |
554 | if (enable_ept) | 591 | if (enable_ept) |
555 | eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ | 592 | eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ |
593 | if (vcpu->fpu_active) | ||
594 | eb &= ~(1u << NM_VECTOR); | ||
556 | vmcs_write32(EXCEPTION_BITMAP, eb); | 595 | vmcs_write32(EXCEPTION_BITMAP, eb); |
557 | } | 596 | } |
558 | 597 | ||
@@ -570,17 +609,12 @@ static void reload_tss(void) | |||
570 | load_TR_desc(); | 609 | load_TR_desc(); |
571 | } | 610 | } |
572 | 611 | ||
573 | static void load_transition_efer(struct vcpu_vmx *vmx) | 612 | static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) |
574 | { | 613 | { |
575 | int efer_offset = vmx->msr_offset_efer; | ||
576 | u64 host_efer; | ||
577 | u64 guest_efer; | 614 | u64 guest_efer; |
578 | u64 ignore_bits; | 615 | u64 ignore_bits; |
579 | 616 | ||
580 | if (efer_offset < 0) | 617 | guest_efer = vmx->vcpu.arch.efer; |
581 | return; | ||
582 | host_efer = vmx->host_msrs[efer_offset].data; | ||
583 | guest_efer = vmx->guest_msrs[efer_offset].data; | ||
584 | 618 | ||
585 | /* | 619 | /* |
586 | * NX is emulated; LMA and LME are handled by hardware; SCE is meaningless | 620 | * NX is emulated; LMA and LME are handled by hardware; SCE is meaningless |
@@ -593,27 +627,17 @@ static void load_transition_efer(struct vcpu_vmx *vmx) | |||
593 | if (guest_efer & EFER_LMA) | 627 | if (guest_efer & EFER_LMA) |
594 | ignore_bits &= ~(u64)EFER_SCE; | 628 | ignore_bits &= ~(u64)EFER_SCE; |
595 | #endif | 629 | #endif |
596 | if ((guest_efer & ~ignore_bits) == (host_efer & ~ignore_bits)) | ||
597 | return; | ||
598 | |||
599 | vmx->host_state.guest_efer_loaded = 1; | ||
600 | guest_efer &= ~ignore_bits; | 630 | guest_efer &= ~ignore_bits; |
601 | guest_efer |= host_efer & ignore_bits; | 631 | guest_efer |= host_efer & ignore_bits; |
602 | wrmsrl(MSR_EFER, guest_efer); | 632 | vmx->guest_msrs[efer_offset].data = guest_efer; |
603 | vmx->vcpu.stat.efer_reload++; | 633 | vmx->guest_msrs[efer_offset].mask = ~ignore_bits; |
604 | } | 634 | return true; |
605 | |||
606 | static void reload_host_efer(struct vcpu_vmx *vmx) | ||
607 | { | ||
608 | if (vmx->host_state.guest_efer_loaded) { | ||
609 | vmx->host_state.guest_efer_loaded = 0; | ||
610 | load_msrs(vmx->host_msrs + vmx->msr_offset_efer, 1); | ||
611 | } | ||
612 | } | 635 | } |
613 | 636 | ||
614 | static void vmx_save_host_state(struct kvm_vcpu *vcpu) | 637 | static void vmx_save_host_state(struct kvm_vcpu *vcpu) |
615 | { | 638 | { |
616 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 639 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
640 | int i; | ||
617 | 641 | ||
618 | if (vmx->host_state.loaded) | 642 | if (vmx->host_state.loaded) |
619 | return; | 643 | return; |
@@ -650,13 +674,15 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) | |||
650 | #endif | 674 | #endif |
651 | 675 | ||
652 | #ifdef CONFIG_X86_64 | 676 | #ifdef CONFIG_X86_64 |
653 | if (is_long_mode(&vmx->vcpu)) | 677 | if (is_long_mode(&vmx->vcpu)) { |
654 | save_msrs(vmx->host_msrs + | 678 | rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); |
655 | vmx->msr_offset_kernel_gs_base, 1); | 679 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); |
656 | 680 | } | |
657 | #endif | 681 | #endif |
658 | load_msrs(vmx->guest_msrs, vmx->save_nmsrs); | 682 | for (i = 0; i < vmx->save_nmsrs; ++i) |
659 | load_transition_efer(vmx); | 683 | kvm_set_shared_msr(vmx->guest_msrs[i].index, |
684 | vmx->guest_msrs[i].data, | ||
685 | vmx->guest_msrs[i].mask); | ||
660 | } | 686 | } |
661 | 687 | ||
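The mask computed by update_transition_efer() is what keeps the old "skip the EFER switch when guest and host agree modulo ignore_bits" optimization alive: kvm_set_shared_msr(), which lives in x86.c and is not shown in these hunks, only issues the expensive wrmsr when a masked bit actually changes, and arms a user-return notifier to restore host values afterwards. A sketch of its semantics, reconstructed from memory and possibly differing in detail:

    void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
    {
            struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);

            /* Skip the wrmsr when no bit covered by the mask changed. */
            if (((value ^ smsr->values[slot].curr) & mask) == 0)
                    return;
            smsr->values[slot].curr = value;
            wrmsrl(shared_msrs_global.msrs[slot], value);
            if (!smsr->registered) {
                    smsr->urn.on_user_return = kvm_on_user_return;
                    user_return_notifier_register(&smsr->urn);
                    smsr->registered = true;
            }
    }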
662 | static void __vmx_load_host_state(struct vcpu_vmx *vmx) | 688 | static void __vmx_load_host_state(struct vcpu_vmx *vmx) |
@@ -684,9 +710,12 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) | |||
684 | local_irq_restore(flags); | 710 | local_irq_restore(flags); |
685 | } | 711 | } |
686 | reload_tss(); | 712 | reload_tss(); |
687 | save_msrs(vmx->guest_msrs, vmx->save_nmsrs); | 713 | #ifdef CONFIG_X86_64 |
688 | load_msrs(vmx->host_msrs, vmx->save_nmsrs); | 714 | if (is_long_mode(&vmx->vcpu)) { |
689 | reload_host_efer(vmx); | 715 | rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); |
716 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); | ||
717 | } | ||
718 | #endif | ||
690 | } | 719 | } |
691 | 720 | ||
692 | static void vmx_load_host_state(struct vcpu_vmx *vmx) | 721 | static void vmx_load_host_state(struct vcpu_vmx *vmx) |
@@ -763,38 +792,51 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu) | |||
763 | 792 | ||
764 | static void vmx_fpu_activate(struct kvm_vcpu *vcpu) | 793 | static void vmx_fpu_activate(struct kvm_vcpu *vcpu) |
765 | { | 794 | { |
795 | ulong cr0; | ||
796 | |||
766 | if (vcpu->fpu_active) | 797 | if (vcpu->fpu_active) |
767 | return; | 798 | return; |
768 | vcpu->fpu_active = 1; | 799 | vcpu->fpu_active = 1; |
769 | vmcs_clear_bits(GUEST_CR0, X86_CR0_TS); | 800 | cr0 = vmcs_readl(GUEST_CR0); |
770 | if (vcpu->arch.cr0 & X86_CR0_TS) | 801 | cr0 &= ~(X86_CR0_TS | X86_CR0_MP); |
771 | vmcs_set_bits(GUEST_CR0, X86_CR0_TS); | 802 | cr0 |= kvm_read_cr0_bits(vcpu, X86_CR0_TS | X86_CR0_MP); |
803 | vmcs_writel(GUEST_CR0, cr0); | ||
772 | update_exception_bitmap(vcpu); | 804 | update_exception_bitmap(vcpu); |
805 | vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS; | ||
806 | vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); | ||
773 | } | 807 | } |
774 | 808 | ||
809 | static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu); | ||
810 | |||
775 | static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu) | 811 | static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu) |
776 | { | 812 | { |
777 | if (!vcpu->fpu_active) | 813 | vmx_decache_cr0_guest_bits(vcpu); |
778 | return; | 814 | vmcs_set_bits(GUEST_CR0, X86_CR0_TS | X86_CR0_MP); |
779 | vcpu->fpu_active = 0; | ||
780 | vmcs_set_bits(GUEST_CR0, X86_CR0_TS); | ||
781 | update_exception_bitmap(vcpu); | 815 | update_exception_bitmap(vcpu); |
816 | vcpu->arch.cr0_guest_owned_bits = 0; | ||
817 | vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); | ||
818 | vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0); | ||
782 | } | 819 | } |
783 | 820 | ||
784 | static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) | 821 | static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) |
785 | { | 822 | { |
786 | unsigned long rflags; | 823 | unsigned long rflags, save_rflags; |
787 | 824 | ||
788 | rflags = vmcs_readl(GUEST_RFLAGS); | 825 | rflags = vmcs_readl(GUEST_RFLAGS); |
789 | if (to_vmx(vcpu)->rmode.vm86_active) | 826 | if (to_vmx(vcpu)->rmode.vm86_active) { |
790 | rflags &= ~(unsigned long)(X86_EFLAGS_IOPL | X86_EFLAGS_VM); | 827 | rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; |
828 | save_rflags = to_vmx(vcpu)->rmode.save_rflags; | ||
829 | rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; | ||
830 | } | ||
791 | return rflags; | 831 | return rflags; |
792 | } | 832 | } |
793 | 833 | ||
794 | static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | 834 | static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) |
795 | { | 835 | { |
796 | if (to_vmx(vcpu)->rmode.vm86_active) | 836 | if (to_vmx(vcpu)->rmode.vm86_active) { |
837 | to_vmx(vcpu)->rmode.save_rflags = rflags; | ||
797 | rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; | 838 | rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; |
839 | } | ||
798 | vmcs_writel(GUEST_RFLAGS, rflags); | 840 | vmcs_writel(GUEST_RFLAGS, rflags); |
799 | } | 841 | } |
800 | 842 | ||
@@ -874,22 +916,22 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | |||
874 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); | 916 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); |
875 | } | 917 | } |
876 | 918 | ||
919 | static bool vmx_rdtscp_supported(void) | ||
920 | { | ||
921 | return cpu_has_vmx_rdtscp(); | ||
922 | } | ||
923 | |||
877 | /* | 924 | /* |
878 | * Swap MSR entry in host/guest MSR entry array. | 925 | * Swap MSR entry in host/guest MSR entry array. |
879 | */ | 926 | */ |
880 | #ifdef CONFIG_X86_64 | ||
881 | static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) | 927 | static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) |
882 | { | 928 | { |
883 | struct kvm_msr_entry tmp; | 929 | struct shared_msr_entry tmp; |
884 | 930 | ||
885 | tmp = vmx->guest_msrs[to]; | 931 | tmp = vmx->guest_msrs[to]; |
886 | vmx->guest_msrs[to] = vmx->guest_msrs[from]; | 932 | vmx->guest_msrs[to] = vmx->guest_msrs[from]; |
887 | vmx->guest_msrs[from] = tmp; | 933 | vmx->guest_msrs[from] = tmp; |
888 | tmp = vmx->host_msrs[to]; | ||
889 | vmx->host_msrs[to] = vmx->host_msrs[from]; | ||
890 | vmx->host_msrs[from] = tmp; | ||
891 | } | 934 | } |
892 | #endif | ||
893 | 935 | ||
894 | /* | 936 | /* |
895 | * Set up the vmcs to automatically save and restore system | 937 | * Set up the vmcs to automatically save and restore system |
@@ -898,15 +940,13 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) | |||
898 | */ | 940 | */ |
899 | static void setup_msrs(struct vcpu_vmx *vmx) | 941 | static void setup_msrs(struct vcpu_vmx *vmx) |
900 | { | 942 | { |
901 | int save_nmsrs; | 943 | int save_nmsrs, index; |
902 | unsigned long *msr_bitmap; | 944 | unsigned long *msr_bitmap; |
903 | 945 | ||
904 | vmx_load_host_state(vmx); | 946 | vmx_load_host_state(vmx); |
905 | save_nmsrs = 0; | 947 | save_nmsrs = 0; |
906 | #ifdef CONFIG_X86_64 | 948 | #ifdef CONFIG_X86_64 |
907 | if (is_long_mode(&vmx->vcpu)) { | 949 | if (is_long_mode(&vmx->vcpu)) { |
908 | int index; | ||
909 | |||
910 | index = __find_msr_index(vmx, MSR_SYSCALL_MASK); | 950 | index = __find_msr_index(vmx, MSR_SYSCALL_MASK); |
911 | if (index >= 0) | 951 | if (index >= 0) |
912 | move_msr_up(vmx, index, save_nmsrs++); | 952 | move_msr_up(vmx, index, save_nmsrs++); |
@@ -916,25 +956,23 @@ static void setup_msrs(struct vcpu_vmx *vmx) | |||
916 | index = __find_msr_index(vmx, MSR_CSTAR); | 956 | index = __find_msr_index(vmx, MSR_CSTAR); |
917 | if (index >= 0) | 957 | if (index >= 0) |
918 | move_msr_up(vmx, index, save_nmsrs++); | 958 | move_msr_up(vmx, index, save_nmsrs++); |
919 | index = __find_msr_index(vmx, MSR_KERNEL_GS_BASE); | 959 | index = __find_msr_index(vmx, MSR_TSC_AUX); |
920 | if (index >= 0) | 960 | if (index >= 0 && vmx->rdtscp_enabled) |
921 | move_msr_up(vmx, index, save_nmsrs++); | 961 | move_msr_up(vmx, index, save_nmsrs++); |
922 | /* | 962 | /* |
923 | * MSR_K6_STAR is only needed on long mode guests, and only | 963 | * MSR_K6_STAR is only needed on long mode guests, and only |
924 | * if efer.sce is enabled. | 964 | * if efer.sce is enabled. |
925 | */ | 965 | */ |
926 | index = __find_msr_index(vmx, MSR_K6_STAR); | 966 | index = __find_msr_index(vmx, MSR_K6_STAR); |
927 | if ((index >= 0) && (vmx->vcpu.arch.shadow_efer & EFER_SCE)) | 967 | if ((index >= 0) && (vmx->vcpu.arch.efer & EFER_SCE)) |
928 | move_msr_up(vmx, index, save_nmsrs++); | 968 | move_msr_up(vmx, index, save_nmsrs++); |
929 | } | 969 | } |
930 | #endif | 970 | #endif |
931 | vmx->save_nmsrs = save_nmsrs; | 971 | index = __find_msr_index(vmx, MSR_EFER); |
972 | if (index >= 0 && update_transition_efer(vmx, index)) | ||
973 | move_msr_up(vmx, index, save_nmsrs++); | ||
932 | 974 | ||
933 | #ifdef CONFIG_X86_64 | 975 | vmx->save_nmsrs = save_nmsrs; |
934 | vmx->msr_offset_kernel_gs_base = | ||
935 | __find_msr_index(vmx, MSR_KERNEL_GS_BASE); | ||
936 | #endif | ||
937 | vmx->msr_offset_efer = __find_msr_index(vmx, MSR_EFER); | ||
938 | 976 | ||
939 | if (cpu_has_vmx_msr_bitmap()) { | 977 | if (cpu_has_vmx_msr_bitmap()) { |
940 | if (is_long_mode(&vmx->vcpu)) | 978 | if (is_long_mode(&vmx->vcpu)) |
@@ -976,7 +1014,7 @@ static void guest_write_tsc(u64 guest_tsc, u64 host_tsc) | |||
976 | static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | 1014 | static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) |
977 | { | 1015 | { |
978 | u64 data; | 1016 | u64 data; |
979 | struct kvm_msr_entry *msr; | 1017 | struct shared_msr_entry *msr; |
980 | 1018 | ||
981 | if (!pdata) { | 1019 | if (!pdata) { |
982 | printk(KERN_ERR "BUG: get_msr called with NULL pdata\n"); | 1020 | printk(KERN_ERR "BUG: get_msr called with NULL pdata\n"); |
@@ -991,9 +1029,13 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
991 | case MSR_GS_BASE: | 1029 | case MSR_GS_BASE: |
992 | data = vmcs_readl(GUEST_GS_BASE); | 1030 | data = vmcs_readl(GUEST_GS_BASE); |
993 | break; | 1031 | break; |
1032 | case MSR_KERNEL_GS_BASE: | ||
1033 | vmx_load_host_state(to_vmx(vcpu)); | ||
1034 | data = to_vmx(vcpu)->msr_guest_kernel_gs_base; | ||
1035 | break; | ||
1036 | #endif | ||
994 | case MSR_EFER: | 1037 | case MSR_EFER: |
995 | return kvm_get_msr_common(vcpu, msr_index, pdata); | 1038 | return kvm_get_msr_common(vcpu, msr_index, pdata); |
996 | #endif | ||
997 | case MSR_IA32_TSC: | 1039 | case MSR_IA32_TSC: |
998 | data = guest_read_tsc(); | 1040 | data = guest_read_tsc(); |
999 | break; | 1041 | break; |
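
In the hunk above, MSR_KERNEL_GS_BASE reads no longer hit hardware: the guest's value only lives in the MSR while the guest is running, so vmx_get_msr() first forces host state back with vmx_load_host_state() and then answers from the cached msr_guest_kernel_gs_base. A rough model of that lazy switch (field and helper names here are hypothetical):

    #include <stdbool.h>
    #include <stdint.h>

    struct lazy_msr {
            uint64_t guest_val;     /* what the guest last wrote */
            bool guest_loaded;      /* is guest_val currently live in the MSR? */
    };

    /* Analogue of vmx_load_host_state(): put the host value back. */
    static void lazy_msr_to_host(struct lazy_msr *m)
    {
            if (m->guest_loaded) {
                    /* the kernel would wrmsrl() the saved host value here */
                    m->guest_loaded = false;
            }
    }

    static uint64_t lazy_msr_read(struct lazy_msr *m)
    {
            lazy_msr_to_host(m);    /* hardware now holds the host value */
            return m->guest_val;    /* so answer from the software cache */
    }

    static void lazy_msr_write(struct lazy_msr *m, uint64_t data)
    {
            lazy_msr_to_host(m);
            m->guest_val = data;    /* loaded into the MSR at next guest entry */
    }
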
@@ -1006,7 +1048,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
1006 | case MSR_IA32_SYSENTER_ESP: | 1048 | case MSR_IA32_SYSENTER_ESP: |
1007 | data = vmcs_readl(GUEST_SYSENTER_ESP); | 1049 | data = vmcs_readl(GUEST_SYSENTER_ESP); |
1008 | break; | 1050 | break; |
1051 | case MSR_TSC_AUX: | ||
1052 | if (!to_vmx(vcpu)->rdtscp_enabled) | ||
1053 | return 1; | ||
1054 | /* Otherwise falls through */ | ||
1009 | default: | 1055 | default: |
1056 | vmx_load_host_state(to_vmx(vcpu)); | ||
1010 | msr = find_msr_entry(to_vmx(vcpu), msr_index); | 1057 | msr = find_msr_entry(to_vmx(vcpu), msr_index); |
1011 | if (msr) { | 1058 | if (msr) { |
1012 | vmx_load_host_state(to_vmx(vcpu)); | 1059 | vmx_load_host_state(to_vmx(vcpu)); |
@@ -1028,7 +1075,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
1028 | static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | 1075 | static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) |
1029 | { | 1076 | { |
1030 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 1077 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
1031 | struct kvm_msr_entry *msr; | 1078 | struct shared_msr_entry *msr; |
1032 | u64 host_tsc; | 1079 | u64 host_tsc; |
1033 | int ret = 0; | 1080 | int ret = 0; |
1034 | 1081 | ||
@@ -1044,6 +1091,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
1044 | case MSR_GS_BASE: | 1091 | case MSR_GS_BASE: |
1045 | vmcs_writel(GUEST_GS_BASE, data); | 1092 | vmcs_writel(GUEST_GS_BASE, data); |
1046 | break; | 1093 | break; |
1094 | case MSR_KERNEL_GS_BASE: | ||
1095 | vmx_load_host_state(vmx); | ||
1096 | vmx->msr_guest_kernel_gs_base = data; | ||
1097 | break; | ||
1047 | #endif | 1098 | #endif |
1048 | case MSR_IA32_SYSENTER_CS: | 1099 | case MSR_IA32_SYSENTER_CS: |
1049 | vmcs_write32(GUEST_SYSENTER_CS, data); | 1100 | vmcs_write32(GUEST_SYSENTER_CS, data); |
@@ -1064,7 +1115,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
1064 | vcpu->arch.pat = data; | 1115 | vcpu->arch.pat = data; |
1065 | break; | 1116 | break; |
1066 | } | 1117 | } |
1067 | /* Otherwise falls through to kvm_set_msr_common */ | 1118 | ret = kvm_set_msr_common(vcpu, msr_index, data); |
1119 | break; | ||
1120 | case MSR_TSC_AUX: | ||
1121 | if (!vmx->rdtscp_enabled) | ||
1122 | return 1; | ||
1123 | /* Check reserved bits: the upper 32 bits must be zero */ | ||
1124 | if ((data >> 32) != 0) | ||
1125 | return 1; | ||
1126 | /* Otherwise falls through */ | ||
1068 | default: | 1127 | default: |
1069 | msr = find_msr_entry(vmx, msr_index); | 1128 | msr = find_msr_entry(vmx, msr_index); |
1070 | if (msr) { | 1129 | if (msr) { |
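
The MSR_TSC_AUX cases encode two rules: the MSR is only visible when the guest's RDTSCP capability is enabled, and because RDTSCP returns TSC_AUX in ECX, only bits 31:0 are meaningful, so a write with any upper bit set is rejected and surfaces as #GP. The write check reduces to:

    #include <stdbool.h>
    #include <stdint.h>

    /* Mirrors the "(data >> 32) != 0" test in vmx_set_msr(). */
    static bool tsc_aux_write_valid(uint64_t data)
    {
            return (data >> 32) == 0;       /* bits 63:32 are reserved */
    }
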
@@ -1097,30 +1156,14 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) | |||
1097 | } | 1156 | } |
1098 | } | 1157 | } |
1099 | 1158 | ||
1100 | static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) | 1159 | static void set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) |
1101 | { | 1160 | { |
1102 | int old_debug = vcpu->guest_debug; | ||
1103 | unsigned long flags; | ||
1104 | |||
1105 | vcpu->guest_debug = dbg->control; | ||
1106 | if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE)) | ||
1107 | vcpu->guest_debug = 0; | ||
1108 | |||
1109 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) | 1161 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) |
1110 | vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]); | 1162 | vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]); |
1111 | else | 1163 | else |
1112 | vmcs_writel(GUEST_DR7, vcpu->arch.dr7); | 1164 | vmcs_writel(GUEST_DR7, vcpu->arch.dr7); |
1113 | 1165 | ||
1114 | flags = vmcs_readl(GUEST_RFLAGS); | ||
1115 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | ||
1116 | flags |= X86_EFLAGS_TF | X86_EFLAGS_RF; | ||
1117 | else if (old_debug & KVM_GUESTDBG_SINGLESTEP) | ||
1118 | flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); | ||
1119 | vmcs_writel(GUEST_RFLAGS, flags); | ||
1120 | |||
1121 | update_exception_bitmap(vcpu); | 1166 | update_exception_bitmap(vcpu); |
1122 | |||
1123 | return 0; | ||
1124 | } | 1167 | } |
1125 | 1168 | ||
1126 | static __init int cpu_has_kvm_support(void) | 1169 | static __init int cpu_has_kvm_support(void) |
@@ -1139,12 +1182,15 @@ static __init int vmx_disabled_by_bios(void) | |||
1139 | /* locked but not enabled */ | 1182 | /* locked but not enabled */ |
1140 | } | 1183 | } |
1141 | 1184 | ||
1142 | static void hardware_enable(void *garbage) | 1185 | static int hardware_enable(void *garbage) |
1143 | { | 1186 | { |
1144 | int cpu = raw_smp_processor_id(); | 1187 | int cpu = raw_smp_processor_id(); |
1145 | u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); | 1188 | u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); |
1146 | u64 old; | 1189 | u64 old; |
1147 | 1190 | ||
1191 | if (read_cr4() & X86_CR4_VMXE) | ||
1192 | return -EBUSY; | ||
1193 | |||
1148 | INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); | 1194 | INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); |
1149 | rdmsrl(MSR_IA32_FEATURE_CONTROL, old); | 1195 | rdmsrl(MSR_IA32_FEATURE_CONTROL, old); |
1150 | if ((old & (FEATURE_CONTROL_LOCKED | | 1196 | if ((old & (FEATURE_CONTROL_LOCKED | |
@@ -1159,6 +1205,10 @@ static void hardware_enable(void *garbage) | |||
1159 | asm volatile (ASM_VMX_VMXON_RAX | 1205 | asm volatile (ASM_VMX_VMXON_RAX |
1160 | : : "a"(&phys_addr), "m"(phys_addr) | 1206 | : : "a"(&phys_addr), "m"(phys_addr) |
1161 | : "memory", "cc"); | 1207 | : "memory", "cc"); |
1208 | |||
1209 | ept_sync_global(); | ||
1210 | |||
1211 | return 0; | ||
1162 | } | 1212 | } |
1163 | 1213 | ||
1164 | static void vmclear_local_vcpus(void) | 1214 | static void vmclear_local_vcpus(void) |
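
hardware_enable() now returns a status instead of void: if CR4.VMXE is already set, VMX on this CPU is owned by someone else (another hypervisor, or an earlier enable), so the function backs off with -EBUSY instead of trying VMXON again. A sketch of that guard; read_cr4() is the host helper used in the hunk, stubbed here as an extern so the sketch stands alone:

    #include <errno.h>

    #define X86_CR4_VMXE (1UL << 13)

    extern unsigned long read_cr4(void);    /* host helper, stubbed here */

    static int try_claim_vmx(void)
    {
            if (read_cr4() & X86_CR4_VMXE)
                    return -EBUSY;          /* VMX already in use on this CPU */
            /* ... set FEATURE_CONTROL, CR4.VMXE, then execute VMXON ... */
            return 0;
    }
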
@@ -1232,6 +1282,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
1232 | CPU_BASED_USE_IO_BITMAPS | | 1282 | CPU_BASED_USE_IO_BITMAPS | |
1233 | CPU_BASED_MOV_DR_EXITING | | 1283 | CPU_BASED_MOV_DR_EXITING | |
1234 | CPU_BASED_USE_TSC_OFFSETING | | 1284 | CPU_BASED_USE_TSC_OFFSETING | |
1285 | CPU_BASED_MWAIT_EXITING | | ||
1286 | CPU_BASED_MONITOR_EXITING | | ||
1235 | CPU_BASED_INVLPG_EXITING; | 1287 | CPU_BASED_INVLPG_EXITING; |
1236 | opt = CPU_BASED_TPR_SHADOW | | 1288 | opt = CPU_BASED_TPR_SHADOW | |
1237 | CPU_BASED_USE_MSR_BITMAPS | | 1289 | CPU_BASED_USE_MSR_BITMAPS | |
@@ -1250,7 +1302,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
1250 | SECONDARY_EXEC_WBINVD_EXITING | | 1302 | SECONDARY_EXEC_WBINVD_EXITING | |
1251 | SECONDARY_EXEC_ENABLE_VPID | | 1303 | SECONDARY_EXEC_ENABLE_VPID | |
1252 | SECONDARY_EXEC_ENABLE_EPT | | 1304 | SECONDARY_EXEC_ENABLE_EPT | |
1253 | SECONDARY_EXEC_UNRESTRICTED_GUEST; | 1305 | SECONDARY_EXEC_UNRESTRICTED_GUEST | |
1306 | SECONDARY_EXEC_PAUSE_LOOP_EXITING | | ||
1307 | SECONDARY_EXEC_RDTSCP; | ||
1254 | if (adjust_vmx_controls(min2, opt2, | 1308 | if (adjust_vmx_controls(min2, opt2, |
1255 | MSR_IA32_VMX_PROCBASED_CTLS2, | 1309 | MSR_IA32_VMX_PROCBASED_CTLS2, |
1256 | &_cpu_based_2nd_exec_control) < 0) | 1310 | &_cpu_based_2nd_exec_control) < 0) |
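
setup_vmcs_config() keeps growing the required ("min") and optional ("opt") control sets; both are clamped against the VMX capability MSR, whose high dword gives the allowed-1 settings and low dword the mandatory-1 settings. A reconstruction of roughly what the adjust_vmx_controls() helper does (a sketch under those assumptions, not the verbatim kernel code):

    #include <stdint.h>

    static int adjust_controls(uint32_t min, uint32_t opt,
                               uint32_t msr_low, uint32_t msr_high,
                               uint32_t *result)
    {
            uint32_t ctl = min | opt;

            ctl &= msr_high;        /* bit clear in high dword => must be 0 */
            ctl |= msr_low;         /* bit set in low dword    => must be 1 */

            if (min & ~ctl)         /* a required control is unsupported */
                    return -1;

            *result = ctl;
            return 0;
    }
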
@@ -1344,15 +1398,17 @@ static void free_kvm_area(void) | |||
1344 | { | 1398 | { |
1345 | int cpu; | 1399 | int cpu; |
1346 | 1400 | ||
1347 | for_each_online_cpu(cpu) | 1401 | for_each_possible_cpu(cpu) { |
1348 | free_vmcs(per_cpu(vmxarea, cpu)); | 1402 | free_vmcs(per_cpu(vmxarea, cpu)); |
1403 | per_cpu(vmxarea, cpu) = NULL; | ||
1404 | } | ||
1349 | } | 1405 | } |
1350 | 1406 | ||
1351 | static __init int alloc_kvm_area(void) | 1407 | static __init int alloc_kvm_area(void) |
1352 | { | 1408 | { |
1353 | int cpu; | 1409 | int cpu; |
1354 | 1410 | ||
1355 | for_each_online_cpu(cpu) { | 1411 | for_each_possible_cpu(cpu) { |
1356 | struct vmcs *vmcs; | 1412 | struct vmcs *vmcs; |
1357 | 1413 | ||
1358 | vmcs = alloc_vmcs_cpu(cpu); | 1414 | vmcs = alloc_vmcs_cpu(cpu); |
@@ -1394,6 +1450,9 @@ static __init int hardware_setup(void) | |||
1394 | if (enable_ept && !cpu_has_vmx_ept_2m_page()) | 1450 | if (enable_ept && !cpu_has_vmx_ept_2m_page()) |
1395 | kvm_disable_largepages(); | 1451 | kvm_disable_largepages(); |
1396 | 1452 | ||
1453 | if (!cpu_has_vmx_ple()) | ||
1454 | ple_gap = 0; | ||
1455 | |||
1397 | return alloc_kvm_area(); | 1456 | return alloc_kvm_area(); |
1398 | } | 1457 | } |
1399 | 1458 | ||
@@ -1431,8 +1490,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu) | |||
1431 | vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar); | 1490 | vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar); |
1432 | 1491 | ||
1433 | flags = vmcs_readl(GUEST_RFLAGS); | 1492 | flags = vmcs_readl(GUEST_RFLAGS); |
1434 | flags &= ~(X86_EFLAGS_IOPL | X86_EFLAGS_VM); | 1493 | flags &= RMODE_GUEST_OWNED_EFLAGS_BITS; |
1435 | flags |= (vmx->rmode.save_iopl << IOPL_SHIFT); | 1494 | flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; |
1436 | vmcs_writel(GUEST_RFLAGS, flags); | 1495 | vmcs_writel(GUEST_RFLAGS, flags); |
1437 | 1496 | ||
1438 | vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) | | 1497 | vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) | |
@@ -1459,8 +1518,12 @@ static void enter_pmode(struct kvm_vcpu *vcpu) | |||
1459 | static gva_t rmode_tss_base(struct kvm *kvm) | 1518 | static gva_t rmode_tss_base(struct kvm *kvm) |
1460 | { | 1519 | { |
1461 | if (!kvm->arch.tss_addr) { | 1520 | if (!kvm->arch.tss_addr) { |
1462 | gfn_t base_gfn = kvm->memslots[0].base_gfn + | 1521 | struct kvm_memslots *slots; |
1463 | kvm->memslots[0].npages - 3; | 1522 | gfn_t base_gfn; |
1523 | |||
1524 | slots = rcu_dereference(kvm->memslots); | ||
1525 | base_gfn = kvm->memslots->memslots[0].base_gfn + | ||
1526 | kvm->memslots->memslots[0].npages - 3; | ||
1464 | return base_gfn << PAGE_SHIFT; | 1527 | return base_gfn << PAGE_SHIFT; |
1465 | } | 1528 | } |
1466 | return kvm->arch.tss_addr; | 1529 | return kvm->arch.tss_addr; |
@@ -1501,8 +1564,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
1501 | vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); | 1564 | vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); |
1502 | 1565 | ||
1503 | flags = vmcs_readl(GUEST_RFLAGS); | 1566 | flags = vmcs_readl(GUEST_RFLAGS); |
1504 | vmx->rmode.save_iopl | 1567 | vmx->rmode.save_rflags = flags; |
1505 | = (flags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; | ||
1506 | 1568 | ||
1507 | flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; | 1569 | flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; |
1508 | 1570 | ||
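
The save_iopl to save_rflags change fixes a loss of state: vm86-based real-mode emulation clobbers more of RFLAGS than just IOPL, so the whole register is now stashed on entry to real mode and every bit the emulation owns is restored on the way back to protected mode. The two transitions, modeled in isolation:

    #include <stdint.h>

    #define X86_EFLAGS_IOPL (3UL << 12)
    #define X86_EFLAGS_VM   (1UL << 17)

    /* Everything except the bits vm86 emulation owns. */
    #define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))

    static uint64_t rmode_enter_flags(uint64_t flags, uint64_t *save_rflags)
    {
            *save_rflags = flags;           /* stash the full register */
            return flags | X86_EFLAGS_IOPL | X86_EFLAGS_VM;
    }

    static uint64_t pmode_restore_flags(uint64_t flags, uint64_t save_rflags)
    {
            flags &= RMODE_GUEST_OWNED_EFLAGS_BITS;     /* keep guest-owned bits */
            flags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; /* restore ours */
            return flags;
    }
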
@@ -1536,11 +1598,17 @@ continue_rmode: | |||
1536 | static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) | 1598 | static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) |
1537 | { | 1599 | { |
1538 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 1600 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
1539 | struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER); | 1601 | struct shared_msr_entry *msr = find_msr_entry(vmx, MSR_EFER); |
1540 | 1602 | ||
1541 | vcpu->arch.shadow_efer = efer; | ||
1542 | if (!msr) | 1603 | if (!msr) |
1543 | return; | 1604 | return; |
1605 | |||
1606 | /* | ||
1607 | * Force kernel_gs_base reloading before EFER changes, as control | ||
1608 | * of this msr depends on is_long_mode(). | ||
1609 | */ | ||
1610 | vmx_load_host_state(to_vmx(vcpu)); | ||
1611 | vcpu->arch.efer = efer; | ||
1544 | if (efer & EFER_LMA) { | 1612 | if (efer & EFER_LMA) { |
1545 | vmcs_write32(VM_ENTRY_CONTROLS, | 1613 | vmcs_write32(VM_ENTRY_CONTROLS, |
1546 | vmcs_read32(VM_ENTRY_CONTROLS) | | 1614 | vmcs_read32(VM_ENTRY_CONTROLS) | |
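
The comment added to vmx_set_efer() captures an ordering hazard: which value belongs in MSR_KERNEL_GS_BASE is decided by is_long_mode(), i.e. by EFER, so host MSR state must be restored before EFER changes; otherwise the lazy-switch code would pair the wrong host and guest values. Reduced to its skeleton (the types and helper are hypothetical):

    #include <stdbool.h>
    #include <stdint.h>

    struct vcpu {
            uint64_t efer;
            bool guest_msrs_loaded;
    };

    static void restore_host_msrs(struct vcpu *v)  /* vmx_load_host_state() analogue */
    {
            v->guest_msrs_loaded = false;  /* would rewrite MSR_KERNEL_GS_BASE etc. */
    }

    static void set_efer_ordered(struct vcpu *v, uint64_t efer)
    {
            restore_host_msrs(v);   /* must happen under the *old* mode */
            v->efer = efer;         /* only now change the mode bits */
    }
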
@@ -1570,13 +1638,13 @@ static void enter_lmode(struct kvm_vcpu *vcpu) | |||
1570 | (guest_tr_ar & ~AR_TYPE_MASK) | 1638 | (guest_tr_ar & ~AR_TYPE_MASK) |
1571 | | AR_TYPE_BUSY_64_TSS); | 1639 | | AR_TYPE_BUSY_64_TSS); |
1572 | } | 1640 | } |
1573 | vcpu->arch.shadow_efer |= EFER_LMA; | 1641 | vcpu->arch.efer |= EFER_LMA; |
1574 | vmx_set_efer(vcpu, vcpu->arch.shadow_efer); | 1642 | vmx_set_efer(vcpu, vcpu->arch.efer); |
1575 | } | 1643 | } |
1576 | 1644 | ||
1577 | static void exit_lmode(struct kvm_vcpu *vcpu) | 1645 | static void exit_lmode(struct kvm_vcpu *vcpu) |
1578 | { | 1646 | { |
1579 | vcpu->arch.shadow_efer &= ~EFER_LMA; | 1647 | vcpu->arch.efer &= ~EFER_LMA; |
1580 | 1648 | ||
1581 | vmcs_write32(VM_ENTRY_CONTROLS, | 1649 | vmcs_write32(VM_ENTRY_CONTROLS, |
1582 | vmcs_read32(VM_ENTRY_CONTROLS) | 1650 | vmcs_read32(VM_ENTRY_CONTROLS) |
@@ -1592,10 +1660,20 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu) | |||
1592 | ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); | 1660 | ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); |
1593 | } | 1661 | } |
1594 | 1662 | ||
1663 | static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) | ||
1664 | { | ||
1665 | ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits; | ||
1666 | |||
1667 | vcpu->arch.cr0 &= ~cr0_guest_owned_bits; | ||
1668 | vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits; | ||
1669 | } | ||
1670 | |||
1595 | static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) | 1671 | static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) |
1596 | { | 1672 | { |
1597 | vcpu->arch.cr4 &= KVM_GUEST_CR4_MASK; | 1673 | ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits; |
1598 | vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK; | 1674 | |
1675 | vcpu->arch.cr4 &= ~cr4_guest_owned_bits; | ||
1676 | vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & cr4_guest_owned_bits; | ||
1599 | } | 1677 | } |
1600 | 1678 | ||
1601 | static void ept_load_pdptrs(struct kvm_vcpu *vcpu) | 1679 | static void ept_load_pdptrs(struct kvm_vcpu *vcpu) |
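
vmx_decache_cr0_guest_bits() and the reworked CR4 variant follow one pattern: bits the guest owns (those cleared in the CRx guest/host mask) live in the VMCS, not in vcpu->arch.crX, so a read merges the cached non-owned bits with the owned bits fetched from hardware:

    #include <stdint.h>

    /* vmcs_cr models vmcs_readl(GUEST_CR0) or vmcs_readl(GUEST_CR4). */
    static uint64_t decache_cr(uint64_t cached, uint64_t vmcs_cr,
                               uint64_t guest_owned_bits)
    {
            cached &= ~guest_owned_bits;            /* drop stale owned bits */
            cached |= vmcs_cr & guest_owned_bits;   /* refresh from hardware */
            return cached;
    }
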
@@ -1640,7 +1718,7 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, | |||
1640 | (CPU_BASED_CR3_LOAD_EXITING | | 1718 | (CPU_BASED_CR3_LOAD_EXITING | |
1641 | CPU_BASED_CR3_STORE_EXITING)); | 1719 | CPU_BASED_CR3_STORE_EXITING)); |
1642 | vcpu->arch.cr0 = cr0; | 1720 | vcpu->arch.cr0 = cr0; |
1643 | vmx_set_cr4(vcpu, vcpu->arch.cr4); | 1721 | vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); |
1644 | } else if (!is_paging(vcpu)) { | 1722 | } else if (!is_paging(vcpu)) { |
1645 | /* From nonpaging to paging */ | 1723 | /* From nonpaging to paging */ |
1646 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, | 1724 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, |
@@ -1648,23 +1726,13 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, | |||
1648 | ~(CPU_BASED_CR3_LOAD_EXITING | | 1726 | ~(CPU_BASED_CR3_LOAD_EXITING | |
1649 | CPU_BASED_CR3_STORE_EXITING)); | 1727 | CPU_BASED_CR3_STORE_EXITING)); |
1650 | vcpu->arch.cr0 = cr0; | 1728 | vcpu->arch.cr0 = cr0; |
1651 | vmx_set_cr4(vcpu, vcpu->arch.cr4); | 1729 | vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); |
1652 | } | 1730 | } |
1653 | 1731 | ||
1654 | if (!(cr0 & X86_CR0_WP)) | 1732 | if (!(cr0 & X86_CR0_WP)) |
1655 | *hw_cr0 &= ~X86_CR0_WP; | 1733 | *hw_cr0 &= ~X86_CR0_WP; |
1656 | } | 1734 | } |
1657 | 1735 | ||
1658 | static void ept_update_paging_mode_cr4(unsigned long *hw_cr4, | ||
1659 | struct kvm_vcpu *vcpu) | ||
1660 | { | ||
1661 | if (!is_paging(vcpu)) { | ||
1662 | *hw_cr4 &= ~X86_CR4_PAE; | ||
1663 | *hw_cr4 |= X86_CR4_PSE; | ||
1664 | } else if (!(vcpu->arch.cr4 & X86_CR4_PAE)) | ||
1665 | *hw_cr4 &= ~X86_CR4_PAE; | ||
1666 | } | ||
1667 | |||
1668 | static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | 1736 | static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) |
1669 | { | 1737 | { |
1670 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 1738 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
@@ -1676,8 +1744,6 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
1676 | else | 1744 | else |
1677 | hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON; | 1745 | hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON; |
1678 | 1746 | ||
1679 | vmx_fpu_deactivate(vcpu); | ||
1680 | |||
1681 | if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE)) | 1747 | if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE)) |
1682 | enter_pmode(vcpu); | 1748 | enter_pmode(vcpu); |
1683 | 1749 | ||
@@ -1685,7 +1751,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
1685 | enter_rmode(vcpu); | 1751 | enter_rmode(vcpu); |
1686 | 1752 | ||
1687 | #ifdef CONFIG_X86_64 | 1753 | #ifdef CONFIG_X86_64 |
1688 | if (vcpu->arch.shadow_efer & EFER_LME) { | 1754 | if (vcpu->arch.efer & EFER_LME) { |
1689 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) | 1755 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) |
1690 | enter_lmode(vcpu); | 1756 | enter_lmode(vcpu); |
1691 | if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) | 1757 | if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) |
@@ -1696,12 +1762,12 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
1696 | if (enable_ept) | 1762 | if (enable_ept) |
1697 | ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); | 1763 | ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); |
1698 | 1764 | ||
1765 | if (!vcpu->fpu_active) | ||
1766 | hw_cr0 |= X86_CR0_TS | X86_CR0_MP; | ||
1767 | |||
1699 | vmcs_writel(CR0_READ_SHADOW, cr0); | 1768 | vmcs_writel(CR0_READ_SHADOW, cr0); |
1700 | vmcs_writel(GUEST_CR0, hw_cr0); | 1769 | vmcs_writel(GUEST_CR0, hw_cr0); |
1701 | vcpu->arch.cr0 = cr0; | 1770 | vcpu->arch.cr0 = cr0; |
1702 | |||
1703 | if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE)) | ||
1704 | vmx_fpu_activate(vcpu); | ||
1705 | } | 1771 | } |
1706 | 1772 | ||
1707 | static u64 construct_eptp(unsigned long root_hpa) | 1773 | static u64 construct_eptp(unsigned long root_hpa) |
@@ -1727,12 +1793,11 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
1727 | vmcs_write64(EPT_POINTER, eptp); | 1793 | vmcs_write64(EPT_POINTER, eptp); |
1728 | guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 : | 1794 | guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 : |
1729 | vcpu->kvm->arch.ept_identity_map_addr; | 1795 | vcpu->kvm->arch.ept_identity_map_addr; |
1796 | ept_load_pdptrs(vcpu); | ||
1730 | } | 1797 | } |
1731 | 1798 | ||
1732 | vmx_flush_tlb(vcpu); | 1799 | vmx_flush_tlb(vcpu); |
1733 | vmcs_writel(GUEST_CR3, guest_cr3); | 1800 | vmcs_writel(GUEST_CR3, guest_cr3); |
1734 | if (vcpu->arch.cr0 & X86_CR0_PE) | ||
1735 | vmx_fpu_deactivate(vcpu); | ||
1736 | } | 1801 | } |
1737 | 1802 | ||
1738 | static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 1803 | static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) |
@@ -1741,8 +1806,14 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
1741 | KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); | 1806 | KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); |
1742 | 1807 | ||
1743 | vcpu->arch.cr4 = cr4; | 1808 | vcpu->arch.cr4 = cr4; |
1744 | if (enable_ept) | 1809 | if (enable_ept) { |
1745 | ept_update_paging_mode_cr4(&hw_cr4, vcpu); | 1810 | if (!is_paging(vcpu)) { |
1811 | hw_cr4 &= ~X86_CR4_PAE; | ||
1812 | hw_cr4 |= X86_CR4_PSE; | ||
1813 | } else if (!(cr4 & X86_CR4_PAE)) { | ||
1814 | hw_cr4 &= ~X86_CR4_PAE; | ||
1815 | } | ||
1816 | } | ||
1746 | 1817 | ||
1747 | vmcs_writel(CR4_READ_SHADOW, cr4); | 1818 | vmcs_writel(CR4_READ_SHADOW, cr4); |
1748 | vmcs_writel(GUEST_CR4, hw_cr4); | 1819 | vmcs_writel(GUEST_CR4, hw_cr4); |
@@ -1780,7 +1851,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, | |||
1780 | 1851 | ||
1781 | static int vmx_get_cpl(struct kvm_vcpu *vcpu) | 1852 | static int vmx_get_cpl(struct kvm_vcpu *vcpu) |
1782 | { | 1853 | { |
1783 | if (!(vcpu->arch.cr0 & X86_CR0_PE)) /* if real mode */ | 1854 | if (!is_protmode(vcpu)) |
1784 | return 0; | 1855 | return 0; |
1785 | 1856 | ||
1786 | if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */ | 1857 | if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */ |
@@ -2035,7 +2106,7 @@ static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu) | |||
2035 | static bool guest_state_valid(struct kvm_vcpu *vcpu) | 2106 | static bool guest_state_valid(struct kvm_vcpu *vcpu) |
2036 | { | 2107 | { |
2037 | /* real mode guest state checks */ | 2108 | /* real mode guest state checks */ |
2038 | if (!(vcpu->arch.cr0 & X86_CR0_PE)) { | 2109 | if (!is_protmode(vcpu)) { |
2039 | if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) | 2110 | if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) |
2040 | return false; | 2111 | return false; |
2041 | if (!rmode_segment_valid(vcpu, VCPU_SREG_SS)) | 2112 | if (!rmode_segment_valid(vcpu, VCPU_SREG_SS)) |
@@ -2168,7 +2239,7 @@ static int alloc_apic_access_page(struct kvm *kvm) | |||
2168 | struct kvm_userspace_memory_region kvm_userspace_mem; | 2239 | struct kvm_userspace_memory_region kvm_userspace_mem; |
2169 | int r = 0; | 2240 | int r = 0; |
2170 | 2241 | ||
2171 | down_write(&kvm->slots_lock); | 2242 | mutex_lock(&kvm->slots_lock); |
2172 | if (kvm->arch.apic_access_page) | 2243 | if (kvm->arch.apic_access_page) |
2173 | goto out; | 2244 | goto out; |
2174 | kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; | 2245 | kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; |
@@ -2181,7 +2252,7 @@ static int alloc_apic_access_page(struct kvm *kvm) | |||
2181 | 2252 | ||
2182 | kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00); | 2253 | kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00); |
2183 | out: | 2254 | out: |
2184 | up_write(&kvm->slots_lock); | 2255 | mutex_unlock(&kvm->slots_lock); |
2185 | return r; | 2256 | return r; |
2186 | } | 2257 | } |
2187 | 2258 | ||
@@ -2190,7 +2261,7 @@ static int alloc_identity_pagetable(struct kvm *kvm) | |||
2190 | struct kvm_userspace_memory_region kvm_userspace_mem; | 2261 | struct kvm_userspace_memory_region kvm_userspace_mem; |
2191 | int r = 0; | 2262 | int r = 0; |
2192 | 2263 | ||
2193 | down_write(&kvm->slots_lock); | 2264 | mutex_lock(&kvm->slots_lock); |
2194 | if (kvm->arch.ept_identity_pagetable) | 2265 | if (kvm->arch.ept_identity_pagetable) |
2195 | goto out; | 2266 | goto out; |
2196 | kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; | 2267 | kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; |
@@ -2205,7 +2276,7 @@ static int alloc_identity_pagetable(struct kvm *kvm) | |||
2205 | kvm->arch.ept_identity_pagetable = gfn_to_page(kvm, | 2276 | kvm->arch.ept_identity_pagetable = gfn_to_page(kvm, |
2206 | kvm->arch.ept_identity_map_addr >> PAGE_SHIFT); | 2277 | kvm->arch.ept_identity_map_addr >> PAGE_SHIFT); |
2207 | out: | 2278 | out: |
2208 | up_write(&kvm->slots_lock); | 2279 | mutex_unlock(&kvm->slots_lock); |
2209 | return r; | 2280 | return r; |
2210 | } | 2281 | } |
2211 | 2282 | ||
@@ -2302,13 +2373,22 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2302 | ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | 2373 | ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; |
2303 | if (vmx->vpid == 0) | 2374 | if (vmx->vpid == 0) |
2304 | exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; | 2375 | exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; |
2305 | if (!enable_ept) | 2376 | if (!enable_ept) { |
2306 | exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; | 2377 | exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; |
2378 | enable_unrestricted_guest = 0; | ||
2379 | } | ||
2307 | if (!enable_unrestricted_guest) | 2380 | if (!enable_unrestricted_guest) |
2308 | exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; | 2381 | exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; |
2382 | if (!ple_gap) | ||
2383 | exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; | ||
2309 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | 2384 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); |
2310 | } | 2385 | } |
2311 | 2386 | ||
2387 | if (ple_gap) { | ||
2388 | vmcs_write32(PLE_GAP, ple_gap); | ||
2389 | vmcs_write32(PLE_WINDOW, ple_window); | ||
2390 | } | ||
2391 | |||
2312 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, !!bypass_guest_pf); | 2392 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, !!bypass_guest_pf); |
2313 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf); | 2393 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf); |
2314 | vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ | 2394 | vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ |
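
PLE_GAP and PLE_WINDOW program pause-loop exiting: PAUSEs executed closer together than ple_gap cycles count as one spin episode, and once an episode has lasted ple_window cycles the CPU forces a VM exit, which handle_pause() below turns into a yield via kvm_vcpu_on_spin(). A userspace model of the detection logic as described architecturally; an illustration of the semantics, not how KVM computes anything:

    #include <stdbool.h>
    #include <stdint.h>

    struct ple_state {
            uint64_t first_pause_tsc;       /* start of the current spin episode */
            uint64_t last_pause_tsc;
    };

    /* Called at each guest PAUSE; returns true when the CPU would exit. */
    static bool ple_should_exit(struct ple_state *s, uint64_t now,
                                uint64_t ple_gap, uint64_t ple_window)
    {
            if (now - s->last_pause_tsc > ple_gap)
                    s->first_pause_tsc = now;   /* gap too big: new episode */
            s->last_pause_tsc = now;

            return now - s->first_pause_tsc > ple_window;
    }
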
@@ -2368,18 +2448,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2368 | for (i = 0; i < NR_VMX_MSR; ++i) { | 2448 | for (i = 0; i < NR_VMX_MSR; ++i) { |
2369 | u32 index = vmx_msr_index[i]; | 2449 | u32 index = vmx_msr_index[i]; |
2370 | u32 data_low, data_high; | 2450 | u32 data_low, data_high; |
2371 | u64 data; | ||
2372 | int j = vmx->nmsrs; | 2451 | int j = vmx->nmsrs; |
2373 | 2452 | ||
2374 | if (rdmsr_safe(index, &data_low, &data_high) < 0) | 2453 | if (rdmsr_safe(index, &data_low, &data_high) < 0) |
2375 | continue; | 2454 | continue; |
2376 | if (wrmsr_safe(index, data_low, data_high) < 0) | 2455 | if (wrmsr_safe(index, data_low, data_high) < 0) |
2377 | continue; | 2456 | continue; |
2378 | data = data_low | ((u64)data_high << 32); | 2457 | vmx->guest_msrs[j].index = i; |
2379 | vmx->host_msrs[j].index = index; | 2458 | vmx->guest_msrs[j].data = 0; |
2380 | vmx->host_msrs[j].reserved = 0; | 2459 | vmx->guest_msrs[j].mask = -1ull; |
2381 | vmx->host_msrs[j].data = data; | ||
2382 | vmx->guest_msrs[j] = vmx->host_msrs[j]; | ||
2383 | ++vmx->nmsrs; | 2460 | ++vmx->nmsrs; |
2384 | } | 2461 | } |
2385 | 2462 | ||
@@ -2389,7 +2466,10 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2389 | vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl); | 2466 | vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl); |
2390 | 2467 | ||
2391 | vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); | 2468 | vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); |
2392 | vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK); | 2469 | vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS; |
2470 | if (enable_ept) | ||
2471 | vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE; | ||
2472 | vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits); | ||
2393 | 2473 | ||
2394 | tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc; | 2474 | tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc; |
2395 | rdtscll(tsc_this); | 2475 | rdtscll(tsc_this); |
@@ -2414,10 +2494,10 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
2414 | { | 2494 | { |
2415 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2495 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
2416 | u64 msr; | 2496 | u64 msr; |
2417 | int ret; | 2497 | int ret, idx; |
2418 | 2498 | ||
2419 | vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); | 2499 | vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); |
2420 | down_read(&vcpu->kvm->slots_lock); | 2500 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
2421 | if (!init_rmode(vmx->vcpu.kvm)) { | 2501 | if (!init_rmode(vmx->vcpu.kvm)) { |
2422 | ret = -ENOMEM; | 2502 | ret = -ENOMEM; |
2423 | goto out; | 2503 | goto out; |
@@ -2510,8 +2590,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
2510 | if (vmx->vpid != 0) | 2590 | if (vmx->vpid != 0) |
2511 | vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); | 2591 | vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); |
2512 | 2592 | ||
2513 | vmx->vcpu.arch.cr0 = 0x60000010; | 2593 | vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; |
2514 | vmx_set_cr0(&vmx->vcpu, vmx->vcpu.arch.cr0); /* enter rmode */ | 2594 | vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */ |
2515 | vmx_set_cr4(&vmx->vcpu, 0); | 2595 | vmx_set_cr4(&vmx->vcpu, 0); |
2516 | vmx_set_efer(&vmx->vcpu, 0); | 2596 | vmx_set_efer(&vmx->vcpu, 0); |
2517 | vmx_fpu_activate(&vmx->vcpu); | 2597 | vmx_fpu_activate(&vmx->vcpu); |
@@ -2525,7 +2605,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
2525 | vmx->emulation_required = 0; | 2605 | vmx->emulation_required = 0; |
2526 | 2606 | ||
2527 | out: | 2607 | out: |
2528 | up_read(&vcpu->kvm->slots_lock); | 2608 | srcu_read_unlock(&vcpu->kvm->srcu, idx); |
2529 | return ret; | 2609 | return ret; |
2530 | } | 2610 | } |
2531 | 2611 | ||
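
The down_read()/up_read() pairs on kvm->slots_lock are gone: memslot readers now run under SRCU (see also the rcu_dereference() in rmode_tss_base() above), while writers serialize on the kvm->slots_lock mutex and publish a fresh slots array. The reader-side pattern, as vmx_vcpu_reset() now uses it:

    int idx;
    struct kvm_memslots *slots;

    idx = srcu_read_lock(&kvm->srcu);
    slots = rcu_dereference(kvm->memslots);
    /* ... look up slots->memslots[i] without blocking writers ... */
    srcu_read_unlock(&kvm->srcu, idx);
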
@@ -2623,8 +2703,35 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) | |||
2623 | return 0; | 2703 | return 0; |
2624 | 2704 | ||
2625 | return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & | 2705 | return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & |
2626 | (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS | | 2706 | (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_NMI)); |
2627 | GUEST_INTR_STATE_NMI)); | 2707 | } |
2708 | |||
2709 | static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) | ||
2710 | { | ||
2711 | if (!cpu_has_virtual_nmis()) | ||
2712 | return to_vmx(vcpu)->soft_vnmi_blocked; | ||
2713 | else | ||
2714 | return !!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & | ||
2715 | GUEST_INTR_STATE_NMI); | ||
2716 | } | ||
2717 | |||
2718 | static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) | ||
2719 | { | ||
2720 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
2721 | |||
2722 | if (!cpu_has_virtual_nmis()) { | ||
2723 | if (vmx->soft_vnmi_blocked != masked) { | ||
2724 | vmx->soft_vnmi_blocked = masked; | ||
2725 | vmx->vnmi_blocked_time = 0; | ||
2726 | } | ||
2727 | } else { | ||
2728 | if (masked) | ||
2729 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, | ||
2730 | GUEST_INTR_STATE_NMI); | ||
2731 | else | ||
2732 | vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, | ||
2733 | GUEST_INTR_STATE_NMI); | ||
2734 | } | ||
2628 | } | 2735 | } |
2629 | 2736 | ||
2630 | static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) | 2737 | static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) |
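
The new vmx_get_nmi_mask()/vmx_set_nmi_mask() pair hides two backends behind one interface: with hardware virtual NMIs the mask is the GUEST_INTR_STATE_NMI bit of the interruptibility field, and without them KVM falls back to a software flag (soft_vnmi_blocked). A self-contained model of that split:

    #include <stdbool.h>
    #include <stdint.h>

    #define GUEST_INTR_STATE_NMI (1u << 3)  /* blocking-by-NMI bit */

    struct nmi_state {
            bool has_virtual_nmis;
            bool soft_blocked;              /* software fallback */
            uint32_t interruptibility;      /* models the VMCS field */
    };

    static bool nmi_masked(const struct nmi_state *s)
    {
            if (!s->has_virtual_nmis)
                    return s->soft_blocked;
            return s->interruptibility & GUEST_INTR_STATE_NMI;
    }

    static void nmi_set_mask(struct nmi_state *s, bool masked)
    {
            if (!s->has_virtual_nmis)
                    s->soft_blocked = masked;
            else if (masked)
                    s->interruptibility |= GUEST_INTR_STATE_NMI;
            else
                    s->interruptibility &= ~GUEST_INTR_STATE_NMI;
    }
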
@@ -2659,7 +2766,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, | |||
2659 | * Cause the #SS fault with 0 error code in VM86 mode. | 2766 | * Cause the #SS fault with 0 error code in VM86 mode. |
2660 | */ | 2767 | */ |
2661 | if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) | 2768 | if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) |
2662 | if (emulate_instruction(vcpu, NULL, 0, 0, 0) == EMULATE_DONE) | 2769 | if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE) |
2663 | return 1; | 2770 | return 1; |
2664 | /* | 2771 | /* |
2665 | * Forward all other exceptions that are valid in real mode. | 2772 | * Forward all other exceptions that are valid in real mode. |
@@ -2674,6 +2781,12 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, | |||
2674 | kvm_queue_exception(vcpu, vec); | 2781 | kvm_queue_exception(vcpu, vec); |
2675 | return 1; | 2782 | return 1; |
2676 | case BP_VECTOR: | 2783 | case BP_VECTOR: |
2784 | /* | ||
2785 | * Update instruction length as we may reinject the exception | ||
2786 | * from user space while in guest debugging mode. | ||
2787 | */ | ||
2788 | to_vmx(vcpu)->vcpu.arch.event_exit_inst_len = | ||
2789 | vmcs_read32(VM_EXIT_INSTRUCTION_LEN); | ||
2677 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) | 2790 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) |
2678 | return 0; | 2791 | return 0; |
2679 | /* fall through */ | 2792 | /* fall through */ |
@@ -2710,15 +2823,16 @@ static void kvm_machine_check(void) | |||
2710 | #endif | 2823 | #endif |
2711 | } | 2824 | } |
2712 | 2825 | ||
2713 | static int handle_machine_check(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2826 | static int handle_machine_check(struct kvm_vcpu *vcpu) |
2714 | { | 2827 | { |
2715 | /* already handled by vcpu_run */ | 2828 | /* already handled by vcpu_run */ |
2716 | return 1; | 2829 | return 1; |
2717 | } | 2830 | } |
2718 | 2831 | ||
2719 | static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2832 | static int handle_exception(struct kvm_vcpu *vcpu) |
2720 | { | 2833 | { |
2721 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2834 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
2835 | struct kvm_run *kvm_run = vcpu->run; | ||
2722 | u32 intr_info, ex_no, error_code; | 2836 | u32 intr_info, ex_no, error_code; |
2723 | unsigned long cr2, rip, dr6; | 2837 | unsigned long cr2, rip, dr6; |
2724 | u32 vect_info; | 2838 | u32 vect_info; |
@@ -2728,12 +2842,17 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2728 | intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | 2842 | intr_info = vmcs_read32(VM_EXIT_INTR_INFO); |
2729 | 2843 | ||
2730 | if (is_machine_check(intr_info)) | 2844 | if (is_machine_check(intr_info)) |
2731 | return handle_machine_check(vcpu, kvm_run); | 2845 | return handle_machine_check(vcpu); |
2732 | 2846 | ||
2733 | if ((vect_info & VECTORING_INFO_VALID_MASK) && | 2847 | if ((vect_info & VECTORING_INFO_VALID_MASK) && |
2734 | !is_page_fault(intr_info)) | 2848 | !is_page_fault(intr_info)) { |
2735 | printk(KERN_ERR "%s: unexpected, vectoring info 0x%x " | 2849 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; |
2736 | "intr info 0x%x\n", __func__, vect_info, intr_info); | 2850 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX; |
2851 | vcpu->run->internal.ndata = 2; | ||
2852 | vcpu->run->internal.data[0] = vect_info; | ||
2853 | vcpu->run->internal.data[1] = intr_info; | ||
2854 | return 0; | ||
2855 | } | ||
2737 | 2856 | ||
2738 | if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR) | 2857 | if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR) |
2739 | return 1; /* already handled by vmx_vcpu_run() */ | 2858 | return 1; /* already handled by vmx_vcpu_run() */ |
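
Instead of a printk, simultaneous exception vectoring now produces a structured KVM_EXIT_INTERNAL_ERROR exit carrying the vectoring and interrupt info words. On the user-space side that might be consumed like so (a sketch against the kvm_run layout; run is the mmap'ed struct kvm_run):

    #include <stdint.h>
    #include <stdio.h>

    /* inside the VMM's run loop, after ioctl(vcpu_fd, KVM_RUN, 0) */
    if (run->exit_reason == KVM_EXIT_INTERNAL_ERROR) {
            fprintf(stderr, "kvm internal error, suberror %u\n",
                    run->internal.suberror);
            for (uint32_t i = 0; i < run->internal.ndata; i++)
                    fprintf(stderr, "  data[%u] = 0x%llx\n", i,
                            (unsigned long long)run->internal.data[i]);
    }
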
@@ -2744,7 +2863,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2744 | } | 2863 | } |
2745 | 2864 | ||
2746 | if (is_invalid_opcode(intr_info)) { | 2865 | if (is_invalid_opcode(intr_info)) { |
2747 | er = emulate_instruction(vcpu, kvm_run, 0, 0, EMULTYPE_TRAP_UD); | 2866 | er = emulate_instruction(vcpu, 0, 0, EMULTYPE_TRAP_UD); |
2748 | if (er != EMULATE_DONE) | 2867 | if (er != EMULATE_DONE) |
2749 | kvm_queue_exception(vcpu, UD_VECTOR); | 2868 | kvm_queue_exception(vcpu, UD_VECTOR); |
2750 | return 1; | 2869 | return 1; |
@@ -2790,6 +2909,13 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2790 | kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7); | 2909 | kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7); |
2791 | /* fall through */ | 2910 | /* fall through */ |
2792 | case BP_VECTOR: | 2911 | case BP_VECTOR: |
2912 | /* | ||
2913 | * Update instruction length as we may reinject #BP from | ||
2914 | * user space while in guest debugging mode. Reading it for | ||
2915 | * #DB as well causes no harm; it is not used in that case. | ||
2916 | */ | ||
2917 | vmx->vcpu.arch.event_exit_inst_len = | ||
2918 | vmcs_read32(VM_EXIT_INSTRUCTION_LEN); | ||
2793 | kvm_run->exit_reason = KVM_EXIT_DEBUG; | 2919 | kvm_run->exit_reason = KVM_EXIT_DEBUG; |
2794 | kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; | 2920 | kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; |
2795 | kvm_run->debug.arch.exception = ex_no; | 2921 | kvm_run->debug.arch.exception = ex_no; |
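
Both #BP hunks snapshot VM_EXIT_INSTRUCTION_LEN for the same reason: INT3 raises a software exception, and re-injecting a software exception on VM entry needs the instruction length so the CPU can advance RIP past it. The injection-side pairing presumably looks like this (a sketch; only the exit-side read is what the hunks add):

    /* exit side (added in the hunks above) */
    vmx->vcpu.arch.event_exit_inst_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);

    /* injection side, when the #BP is re-queued (sketch) */
    vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, vmx->vcpu.arch.event_exit_inst_len);
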
@@ -2803,20 +2929,19 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2803 | return 0; | 2929 | return 0; |
2804 | } | 2930 | } |
2805 | 2931 | ||
2806 | static int handle_external_interrupt(struct kvm_vcpu *vcpu, | 2932 | static int handle_external_interrupt(struct kvm_vcpu *vcpu) |
2807 | struct kvm_run *kvm_run) | ||
2808 | { | 2933 | { |
2809 | ++vcpu->stat.irq_exits; | 2934 | ++vcpu->stat.irq_exits; |
2810 | return 1; | 2935 | return 1; |
2811 | } | 2936 | } |
2812 | 2937 | ||
2813 | static int handle_triple_fault(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2938 | static int handle_triple_fault(struct kvm_vcpu *vcpu) |
2814 | { | 2939 | { |
2815 | kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; | 2940 | vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; |
2816 | return 0; | 2941 | return 0; |
2817 | } | 2942 | } |
2818 | 2943 | ||
2819 | static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2944 | static int handle_io(struct kvm_vcpu *vcpu) |
2820 | { | 2945 | { |
2821 | unsigned long exit_qualification; | 2946 | unsigned long exit_qualification; |
2822 | int size, in, string; | 2947 | int size, in, string; |
@@ -2827,8 +2952,7 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2827 | string = (exit_qualification & 16) != 0; | 2952 | string = (exit_qualification & 16) != 0; |
2828 | 2953 | ||
2829 | if (string) { | 2954 | if (string) { |
2830 | if (emulate_instruction(vcpu, | 2955 | if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO) |
2831 | kvm_run, 0, 0, 0) == EMULATE_DO_MMIO) | ||
2832 | return 0; | 2956 | return 0; |
2833 | return 1; | 2957 | return 1; |
2834 | } | 2958 | } |
@@ -2838,7 +2962,7 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2838 | port = exit_qualification >> 16; | 2962 | port = exit_qualification >> 16; |
2839 | 2963 | ||
2840 | skip_emulated_instruction(vcpu); | 2964 | skip_emulated_instruction(vcpu); |
2841 | return kvm_emulate_pio(vcpu, kvm_run, in, size, port); | 2965 | return kvm_emulate_pio(vcpu, in, size, port); |
2842 | } | 2966 | } |
2843 | 2967 | ||
2844 | static void | 2968 | static void |
@@ -2852,7 +2976,7 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) | |||
2852 | hypercall[2] = 0xc1; | 2976 | hypercall[2] = 0xc1; |
2853 | } | 2977 | } |
2854 | 2978 | ||
2855 | static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2979 | static int handle_cr(struct kvm_vcpu *vcpu) |
2856 | { | 2980 | { |
2857 | unsigned long exit_qualification, val; | 2981 | unsigned long exit_qualification, val; |
2858 | int cr; | 2982 | int cr; |
@@ -2887,17 +3011,16 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2887 | return 1; | 3011 | return 1; |
2888 | if (cr8_prev <= cr8) | 3012 | if (cr8_prev <= cr8) |
2889 | return 1; | 3013 | return 1; |
2890 | kvm_run->exit_reason = KVM_EXIT_SET_TPR; | 3014 | vcpu->run->exit_reason = KVM_EXIT_SET_TPR; |
2891 | return 0; | 3015 | return 0; |
2892 | } | 3016 | } |
2893 | }; | 3017 | }; |
2894 | break; | 3018 | break; |
2895 | case 2: /* clts */ | 3019 | case 2: /* clts */ |
2896 | vmx_fpu_deactivate(vcpu); | 3020 | vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); |
2897 | vcpu->arch.cr0 &= ~X86_CR0_TS; | 3021 | trace_kvm_cr_write(0, kvm_read_cr0(vcpu)); |
2898 | vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0); | ||
2899 | vmx_fpu_activate(vcpu); | ||
2900 | skip_emulated_instruction(vcpu); | 3022 | skip_emulated_instruction(vcpu); |
3023 | vmx_fpu_activate(vcpu); | ||
2901 | return 1; | 3024 | return 1; |
2902 | case 1: /*mov from cr*/ | 3025 | case 1: /*mov from cr*/ |
2903 | switch (cr) { | 3026 | switch (cr) { |
@@ -2915,25 +3038,37 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2915 | } | 3038 | } |
2916 | break; | 3039 | break; |
2917 | case 3: /* lmsw */ | 3040 | case 3: /* lmsw */ |
2918 | kvm_lmsw(vcpu, (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f); | 3041 | val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f; |
3042 | trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val); | ||
3043 | kvm_lmsw(vcpu, val); | ||
2919 | 3044 | ||
2920 | skip_emulated_instruction(vcpu); | 3045 | skip_emulated_instruction(vcpu); |
2921 | return 1; | 3046 | return 1; |
2922 | default: | 3047 | default: |
2923 | break; | 3048 | break; |
2924 | } | 3049 | } |
2925 | kvm_run->exit_reason = 0; | 3050 | vcpu->run->exit_reason = 0; |
2926 | pr_unimpl(vcpu, "unhandled control register: op %d cr %d\n", | 3051 | pr_unimpl(vcpu, "unhandled control register: op %d cr %d\n", |
2927 | (int)(exit_qualification >> 4) & 3, cr); | 3052 | (int)(exit_qualification >> 4) & 3, cr); |
2928 | return 0; | 3053 | return 0; |
2929 | } | 3054 | } |
2930 | 3055 | ||
2931 | static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3056 | static int check_dr_alias(struct kvm_vcpu *vcpu) |
3057 | { | ||
3058 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | ||
3059 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
3060 | return -1; | ||
3061 | } | ||
3062 | return 0; | ||
3063 | } | ||
3064 | |||
3065 | static int handle_dr(struct kvm_vcpu *vcpu) | ||
2932 | { | 3066 | { |
2933 | unsigned long exit_qualification; | 3067 | unsigned long exit_qualification; |
2934 | unsigned long val; | 3068 | unsigned long val; |
2935 | int dr, reg; | 3069 | int dr, reg; |
2936 | 3070 | ||
3071 | /* Do not handle if CPL > 0; a #GP will be injected on re-entry */ | ||
2937 | if (!kvm_require_cpl(vcpu, 0)) | 3072 | if (!kvm_require_cpl(vcpu, 0)) |
2938 | return 1; | 3073 | return 1; |
2939 | dr = vmcs_readl(GUEST_DR7); | 3074 | dr = vmcs_readl(GUEST_DR7); |
@@ -2944,13 +3079,13 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2944 | * guest debugging itself. | 3079 | * guest debugging itself. |
2945 | */ | 3080 | */ |
2946 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { | 3081 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { |
2947 | kvm_run->debug.arch.dr6 = vcpu->arch.dr6; | 3082 | vcpu->run->debug.arch.dr6 = vcpu->arch.dr6; |
2948 | kvm_run->debug.arch.dr7 = dr; | 3083 | vcpu->run->debug.arch.dr7 = dr; |
2949 | kvm_run->debug.arch.pc = | 3084 | vcpu->run->debug.arch.pc = |
2950 | vmcs_readl(GUEST_CS_BASE) + | 3085 | vmcs_readl(GUEST_CS_BASE) + |
2951 | vmcs_readl(GUEST_RIP); | 3086 | vmcs_readl(GUEST_RIP); |
2952 | kvm_run->debug.arch.exception = DB_VECTOR; | 3087 | vcpu->run->debug.arch.exception = DB_VECTOR; |
2953 | kvm_run->exit_reason = KVM_EXIT_DEBUG; | 3088 | vcpu->run->exit_reason = KVM_EXIT_DEBUG; |
2954 | return 0; | 3089 | return 0; |
2955 | } else { | 3090 | } else { |
2956 | vcpu->arch.dr7 &= ~DR7_GD; | 3091 | vcpu->arch.dr7 &= ~DR7_GD; |
@@ -2969,14 +3104,20 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2969 | case 0 ... 3: | 3104 | case 0 ... 3: |
2970 | val = vcpu->arch.db[dr]; | 3105 | val = vcpu->arch.db[dr]; |
2971 | break; | 3106 | break; |
3107 | case 4: | ||
3108 | if (check_dr_alias(vcpu) < 0) | ||
3109 | return 1; | ||
3110 | /* fall through */ | ||
2972 | case 6: | 3111 | case 6: |
2973 | val = vcpu->arch.dr6; | 3112 | val = vcpu->arch.dr6; |
2974 | break; | 3113 | break; |
2975 | case 7: | 3114 | case 5: |
3115 | if (check_dr_alias(vcpu) < 0) | ||
3116 | return 1; | ||
3117 | /* fall through */ | ||
3118 | default: /* 7 */ | ||
2976 | val = vcpu->arch.dr7; | 3119 | val = vcpu->arch.dr7; |
2977 | break; | 3120 | break; |
2978 | default: | ||
2979 | val = 0; | ||
2980 | } | 3121 | } |
2981 | kvm_register_write(vcpu, reg, val); | 3122 | kvm_register_write(vcpu, reg, val); |
2982 | } else { | 3123 | } else { |
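
check_dr_alias() implements the architectural DR4/DR5 rule used by both the read and write paths: with CR4.DE clear they are legacy aliases for DR6/DR7, and with CR4.DE set any access raises #UD. As a standalone decode step:

    #include <stdbool.h>

    enum dr_access { DR_OK, DR_UNDEFINED };

    static enum dr_access resolve_dr(int dr, bool cr4_de, int *effective)
    {
            if (dr == 4 || dr == 5) {
                    if (cr4_de)
                            return DR_UNDEFINED;    /* #UD, per check_dr_alias() */
                    dr += 2;                        /* alias to DR6/DR7 */
            }
            *effective = dr;
            return DR_OK;
    }
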
@@ -2987,21 +3128,25 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2987 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) | 3128 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) |
2988 | vcpu->arch.eff_db[dr] = val; | 3129 | vcpu->arch.eff_db[dr] = val; |
2989 | break; | 3130 | break; |
2990 | case 4 ... 5: | 3131 | case 4: |
2991 | if (vcpu->arch.cr4 & X86_CR4_DE) | 3132 | if (check_dr_alias(vcpu) < 0) |
2992 | kvm_queue_exception(vcpu, UD_VECTOR); | 3133 | return 1; |
2993 | break; | 3134 | /* fall through */ |
2994 | case 6: | 3135 | case 6: |
2995 | if (val & 0xffffffff00000000ULL) { | 3136 | if (val & 0xffffffff00000000ULL) { |
2996 | kvm_queue_exception(vcpu, GP_VECTOR); | 3137 | kvm_inject_gp(vcpu, 0); |
2997 | break; | 3138 | return 1; |
2998 | } | 3139 | } |
2999 | vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; | 3140 | vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; |
3000 | break; | 3141 | break; |
3001 | case 7: | 3142 | case 5: |
3143 | if (check_dr_alias(vcpu) < 0) | ||
3144 | return 1; | ||
3145 | /* fall through */ | ||
3146 | default: /* 7 */ | ||
3002 | if (val & 0xffffffff00000000ULL) { | 3147 | if (val & 0xffffffff00000000ULL) { |
3003 | kvm_queue_exception(vcpu, GP_VECTOR); | 3148 | kvm_inject_gp(vcpu, 0); |
3004 | break; | 3149 | return 1; |
3005 | } | 3150 | } |
3006 | vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; | 3151 | vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; |
3007 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { | 3152 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { |
@@ -3016,18 +3161,19 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3016 | return 1; | 3161 | return 1; |
3017 | } | 3162 | } |
3018 | 3163 | ||
3019 | static int handle_cpuid(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3164 | static int handle_cpuid(struct kvm_vcpu *vcpu) |
3020 | { | 3165 | { |
3021 | kvm_emulate_cpuid(vcpu); | 3166 | kvm_emulate_cpuid(vcpu); |
3022 | return 1; | 3167 | return 1; |
3023 | } | 3168 | } |
3024 | 3169 | ||
3025 | static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3170 | static int handle_rdmsr(struct kvm_vcpu *vcpu) |
3026 | { | 3171 | { |
3027 | u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX]; | 3172 | u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX]; |
3028 | u64 data; | 3173 | u64 data; |
3029 | 3174 | ||
3030 | if (vmx_get_msr(vcpu, ecx, &data)) { | 3175 | if (vmx_get_msr(vcpu, ecx, &data)) { |
3176 | trace_kvm_msr_read_ex(ecx); | ||
3031 | kvm_inject_gp(vcpu, 0); | 3177 | kvm_inject_gp(vcpu, 0); |
3032 | return 1; | 3178 | return 1; |
3033 | } | 3179 | } |
@@ -3041,31 +3187,29 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3041 | return 1; | 3187 | return 1; |
3042 | } | 3188 | } |
3043 | 3189 | ||
3044 | static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3190 | static int handle_wrmsr(struct kvm_vcpu *vcpu) |
3045 | { | 3191 | { |
3046 | u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX]; | 3192 | u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX]; |
3047 | u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u) | 3193 | u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u) |
3048 | | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32); | 3194 | | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32); |
3049 | 3195 | ||
3050 | trace_kvm_msr_write(ecx, data); | ||
3051 | |||
3052 | if (vmx_set_msr(vcpu, ecx, data) != 0) { | 3196 | if (vmx_set_msr(vcpu, ecx, data) != 0) { |
3197 | trace_kvm_msr_write_ex(ecx, data); | ||
3053 | kvm_inject_gp(vcpu, 0); | 3198 | kvm_inject_gp(vcpu, 0); |
3054 | return 1; | 3199 | return 1; |
3055 | } | 3200 | } |
3056 | 3201 | ||
3202 | trace_kvm_msr_write(ecx, data); | ||
3057 | skip_emulated_instruction(vcpu); | 3203 | skip_emulated_instruction(vcpu); |
3058 | return 1; | 3204 | return 1; |
3059 | } | 3205 | } |
3060 | 3206 | ||
3061 | static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu, | 3207 | static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu) |
3062 | struct kvm_run *kvm_run) | ||
3063 | { | 3208 | { |
3064 | return 1; | 3209 | return 1; |
3065 | } | 3210 | } |
3066 | 3211 | ||
3067 | static int handle_interrupt_window(struct kvm_vcpu *vcpu, | 3212 | static int handle_interrupt_window(struct kvm_vcpu *vcpu) |
3068 | struct kvm_run *kvm_run) | ||
3069 | { | 3213 | { |
3070 | u32 cpu_based_vm_exec_control; | 3214 | u32 cpu_based_vm_exec_control; |
3071 | 3215 | ||
@@ -3081,34 +3225,34 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu, | |||
3081 | * possible | 3225 | * possible |
3082 | */ | 3226 | */ |
3083 | if (!irqchip_in_kernel(vcpu->kvm) && | 3227 | if (!irqchip_in_kernel(vcpu->kvm) && |
3084 | kvm_run->request_interrupt_window && | 3228 | vcpu->run->request_interrupt_window && |
3085 | !kvm_cpu_has_interrupt(vcpu)) { | 3229 | !kvm_cpu_has_interrupt(vcpu)) { |
3086 | kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; | 3230 | vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; |
3087 | return 0; | 3231 | return 0; |
3088 | } | 3232 | } |
3089 | return 1; | 3233 | return 1; |
3090 | } | 3234 | } |
3091 | 3235 | ||
3092 | static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3236 | static int handle_halt(struct kvm_vcpu *vcpu) |
3093 | { | 3237 | { |
3094 | skip_emulated_instruction(vcpu); | 3238 | skip_emulated_instruction(vcpu); |
3095 | return kvm_emulate_halt(vcpu); | 3239 | return kvm_emulate_halt(vcpu); |
3096 | } | 3240 | } |
3097 | 3241 | ||
3098 | static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3242 | static int handle_vmcall(struct kvm_vcpu *vcpu) |
3099 | { | 3243 | { |
3100 | skip_emulated_instruction(vcpu); | 3244 | skip_emulated_instruction(vcpu); |
3101 | kvm_emulate_hypercall(vcpu); | 3245 | kvm_emulate_hypercall(vcpu); |
3102 | return 1; | 3246 | return 1; |
3103 | } | 3247 | } |
3104 | 3248 | ||
3105 | static int handle_vmx_insn(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3249 | static int handle_vmx_insn(struct kvm_vcpu *vcpu) |
3106 | { | 3250 | { |
3107 | kvm_queue_exception(vcpu, UD_VECTOR); | 3251 | kvm_queue_exception(vcpu, UD_VECTOR); |
3108 | return 1; | 3252 | return 1; |
3109 | } | 3253 | } |
3110 | 3254 | ||
3111 | static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3255 | static int handle_invlpg(struct kvm_vcpu *vcpu) |
3112 | { | 3256 | { |
3113 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 3257 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
3114 | 3258 | ||
@@ -3117,14 +3261,14 @@ static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3117 | return 1; | 3261 | return 1; |
3118 | } | 3262 | } |
3119 | 3263 | ||
3120 | static int handle_wbinvd(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3264 | static int handle_wbinvd(struct kvm_vcpu *vcpu) |
3121 | { | 3265 | { |
3122 | skip_emulated_instruction(vcpu); | 3266 | skip_emulated_instruction(vcpu); |
3123 | /* TODO: Add support for VT-d/pass-through device */ | 3267 | /* TODO: Add support for VT-d/pass-through device */ |
3124 | return 1; | 3268 | return 1; |
3125 | } | 3269 | } |
3126 | 3270 | ||
3127 | static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3271 | static int handle_apic_access(struct kvm_vcpu *vcpu) |
3128 | { | 3272 | { |
3129 | unsigned long exit_qualification; | 3273 | unsigned long exit_qualification; |
3130 | enum emulation_result er; | 3274 | enum emulation_result er; |
@@ -3133,7 +3277,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3133 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 3277 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
3134 | offset = exit_qualification & 0xffful; | 3278 | offset = exit_qualification & 0xffful; |
3135 | 3279 | ||
3136 | er = emulate_instruction(vcpu, kvm_run, 0, 0, 0); | 3280 | er = emulate_instruction(vcpu, 0, 0, 0); |
3137 | 3281 | ||
3138 | if (er != EMULATE_DONE) { | 3282 | if (er != EMULATE_DONE) { |
3139 | printk(KERN_ERR | 3283 | printk(KERN_ERR |
@@ -3144,7 +3288,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3144 | return 1; | 3288 | return 1; |
3145 | } | 3289 | } |
3146 | 3290 | ||
3147 | static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3291 | static int handle_task_switch(struct kvm_vcpu *vcpu) |
3148 | { | 3292 | { |
3149 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3293 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3150 | unsigned long exit_qualification; | 3294 | unsigned long exit_qualification; |
@@ -3198,7 +3342,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3198 | return 1; | 3342 | return 1; |
3199 | } | 3343 | } |
3200 | 3344 | ||
3201 | static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3345 | static int handle_ept_violation(struct kvm_vcpu *vcpu) |
3202 | { | 3346 | { |
3203 | unsigned long exit_qualification; | 3347 | unsigned long exit_qualification; |
3204 | gpa_t gpa; | 3348 | gpa_t gpa; |
@@ -3219,8 +3363,8 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3219 | vmcs_readl(GUEST_LINEAR_ADDRESS)); | 3363 | vmcs_readl(GUEST_LINEAR_ADDRESS)); |
3220 | printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", | 3364 | printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", |
3221 | (long unsigned int)exit_qualification); | 3365 | (long unsigned int)exit_qualification); |
3222 | kvm_run->exit_reason = KVM_EXIT_UNKNOWN; | 3366 | vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; |
3223 | kvm_run->hw.hardware_exit_reason = EXIT_REASON_EPT_VIOLATION; | 3367 | vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_VIOLATION; |
3224 | return 0; | 3368 | return 0; |
3225 | } | 3369 | } |
3226 | 3370 | ||
@@ -3290,7 +3434,7 @@ static void ept_misconfig_inspect_spte(struct kvm_vcpu *vcpu, u64 spte, | |||
3290 | } | 3434 | } |
3291 | } | 3435 | } |
3292 | 3436 | ||
3293 | static int handle_ept_misconfig(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3437 | static int handle_ept_misconfig(struct kvm_vcpu *vcpu) |
3294 | { | 3438 | { |
3295 | u64 sptes[4]; | 3439 | u64 sptes[4]; |
3296 | int nr_sptes, i; | 3440 | int nr_sptes, i; |
@@ -3306,13 +3450,13 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3306 | for (i = PT64_ROOT_LEVEL; i > PT64_ROOT_LEVEL - nr_sptes; --i) | 3450 | for (i = PT64_ROOT_LEVEL; i > PT64_ROOT_LEVEL - nr_sptes; --i) |
3307 | ept_misconfig_inspect_spte(vcpu, sptes[i-1], i); | 3451 | ept_misconfig_inspect_spte(vcpu, sptes[i-1], i); |
3308 | 3452 | ||
3309 | kvm_run->exit_reason = KVM_EXIT_UNKNOWN; | 3453 | vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; |
3310 | kvm_run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG; | 3454 | vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG; |
3311 | 3455 | ||
3312 | return 0; | 3456 | return 0; |
3313 | } | 3457 | } |
3314 | 3458 | ||
3315 | static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3459 | static int handle_nmi_window(struct kvm_vcpu *vcpu) |
3316 | { | 3460 | { |
3317 | u32 cpu_based_vm_exec_control; | 3461 | u32 cpu_based_vm_exec_control; |
3318 | 3462 | ||
@@ -3325,36 +3469,55 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3325 | return 1; | 3469 | return 1; |
3326 | } | 3470 | } |
3327 | 3471 | ||
3328 | static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, | 3472 | static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) |
3329 | struct kvm_run *kvm_run) | ||
3330 | { | 3473 | { |
3331 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3474 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3332 | enum emulation_result err = EMULATE_DONE; | 3475 | enum emulation_result err = EMULATE_DONE; |
3333 | 3476 | int ret = 1; | |
3334 | local_irq_enable(); | ||
3335 | preempt_enable(); | ||
3336 | 3477 | ||
3337 | while (!guest_state_valid(vcpu)) { | 3478 | while (!guest_state_valid(vcpu)) { |
3338 | err = emulate_instruction(vcpu, kvm_run, 0, 0, 0); | 3479 | err = emulate_instruction(vcpu, 0, 0, 0); |
3339 | 3480 | ||
3340 | if (err == EMULATE_DO_MMIO) | 3481 | if (err == EMULATE_DO_MMIO) { |
3341 | break; | 3482 | ret = 0; |
3483 | goto out; | ||
3484 | } | ||
3342 | 3485 | ||
3343 | if (err != EMULATE_DONE) { | 3486 | if (err != EMULATE_DONE) { |
3344 | kvm_report_emulation_failure(vcpu, "emulation failure"); | 3487 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; |
3345 | break; | 3488 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; |
3489 | vcpu->run->internal.ndata = 0; | ||
3490 | ret = 0; | ||
3491 | goto out; | ||
3346 | } | 3492 | } |
3347 | 3493 | ||
3348 | if (signal_pending(current)) | 3494 | if (signal_pending(current)) |
3349 | break; | 3495 | goto out; |
3350 | if (need_resched()) | 3496 | if (need_resched()) |
3351 | schedule(); | 3497 | schedule(); |
3352 | } | 3498 | } |
3353 | 3499 | ||
3354 | preempt_disable(); | 3500 | vmx->emulation_required = 0; |
3355 | local_irq_disable(); | 3501 | out: |
3502 | return ret; | ||
3503 | } | ||
3504 | |||
3505 | /* | ||
3506 | * Indicate a vcpu busy-waiting on a spinlock. We do not enable plain | ||
3507 | * PAUSE exiting, so we only get here on CPUs with PAUSE-loop exiting. | ||
3508 | */ | ||
3509 | static int handle_pause(struct kvm_vcpu *vcpu) | ||
3510 | { | ||
3511 | skip_emulated_instruction(vcpu); | ||
3512 | kvm_vcpu_on_spin(vcpu); | ||
3513 | |||
3514 | return 1; | ||
3515 | } | ||
3356 | 3516 | ||
3357 | vmx->invalid_state_emulation_result = err; | 3517 | static int handle_invalid_op(struct kvm_vcpu *vcpu) |
3518 | { | ||
3519 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
3520 | return 1; | ||
3358 | } | 3521 | } |
3359 | 3522 | ||
3360 | /* | 3523 | /* |
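
handle_invalid_guest_state() now owns its emulation loop and speaks the exit-handler convention directly: return 1 to resume the guest, 0 to hand control to user space (a pending MMIO, or a structured emulation-failure report). The control flow, with hypothetical helper names standing in for emulate_instruction() and the KVM_EXIT_INTERNAL_ERROR reporting in the hunk above:

    /* illustrative only; emulate_one_insn() and report_emulation_failure()
     * are hypothetical stand-ins */
    static int emulate_until_valid(struct kvm_vcpu *vcpu)
    {
            while (!guest_state_valid(vcpu)) {
                    switch (emulate_one_insn(vcpu)) {
                    case EMULATE_DO_MMIO:
                            return 0;       /* user space completes the MMIO */
                    case EMULATE_FAIL:
                            report_emulation_failure(vcpu);
                            return 0;
                    default:                /* EMULATE_DONE: keep going */
                            break;
                    }
            }
            return 1;                       /* state valid again: resume guest */
    }
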
@@ -3362,8 +3525,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, | |||
3362 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs | 3525 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs |
3363 | * to be done to userspace and return 0. | 3526 | * to be done to userspace and return 0. |
3364 | */ | 3527 | */ |
3365 | static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, | 3528 | static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { |
3366 | struct kvm_run *kvm_run) = { | ||
3367 | [EXIT_REASON_EXCEPTION_NMI] = handle_exception, | 3529 | [EXIT_REASON_EXCEPTION_NMI] = handle_exception, |
3368 | [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, | 3530 | [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, |
3369 | [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault, | 3531 | [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault, |
@@ -3394,6 +3556,9 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, | |||
3394 | [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, | 3556 | [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, |
3395 | [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, | 3557 | [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, |
3396 | [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig, | 3558 | [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig, |
3559 | [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, | ||
3560 | [EXIT_REASON_MWAIT_INSTRUCTION] = handle_invalid_op, | ||
3561 | [EXIT_REASON_MONITOR_INSTRUCTION] = handle_invalid_op, | ||
3397 | }; | 3562 | }; |
3398 | 3563 | ||
3399 | static const int kvm_vmx_max_exit_handlers = | 3564 | static const int kvm_vmx_max_exit_handlers = |
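
[Editor's note: the comment above the handler table states the contract: a handler returns 1 when KVM may resume the guest, 0 when userspace must act on kvm_run. A hedged userspace sketch of the consumer side of that contract, using the standard /dev/kvm ioctls; guest memory, register setup, and error handling are omitted for brevity.]

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	int vm = ioctl(kvm, KVM_CREATE_VM, 0);
	int vcpu = ioctl(vm, KVM_CREATE_VCPU, 0);
	long mmap_size = ioctl(kvm, KVM_GET_VCPU_MMAP_SIZE, 0);
	struct kvm_run *run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
				   MAP_SHARED, vcpu, 0);

	for (;;) {
		/* KVM_RUN returns when an in-kernel handler returned 0 */
		ioctl(vcpu, KVM_RUN, 0);
		switch (run->exit_reason) {
		case KVM_EXIT_UNKNOWN:
			fprintf(stderr, "unknown hw exit 0x%llx\n",
				(unsigned long long)run->hw.hardware_exit_reason);
			return 1;
		case KVM_EXIT_FAIL_ENTRY:
			fprintf(stderr, "entry failed, reason 0x%llx\n",
				(unsigned long long)
				run->fail_entry.hardware_entry_failure_reason);
			return 1;
		case KVM_EXIT_INTERNAL_ERROR:
			fprintf(stderr, "emulation failed, suberror %u\n",
				run->internal.suberror);
			return 1;
		default:
			return 0;	/* other exits elided in this sketch */
		}
	}
}
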
@@ -3403,7 +3568,7 @@ static const int kvm_vmx_max_exit_handlers = | |||
3403 | * The guest has exited. See if we can fix it or if we need userspace | 3568 | * The guest has exited. See if we can fix it or if we need userspace |
3404 | * assistance. | 3569 | * assistance. |
3405 | */ | 3570 | */ |
3406 | static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | 3571 | static int vmx_handle_exit(struct kvm_vcpu *vcpu) |
3407 | { | 3572 | { |
3408 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3573 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3409 | u32 exit_reason = vmx->exit_reason; | 3574 | u32 exit_reason = vmx->exit_reason; |
@@ -3411,13 +3576,9 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
3411 | 3576 | ||
3412 | trace_kvm_exit(exit_reason, kvm_rip_read(vcpu)); | 3577 | trace_kvm_exit(exit_reason, kvm_rip_read(vcpu)); |
3413 | 3578 | ||
3414 | /* If we need to emulate an MMIO from handle_invalid_guest_state | 3579 | /* If guest state is invalid, start emulating */ |
3415 | * we just return 0 */ | 3580 | if (vmx->emulation_required && emulate_invalid_guest_state) |
3416 | if (vmx->emulation_required && emulate_invalid_guest_state) { | 3581 | return handle_invalid_guest_state(vcpu); |
3417 | if (guest_state_valid(vcpu)) | ||
3418 | vmx->emulation_required = 0; | ||
3419 | return vmx->invalid_state_emulation_result != EMULATE_DO_MMIO; | ||
3420 | } | ||
3421 | 3582 | ||
3422 | /* Accessing CR3 doesn't cause a VM exit in paging mode, so we need | 3583 | /* Accessing CR3 doesn't cause a VM exit in paging mode, so we need |
3423 | * to sync with the guest's real CR3. */ | 3584 | * to sync with the guest's real CR3. */ |
@@ -3425,8 +3586,8 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
3425 | vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); | 3586 | vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); |
3426 | 3587 | ||
3427 | if (unlikely(vmx->fail)) { | 3588 | if (unlikely(vmx->fail)) { |
3428 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; | 3589 | vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; |
3429 | kvm_run->fail_entry.hardware_entry_failure_reason | 3590 | vcpu->run->fail_entry.hardware_entry_failure_reason |
3430 | = vmcs_read32(VM_INSTRUCTION_ERROR); | 3591 | = vmcs_read32(VM_INSTRUCTION_ERROR); |
3431 | return 0; | 3592 | return 0; |
3432 | } | 3593 | } |
@@ -3459,10 +3620,10 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
3459 | 3620 | ||
3460 | if (exit_reason < kvm_vmx_max_exit_handlers | 3621 | if (exit_reason < kvm_vmx_max_exit_handlers |
3461 | && kvm_vmx_exit_handlers[exit_reason]) | 3622 | && kvm_vmx_exit_handlers[exit_reason]) |
3462 | return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); | 3623 | return kvm_vmx_exit_handlers[exit_reason](vcpu); |
3463 | else { | 3624 | else { |
3464 | kvm_run->exit_reason = KVM_EXIT_UNKNOWN; | 3625 | vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; |
3465 | kvm_run->hw.hardware_exit_reason = exit_reason; | 3626 | vcpu->run->hw.hardware_exit_reason = exit_reason; |
3466 | } | 3627 | } |
3467 | return 0; | 3628 | return 0; |
3468 | } | 3629 | } |
@@ -3600,23 +3761,18 @@ static void fixup_rmode_irq(struct vcpu_vmx *vmx) | |||
3600 | #define Q "l" | 3761 | #define Q "l" |
3601 | #endif | 3762 | #endif |
3602 | 3763 | ||
3603 | static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3764 | static void vmx_vcpu_run(struct kvm_vcpu *vcpu) |
3604 | { | 3765 | { |
3605 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3766 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3606 | 3767 | ||
3607 | if (enable_ept && is_paging(vcpu)) { | ||
3608 | vmcs_writel(GUEST_CR3, vcpu->arch.cr3); | ||
3609 | ept_load_pdptrs(vcpu); | ||
3610 | } | ||
3611 | /* Record the guest's net vcpu time for enforced NMI injections. */ | 3768 | /* Record the guest's net vcpu time for enforced NMI injections. */ |
3612 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) | 3769 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) |
3613 | vmx->entry_time = ktime_get(); | 3770 | vmx->entry_time = ktime_get(); |
3614 | 3771 | ||
3615 | /* Handle invalid guest state instead of entering VMX */ | 3772 | /* Don't enter VMX if guest state is invalid; let the exit handler |
3616 | if (vmx->emulation_required && emulate_invalid_guest_state) { | 3773 | start emulating until we get back to a valid state */ |
3617 | handle_invalid_guest_state(vcpu, kvm_run); | 3774 | if (vmx->emulation_required && emulate_invalid_guest_state) |
3618 | return; | 3775 | return; |
3619 | } | ||
3620 | 3776 | ||
3621 | if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) | 3777 | if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) |
3622 | vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); | 3778 | vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); |
@@ -3636,9 +3792,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3636 | */ | 3792 | */ |
3637 | vmcs_writel(HOST_CR0, read_cr0()); | 3793 | vmcs_writel(HOST_CR0, read_cr0()); |
3638 | 3794 | ||
3639 | if (vcpu->arch.switch_db_regs) | ||
3640 | set_debugreg(vcpu->arch.dr6, 6); | ||
3641 | |||
3642 | asm( | 3795 | asm( |
3643 | /* Store host registers */ | 3796 | /* Store host registers */ |
3644 | "push %%"R"dx; push %%"R"bp;" | 3797 | "push %%"R"dx; push %%"R"bp;" |
@@ -3739,9 +3892,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3739 | | (1 << VCPU_EXREG_PDPTR)); | 3892 | | (1 << VCPU_EXREG_PDPTR)); |
3740 | vcpu->arch.regs_dirty = 0; | 3893 | vcpu->arch.regs_dirty = 0; |
3741 | 3894 | ||
3742 | if (vcpu->arch.switch_db_regs) | ||
3743 | get_debugreg(vcpu->arch.dr6, 6); | ||
3744 | |||
3745 | vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); | 3895 | vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); |
3746 | if (vmx->rmode.irq.pending) | 3896 | if (vmx->rmode.irq.pending) |
3747 | fixup_rmode_irq(vmx); | 3897 | fixup_rmode_irq(vmx); |
@@ -3775,7 +3925,6 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) | |||
3775 | __clear_bit(vmx->vpid, vmx_vpid_bitmap); | 3925 | __clear_bit(vmx->vpid, vmx_vpid_bitmap); |
3776 | spin_unlock(&vmx_vpid_lock); | 3926 | spin_unlock(&vmx_vpid_lock); |
3777 | vmx_free_vmcs(vcpu); | 3927 | vmx_free_vmcs(vcpu); |
3778 | kfree(vmx->host_msrs); | ||
3779 | kfree(vmx->guest_msrs); | 3928 | kfree(vmx->guest_msrs); |
3780 | kvm_vcpu_uninit(vcpu); | 3929 | kvm_vcpu_uninit(vcpu); |
3781 | kmem_cache_free(kvm_vcpu_cache, vmx); | 3930 | kmem_cache_free(kvm_vcpu_cache, vmx); |
@@ -3802,10 +3951,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
3802 | goto uninit_vcpu; | 3951 | goto uninit_vcpu; |
3803 | } | 3952 | } |
3804 | 3953 | ||
3805 | vmx->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); | ||
3806 | if (!vmx->host_msrs) | ||
3807 | goto free_guest_msrs; | ||
3808 | |||
3809 | vmx->vmcs = alloc_vmcs(); | 3954 | vmx->vmcs = alloc_vmcs(); |
3810 | if (!vmx->vmcs) | 3955 | if (!vmx->vmcs) |
3811 | goto free_msrs; | 3956 | goto free_msrs; |
@@ -3836,8 +3981,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
3836 | free_vmcs: | 3981 | free_vmcs: |
3837 | free_vmcs(vmx->vmcs); | 3982 | free_vmcs(vmx->vmcs); |
3838 | free_msrs: | 3983 | free_msrs: |
3839 | kfree(vmx->host_msrs); | ||
3840 | free_guest_msrs: | ||
3841 | kfree(vmx->guest_msrs); | 3984 | kfree(vmx->guest_msrs); |
3842 | uninit_vcpu: | 3985 | uninit_vcpu: |
3843 | kvm_vcpu_uninit(&vmx->vcpu); | 3986 | kvm_vcpu_uninit(&vmx->vcpu); |
@@ -3877,7 +4020,7 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) | |||
3877 | * b. VT-d with snooping control feature: snooping control feature of | 4020 | * b. VT-d with snooping control feature: snooping control feature of |
3878 | * VT-d engine can guarantee the cache correctness. Just set it | 4021 | * VT-d engine can guarantee the cache correctness. Just set it |
3879 | * to WB to keep consistent with host. So the same as item 3. | 4022 | * to WB to keep consistent with host. So the same as item 3. |
3880 | * 3. EPT without VT-d: always map as WB and set IGMT=1 to keep | 4023 | * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep |
3881 | * consistent with host MTRR | 4024 | * consistent with host MTRR |
3882 | */ | 4025 | */ |
3883 | if (is_mmio) | 4026 | if (is_mmio) |
@@ -3888,37 +4031,88 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) | |||
3888 | VMX_EPT_MT_EPTE_SHIFT; | 4031 | VMX_EPT_MT_EPTE_SHIFT; |
3889 | else | 4032 | else |
3890 | ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | 4033 | ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) |
3891 | | VMX_EPT_IGMT_BIT; | 4034 | | VMX_EPT_IPAT_BIT; |
3892 | 4035 | ||
3893 | return ret; | 4036 | return ret; |
3894 | } | 4037 | } |
3895 | 4038 | ||
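
[Editor's note: vmx_get_mt_mask above composes the EPT memory-type field, and the hunk also renames IGMT to its architectural name IPAT. A standalone restatement of the field layout, assuming the architectural VMX values (memory type in EPT PTE bits 5:3, IPAT in bit 6); the helper name is illustrative.]

#define VMX_EPT_MT_EPTE_SHIFT	3
#define VMX_EPT_IPAT_BIT	(1ull << 6)
#define MTRR_TYPE_UNCACHABLE	0
#define MTRR_TYPE_WRBACK	6

/* compose the memtype bits of an EPT PTE; IPAT makes the CPU
 * ignore the guest's PAT and use this type unconditionally */
static unsigned long long ept_memtype(int mtrr_type, int ignore_guest_pat)
{
	unsigned long long v = (unsigned long long)mtrr_type
				<< VMX_EPT_MT_EPTE_SHIFT;

	if (ignore_guest_pat)
		v |= VMX_EPT_IPAT_BIT;
	return v;
}
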
4039 | #define _ER(x) { EXIT_REASON_##x, #x } | ||
4040 | |||
3896 | static const struct trace_print_flags vmx_exit_reasons_str[] = { | 4041 | static const struct trace_print_flags vmx_exit_reasons_str[] = { |
3897 | { EXIT_REASON_EXCEPTION_NMI, "exception" }, | 4042 | _ER(EXCEPTION_NMI), |
3898 | { EXIT_REASON_EXTERNAL_INTERRUPT, "ext_irq" }, | 4043 | _ER(EXTERNAL_INTERRUPT), |
3899 | { EXIT_REASON_TRIPLE_FAULT, "triple_fault" }, | 4044 | _ER(TRIPLE_FAULT), |
3900 | { EXIT_REASON_NMI_WINDOW, "nmi_window" }, | 4045 | _ER(PENDING_INTERRUPT), |
3901 | { EXIT_REASON_IO_INSTRUCTION, "io_instruction" }, | 4046 | _ER(NMI_WINDOW), |
3902 | { EXIT_REASON_CR_ACCESS, "cr_access" }, | 4047 | _ER(TASK_SWITCH), |
3903 | { EXIT_REASON_DR_ACCESS, "dr_access" }, | 4048 | _ER(CPUID), |
3904 | { EXIT_REASON_CPUID, "cpuid" }, | 4049 | _ER(HLT), |
3905 | { EXIT_REASON_MSR_READ, "rdmsr" }, | 4050 | _ER(INVLPG), |
3906 | { EXIT_REASON_MSR_WRITE, "wrmsr" }, | 4051 | _ER(RDPMC), |
3907 | { EXIT_REASON_PENDING_INTERRUPT, "interrupt_window" }, | 4052 | _ER(RDTSC), |
3908 | { EXIT_REASON_HLT, "halt" }, | 4053 | _ER(VMCALL), |
3909 | { EXIT_REASON_INVLPG, "invlpg" }, | 4054 | _ER(VMCLEAR), |
3910 | { EXIT_REASON_VMCALL, "hypercall" }, | 4055 | _ER(VMLAUNCH), |
3911 | { EXIT_REASON_TPR_BELOW_THRESHOLD, "tpr_below_thres" }, | 4056 | _ER(VMPTRLD), |
3912 | { EXIT_REASON_APIC_ACCESS, "apic_access" }, | 4057 | _ER(VMPTRST), |
3913 | { EXIT_REASON_WBINVD, "wbinvd" }, | 4058 | _ER(VMREAD), |
3914 | { EXIT_REASON_TASK_SWITCH, "task_switch" }, | 4059 | _ER(VMRESUME), |
3915 | { EXIT_REASON_EPT_VIOLATION, "ept_violation" }, | 4060 | _ER(VMWRITE), |
4061 | _ER(VMOFF), | ||
4062 | _ER(VMON), | ||
4063 | _ER(CR_ACCESS), | ||
4064 | _ER(DR_ACCESS), | ||
4065 | _ER(IO_INSTRUCTION), | ||
4066 | _ER(MSR_READ), | ||
4067 | _ER(MSR_WRITE), | ||
4068 | _ER(MWAIT_INSTRUCTION), | ||
4069 | _ER(MONITOR_INSTRUCTION), | ||
4070 | _ER(PAUSE_INSTRUCTION), | ||
4071 | _ER(MCE_DURING_VMENTRY), | ||
4072 | _ER(TPR_BELOW_THRESHOLD), | ||
4073 | _ER(APIC_ACCESS), | ||
4074 | _ER(EPT_VIOLATION), | ||
4075 | _ER(EPT_MISCONFIG), | ||
4076 | _ER(WBINVD), | ||
3916 | { -1, NULL } | 4077 | { -1, NULL } |
3917 | }; | 4078 | }; |
3918 | 4079 | ||
3919 | static bool vmx_gb_page_enable(void) | 4080 | #undef _ER |
4081 | |||
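
[Editor's note: the _ER() macro rewritten above relies on two preprocessor operators: '##' pastes tokens to build the enum name, and '#' stringizes the argument, so the numeric value and its printed name can never drift apart. A minimal standalone demo; the two exit-reason values are the architectural VMX ones, the rest is illustrative.]

#include <stdio.h>

enum { EXIT_REASON_CPUID = 10, EXIT_REASON_HLT = 12 };

struct flag_str { long val; const char *name; };

#define _ER(x) { EXIT_REASON_##x, #x }
static const struct flag_str reasons[] = {
	_ER(CPUID),	/* expands to { EXIT_REASON_CPUID, "CPUID" } */
	_ER(HLT),
	{ -1, NULL }
};
#undef _ER

int main(void)
{
	for (int i = 0; reasons[i].name; i++)
		printf("%ld -> %s\n", reasons[i].val, reasons[i].name);
	return 0;
}
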
4082 | static int vmx_get_lpage_level(void) | ||
3920 | { | 4083 | { |
3921 | return false; | 4084 | if (enable_ept && !cpu_has_vmx_ept_1g_page()) |
4085 | return PT_DIRECTORY_LEVEL; | ||
4086 | else | ||
4087 | /* Both shadow paging and 1GB-capable EPT support 1GB pages */ | ||
4088 | return PT_PDPE_LEVEL; | ||
4089 | } | ||
4090 | |||
4091 | static inline u32 bit(int bitno) | ||
4092 | { | ||
4093 | return 1 << (bitno & 31); | ||
4094 | } | ||
4095 | |||
4096 | static void vmx_cpuid_update(struct kvm_vcpu *vcpu) | ||
4097 | { | ||
4098 | struct kvm_cpuid_entry2 *best; | ||
4099 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
4100 | u32 exec_control; | ||
4101 | |||
4102 | vmx->rdtscp_enabled = false; | ||
4103 | if (vmx_rdtscp_supported()) { | ||
4104 | exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | ||
4105 | if (exec_control & SECONDARY_EXEC_RDTSCP) { | ||
4106 | best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | ||
4107 | if (best && (best->edx & bit(X86_FEATURE_RDTSCP))) | ||
4108 | vmx->rdtscp_enabled = true; | ||
4109 | else { | ||
4110 | exec_control &= ~SECONDARY_EXEC_RDTSCP; | ||
4111 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, | ||
4112 | exec_control); | ||
4113 | } | ||
4114 | } | ||
4115 | } | ||
3922 | } | 4116 | } |
3923 | 4117 | ||
3924 | static struct kvm_x86_ops vmx_x86_ops = { | 4118 | static struct kvm_x86_ops vmx_x86_ops = { |
@@ -3947,6 +4141,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
3947 | .set_segment = vmx_set_segment, | 4141 | .set_segment = vmx_set_segment, |
3948 | .get_cpl = vmx_get_cpl, | 4142 | .get_cpl = vmx_get_cpl, |
3949 | .get_cs_db_l_bits = vmx_get_cs_db_l_bits, | 4143 | .get_cs_db_l_bits = vmx_get_cs_db_l_bits, |
4144 | .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits, | ||
3950 | .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, | 4145 | .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, |
3951 | .set_cr0 = vmx_set_cr0, | 4146 | .set_cr0 = vmx_set_cr0, |
3952 | .set_cr3 = vmx_set_cr3, | 4147 | .set_cr3 = vmx_set_cr3, |
@@ -3959,6 +4154,8 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
3959 | .cache_reg = vmx_cache_reg, | 4154 | .cache_reg = vmx_cache_reg, |
3960 | .get_rflags = vmx_get_rflags, | 4155 | .get_rflags = vmx_get_rflags, |
3961 | .set_rflags = vmx_set_rflags, | 4156 | .set_rflags = vmx_set_rflags, |
4157 | .fpu_activate = vmx_fpu_activate, | ||
4158 | .fpu_deactivate = vmx_fpu_deactivate, | ||
3962 | 4159 | ||
3963 | .tlb_flush = vmx_flush_tlb, | 4160 | .tlb_flush = vmx_flush_tlb, |
3964 | 4161 | ||
@@ -3973,6 +4170,8 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
3973 | .queue_exception = vmx_queue_exception, | 4170 | .queue_exception = vmx_queue_exception, |
3974 | .interrupt_allowed = vmx_interrupt_allowed, | 4171 | .interrupt_allowed = vmx_interrupt_allowed, |
3975 | .nmi_allowed = vmx_nmi_allowed, | 4172 | .nmi_allowed = vmx_nmi_allowed, |
4173 | .get_nmi_mask = vmx_get_nmi_mask, | ||
4174 | .set_nmi_mask = vmx_set_nmi_mask, | ||
3976 | .enable_nmi_window = enable_nmi_window, | 4175 | .enable_nmi_window = enable_nmi_window, |
3977 | .enable_irq_window = enable_irq_window, | 4176 | .enable_irq_window = enable_irq_window, |
3978 | .update_cr8_intercept = update_cr8_intercept, | 4177 | .update_cr8_intercept = update_cr8_intercept, |
@@ -3982,12 +4181,21 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
3982 | .get_mt_mask = vmx_get_mt_mask, | 4181 | .get_mt_mask = vmx_get_mt_mask, |
3983 | 4182 | ||
3984 | .exit_reasons_str = vmx_exit_reasons_str, | 4183 | .exit_reasons_str = vmx_exit_reasons_str, |
3985 | .gb_page_enable = vmx_gb_page_enable, | 4184 | .get_lpage_level = vmx_get_lpage_level, |
4185 | |||
4186 | .cpuid_update = vmx_cpuid_update, | ||
4187 | |||
4188 | .rdtscp_supported = vmx_rdtscp_supported, | ||
3986 | }; | 4189 | }; |
3987 | 4190 | ||
3988 | static int __init vmx_init(void) | 4191 | static int __init vmx_init(void) |
3989 | { | 4192 | { |
3990 | int r; | 4193 | int r, i; |
4194 | |||
4195 | rdmsrl_safe(MSR_EFER, &host_efer); | ||
4196 | |||
4197 | for (i = 0; i < NR_VMX_MSR; ++i) | ||
4198 | kvm_define_shared_msr(i, vmx_msr_index[i]); | ||
3991 | 4199 | ||
3992 | vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL); | 4200 | vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL); |
3993 | if (!vmx_io_bitmap_a) | 4201 | if (!vmx_io_bitmap_a) |
@@ -4049,8 +4257,6 @@ static int __init vmx_init(void) | |||
4049 | if (bypass_guest_pf) | 4257 | if (bypass_guest_pf) |
4050 | kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); | 4258 | kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); |
4051 | 4259 | ||
4052 | ept_sync_global(); | ||
4053 | |||
4054 | return 0; | 4260 | return 0; |
4055 | 4261 | ||
4056 | out3: | 4262 | out3: |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ae07d261527c..c4f35b545c1d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -37,11 +37,15 @@ | |||
37 | #include <linux/iommu.h> | 37 | #include <linux/iommu.h> |
38 | #include <linux/intel-iommu.h> | 38 | #include <linux/intel-iommu.h> |
39 | #include <linux/cpufreq.h> | 39 | #include <linux/cpufreq.h> |
40 | #include <linux/user-return-notifier.h> | ||
41 | #include <linux/srcu.h> | ||
42 | #include <linux/slab.h> | ||
40 | #include <trace/events/kvm.h> | 43 | #include <trace/events/kvm.h> |
41 | #undef TRACE_INCLUDE_FILE | 44 | #undef TRACE_INCLUDE_FILE |
42 | #define CREATE_TRACE_POINTS | 45 | #define CREATE_TRACE_POINTS |
43 | #include "trace.h" | 46 | #include "trace.h" |
44 | 47 | ||
48 | #include <asm/debugreg.h> | ||
45 | #include <asm/uaccess.h> | 49 | #include <asm/uaccess.h> |
46 | #include <asm/msr.h> | 50 | #include <asm/msr.h> |
47 | #include <asm/desc.h> | 51 | #include <asm/desc.h> |
@@ -87,6 +91,25 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops); | |||
87 | int ignore_msrs = 0; | 91 | int ignore_msrs = 0; |
88 | module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR); | 92 | module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR); |
89 | 93 | ||
94 | #define KVM_NR_SHARED_MSRS 16 | ||
95 | |||
96 | struct kvm_shared_msrs_global { | ||
97 | int nr; | ||
98 | u32 msrs[KVM_NR_SHARED_MSRS]; | ||
99 | }; | ||
100 | |||
101 | struct kvm_shared_msrs { | ||
102 | struct user_return_notifier urn; | ||
103 | bool registered; | ||
104 | struct kvm_shared_msr_values { | ||
105 | u64 host; | ||
106 | u64 curr; | ||
107 | } values[KVM_NR_SHARED_MSRS]; | ||
108 | }; | ||
109 | |||
110 | static struct kvm_shared_msrs_global __read_mostly shared_msrs_global; | ||
111 | static DEFINE_PER_CPU(struct kvm_shared_msrs, shared_msrs); | ||
112 | |||
90 | struct kvm_stats_debugfs_item debugfs_entries[] = { | 113 | struct kvm_stats_debugfs_item debugfs_entries[] = { |
91 | { "pf_fixed", VCPU_STAT(pf_fixed) }, | 114 | { "pf_fixed", VCPU_STAT(pf_fixed) }, |
92 | { "pf_guest", VCPU_STAT(pf_guest) }, | 115 | { "pf_guest", VCPU_STAT(pf_guest) }, |
@@ -123,6 +146,83 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
123 | { NULL } | 146 | { NULL } |
124 | }; | 147 | }; |
125 | 148 | ||
149 | static void kvm_on_user_return(struct user_return_notifier *urn) | ||
150 | { | ||
151 | unsigned slot; | ||
152 | struct kvm_shared_msrs *locals | ||
153 | = container_of(urn, struct kvm_shared_msrs, urn); | ||
154 | struct kvm_shared_msr_values *values; | ||
155 | |||
156 | for (slot = 0; slot < shared_msrs_global.nr; ++slot) { | ||
157 | values = &locals->values[slot]; | ||
158 | if (values->host != values->curr) { | ||
159 | wrmsrl(shared_msrs_global.msrs[slot], values->host); | ||
160 | values->curr = values->host; | ||
161 | } | ||
162 | } | ||
163 | locals->registered = false; | ||
164 | user_return_notifier_unregister(urn); | ||
165 | } | ||
166 | |||
167 | static void shared_msr_update(unsigned slot, u32 msr) | ||
168 | { | ||
169 | struct kvm_shared_msrs *smsr; | ||
170 | u64 value; | ||
171 | |||
172 | smsr = &__get_cpu_var(shared_msrs); | ||
173 | /* read only; nobody should be modifying it at this time, | ||
174 | * so no lock is needed */ | ||
175 | if (slot >= shared_msrs_global.nr) { | ||
176 | printk(KERN_ERR "kvm: invalid MSR slot!"); | ||
177 | return; | ||
178 | } | ||
179 | rdmsrl_safe(msr, &value); | ||
180 | smsr->values[slot].host = value; | ||
181 | smsr->values[slot].curr = value; | ||
182 | } | ||
183 | |||
184 | void kvm_define_shared_msr(unsigned slot, u32 msr) | ||
185 | { | ||
186 | if (slot >= shared_msrs_global.nr) | ||
187 | shared_msrs_global.nr = slot + 1; | ||
188 | shared_msrs_global.msrs[slot] = msr; | ||
189 | /* make sure shared_msrs_global has been updated before it is read */ | ||
190 | smp_wmb(); | ||
191 | } | ||
192 | EXPORT_SYMBOL_GPL(kvm_define_shared_msr); | ||
193 | |||
194 | static void kvm_shared_msr_cpu_online(void) | ||
195 | { | ||
196 | unsigned i; | ||
197 | |||
198 | for (i = 0; i < shared_msrs_global.nr; ++i) | ||
199 | shared_msr_update(i, shared_msrs_global.msrs[i]); | ||
200 | } | ||
201 | |||
202 | void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) | ||
203 | { | ||
204 | struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs); | ||
205 | |||
206 | if (((value ^ smsr->values[slot].curr) & mask) == 0) | ||
207 | return; | ||
208 | smsr->values[slot].curr = value; | ||
209 | wrmsrl(shared_msrs_global.msrs[slot], value); | ||
210 | if (!smsr->registered) { | ||
211 | smsr->urn.on_user_return = kvm_on_user_return; | ||
212 | user_return_notifier_register(&smsr->urn); | ||
213 | smsr->registered = true; | ||
214 | } | ||
215 | } | ||
216 | EXPORT_SYMBOL_GPL(kvm_set_shared_msr); | ||
217 | |||
218 | static void drop_user_return_notifiers(void *ignore) | ||
219 | { | ||
220 | struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs); | ||
221 | |||
222 | if (smsr->registered) | ||
223 | kvm_on_user_return(&smsr->urn); | ||
224 | } | ||
225 | |||
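
[Editor's note: a hedged sketch of how a caller is expected to use the two exported entry points above, in kernel-module context. The slot number and MSR choice are illustrative, not taken from this patch; the real consumer is the VMX code in this series.]

#define SLOT_SYSCALL_MASK 0	/* hypothetical slot index */

static void example_hardware_setup(void)
{
	/* once, at module load: declare which MSRs are lazily restored */
	kvm_define_shared_msr(SLOT_SYSCALL_MASK, MSR_SYSCALL_MASK);
}

static void example_prepare_guest_switch(u64 guest_val)
{
	/* before entering the guest: write the guest value and register
	 * the user-return notifier; the host value is written back only
	 * when the CPU actually returns to userspace */
	kvm_set_shared_msr(SLOT_SYSCALL_MASK, guest_val, -1ull);
}
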
126 | unsigned long segment_base(u16 selector) | 226 | unsigned long segment_base(u16 selector) |
127 | { | 227 | { |
128 | struct descriptor_table gdt; | 228 | struct descriptor_table gdt; |
@@ -170,12 +270,68 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data) | |||
170 | } | 270 | } |
171 | EXPORT_SYMBOL_GPL(kvm_set_apic_base); | 271 | EXPORT_SYMBOL_GPL(kvm_set_apic_base); |
172 | 272 | ||
273 | #define EXCPT_BENIGN 0 | ||
274 | #define EXCPT_CONTRIBUTORY 1 | ||
275 | #define EXCPT_PF 2 | ||
276 | |||
277 | static int exception_class(int vector) | ||
278 | { | ||
279 | switch (vector) { | ||
280 | case PF_VECTOR: | ||
281 | return EXCPT_PF; | ||
282 | case DE_VECTOR: | ||
283 | case TS_VECTOR: | ||
284 | case NP_VECTOR: | ||
285 | case SS_VECTOR: | ||
286 | case GP_VECTOR: | ||
287 | return EXCPT_CONTRIBUTORY; | ||
288 | default: | ||
289 | break; | ||
290 | } | ||
291 | return EXCPT_BENIGN; | ||
292 | } | ||
293 | |||
294 | static void kvm_multiple_exception(struct kvm_vcpu *vcpu, | ||
295 | unsigned nr, bool has_error, u32 error_code) | ||
296 | { | ||
297 | u32 prev_nr; | ||
298 | int class1, class2; | ||
299 | |||
300 | if (!vcpu->arch.exception.pending) { | ||
301 | queue: | ||
302 | vcpu->arch.exception.pending = true; | ||
303 | vcpu->arch.exception.has_error_code = has_error; | ||
304 | vcpu->arch.exception.nr = nr; | ||
305 | vcpu->arch.exception.error_code = error_code; | ||
306 | return; | ||
307 | } | ||
308 | |||
309 | /* a second exception is pending; decide how the two combine */ | ||
310 | prev_nr = vcpu->arch.exception.nr; | ||
311 | if (prev_nr == DF_VECTOR) { | ||
312 | /* triple fault -> shutdown */ | ||
313 | set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); | ||
314 | return; | ||
315 | } | ||
316 | class1 = exception_class(prev_nr); | ||
317 | class2 = exception_class(nr); | ||
318 | if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY) | ||
319 | || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) { | ||
320 | /* generate double fault per SDM Table 5-5 */ | ||
321 | vcpu->arch.exception.pending = true; | ||
322 | vcpu->arch.exception.has_error_code = true; | ||
323 | vcpu->arch.exception.nr = DF_VECTOR; | ||
324 | vcpu->arch.exception.error_code = 0; | ||
325 | } else | ||
326 | /* replace the previous exception with the new one, in the hope | ||
327 | that re-executing the instruction will regenerate the lost | ||
328 | exception */ | ||
329 | goto queue; | ||
330 | } | ||
331 | |||
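
[Editor's note: the double-fault rule above can be exercised in isolation. A standalone sketch of the same SDM Table 5-5 decision; the vector numbers are the architectural x86 values, the wrapper function is illustrative.]

#include <stdbool.h>
#include <stdio.h>

enum { DE_VECTOR = 0, DF_VECTOR = 8, TS_VECTOR = 10, NP_VECTOR = 11,
       SS_VECTOR = 12, GP_VECTOR = 13, PF_VECTOR = 14 };
enum { EXCPT_BENIGN, EXCPT_CONTRIBUTORY, EXCPT_PF };

static int exception_class(int vector)
{
	switch (vector) {
	case PF_VECTOR:
		return EXCPT_PF;
	case DE_VECTOR: case TS_VECTOR: case NP_VECTOR:
	case SS_VECTOR: case GP_VECTOR:
		return EXCPT_CONTRIBUTORY;
	}
	return EXCPT_BENIGN;
}

static bool escalates_to_double_fault(int prev, int next)
{
	int c1 = exception_class(prev), c2 = exception_class(next);

	return (c1 == EXCPT_CONTRIBUTORY && c2 == EXCPT_CONTRIBUTORY) ||
	       (c1 == EXCPT_PF && c2 != EXCPT_BENIGN);
}

int main(void)
{
	printf("%d\n", escalates_to_double_fault(PF_VECTOR, GP_VECTOR)); /* 1: #GP during #PF */
	printf("%d\n", escalates_to_double_fault(GP_VECTOR, GP_VECTOR)); /* 1: two contributory */
	printf("%d\n", escalates_to_double_fault(GP_VECTOR, PF_VECTOR)); /* 0: #PF just replaces */
	return 0;
}
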
173 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr) | 332 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr) |
174 | { | 333 | { |
175 | WARN_ON(vcpu->arch.exception.pending); | 334 | kvm_multiple_exception(vcpu, nr, false, 0); |
176 | vcpu->arch.exception.pending = true; | ||
177 | vcpu->arch.exception.has_error_code = false; | ||
178 | vcpu->arch.exception.nr = nr; | ||
179 | } | 335 | } |
180 | EXPORT_SYMBOL_GPL(kvm_queue_exception); | 336 | EXPORT_SYMBOL_GPL(kvm_queue_exception); |
181 | 337 | ||
@@ -183,25 +339,6 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr, | |||
183 | u32 error_code) | 339 | u32 error_code) |
184 | { | 340 | { |
185 | ++vcpu->stat.pf_guest; | 341 | ++vcpu->stat.pf_guest; |
186 | |||
187 | if (vcpu->arch.exception.pending) { | ||
188 | switch(vcpu->arch.exception.nr) { | ||
189 | case DF_VECTOR: | ||
190 | /* triple fault -> shutdown */ | ||
191 | set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); | ||
192 | return; | ||
193 | case PF_VECTOR: | ||
194 | vcpu->arch.exception.nr = DF_VECTOR; | ||
195 | vcpu->arch.exception.error_code = 0; | ||
196 | return; | ||
197 | default: | ||
198 | /* replace previous exception with a new one in a hope | ||
199 | that instruction re-execution will regenerate lost | ||
200 | exception */ | ||
201 | vcpu->arch.exception.pending = false; | ||
202 | break; | ||
203 | } | ||
204 | } | ||
205 | vcpu->arch.cr2 = addr; | 342 | vcpu->arch.cr2 = addr; |
206 | kvm_queue_exception_e(vcpu, PF_VECTOR, error_code); | 343 | kvm_queue_exception_e(vcpu, PF_VECTOR, error_code); |
207 | } | 344 | } |
@@ -214,11 +351,7 @@ EXPORT_SYMBOL_GPL(kvm_inject_nmi); | |||
214 | 351 | ||
215 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) | 352 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) |
216 | { | 353 | { |
217 | WARN_ON(vcpu->arch.exception.pending); | 354 | kvm_multiple_exception(vcpu, nr, true, error_code); |
218 | vcpu->arch.exception.pending = true; | ||
219 | vcpu->arch.exception.has_error_code = true; | ||
220 | vcpu->arch.exception.nr = nr; | ||
221 | vcpu->arch.exception.error_code = error_code; | ||
222 | } | 355 | } |
223 | EXPORT_SYMBOL_GPL(kvm_queue_exception_e); | 356 | EXPORT_SYMBOL_GPL(kvm_queue_exception_e); |
224 | 357 | ||
@@ -296,41 +429,38 @@ out: | |||
296 | 429 | ||
297 | void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | 430 | void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) |
298 | { | 431 | { |
299 | if (cr0 & CR0_RESERVED_BITS) { | 432 | cr0 |= X86_CR0_ET; |
300 | printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n", | 433 | |
301 | cr0, vcpu->arch.cr0); | 434 | #ifdef CONFIG_X86_64 |
435 | if (cr0 & 0xffffffff00000000UL) { | ||
302 | kvm_inject_gp(vcpu, 0); | 436 | kvm_inject_gp(vcpu, 0); |
303 | return; | 437 | return; |
304 | } | 438 | } |
439 | #endif | ||
440 | |||
441 | cr0 &= ~CR0_RESERVED_BITS; | ||
305 | 442 | ||
306 | if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) { | 443 | if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) { |
307 | printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n"); | ||
308 | kvm_inject_gp(vcpu, 0); | 444 | kvm_inject_gp(vcpu, 0); |
309 | return; | 445 | return; |
310 | } | 446 | } |
311 | 447 | ||
312 | if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) { | 448 | if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) { |
313 | printk(KERN_DEBUG "set_cr0: #GP, set PG flag " | ||
314 | "and a clear PE flag\n"); | ||
315 | kvm_inject_gp(vcpu, 0); | 449 | kvm_inject_gp(vcpu, 0); |
316 | return; | 450 | return; |
317 | } | 451 | } |
318 | 452 | ||
319 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { | 453 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { |
320 | #ifdef CONFIG_X86_64 | 454 | #ifdef CONFIG_X86_64 |
321 | if ((vcpu->arch.shadow_efer & EFER_LME)) { | 455 | if ((vcpu->arch.efer & EFER_LME)) { |
322 | int cs_db, cs_l; | 456 | int cs_db, cs_l; |
323 | 457 | ||
324 | if (!is_pae(vcpu)) { | 458 | if (!is_pae(vcpu)) { |
325 | printk(KERN_DEBUG "set_cr0: #GP, start paging " | ||
326 | "in long mode while PAE is disabled\n"); | ||
327 | kvm_inject_gp(vcpu, 0); | 459 | kvm_inject_gp(vcpu, 0); |
328 | return; | 460 | return; |
329 | } | 461 | } |
330 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | 462 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); |
331 | if (cs_l) { | 463 | if (cs_l) { |
332 | printk(KERN_DEBUG "set_cr0: #GP, start paging " | ||
333 | "in long mode while CS.L == 1\n"); | ||
334 | kvm_inject_gp(vcpu, 0); | 464 | kvm_inject_gp(vcpu, 0); |
335 | return; | 465 | return; |
336 | 466 | ||
@@ -338,8 +468,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
338 | } else | 468 | } else |
339 | #endif | 469 | #endif |
340 | if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) { | 470 | if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) { |
341 | printk(KERN_DEBUG "set_cr0: #GP, pdptrs " | ||
342 | "reserved bits\n"); | ||
343 | kvm_inject_gp(vcpu, 0); | 471 | kvm_inject_gp(vcpu, 0); |
344 | return; | 472 | return; |
345 | } | 473 | } |
@@ -356,38 +484,33 @@ EXPORT_SYMBOL_GPL(kvm_set_cr0); | |||
356 | 484 | ||
357 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) | 485 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) |
358 | { | 486 | { |
359 | kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)); | 487 | kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0ful) | (msw & 0x0f)); |
360 | } | 488 | } |
361 | EXPORT_SYMBOL_GPL(kvm_lmsw); | 489 | EXPORT_SYMBOL_GPL(kvm_lmsw); |
362 | 490 | ||
363 | void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 491 | void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) |
364 | { | 492 | { |
365 | unsigned long old_cr4 = vcpu->arch.cr4; | 493 | unsigned long old_cr4 = kvm_read_cr4(vcpu); |
366 | unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; | 494 | unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; |
367 | 495 | ||
368 | if (cr4 & CR4_RESERVED_BITS) { | 496 | if (cr4 & CR4_RESERVED_BITS) { |
369 | printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n"); | ||
370 | kvm_inject_gp(vcpu, 0); | 497 | kvm_inject_gp(vcpu, 0); |
371 | return; | 498 | return; |
372 | } | 499 | } |
373 | 500 | ||
374 | if (is_long_mode(vcpu)) { | 501 | if (is_long_mode(vcpu)) { |
375 | if (!(cr4 & X86_CR4_PAE)) { | 502 | if (!(cr4 & X86_CR4_PAE)) { |
376 | printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while " | ||
377 | "in long mode\n"); | ||
378 | kvm_inject_gp(vcpu, 0); | 503 | kvm_inject_gp(vcpu, 0); |
379 | return; | 504 | return; |
380 | } | 505 | } |
381 | } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) | 506 | } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) |
382 | && ((cr4 ^ old_cr4) & pdptr_bits) | 507 | && ((cr4 ^ old_cr4) & pdptr_bits) |
383 | && !load_pdptrs(vcpu, vcpu->arch.cr3)) { | 508 | && !load_pdptrs(vcpu, vcpu->arch.cr3)) { |
384 | printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n"); | ||
385 | kvm_inject_gp(vcpu, 0); | 509 | kvm_inject_gp(vcpu, 0); |
386 | return; | 510 | return; |
387 | } | 511 | } |
388 | 512 | ||
389 | if (cr4 & X86_CR4_VMXE) { | 513 | if (cr4 & X86_CR4_VMXE) { |
390 | printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n"); | ||
391 | kvm_inject_gp(vcpu, 0); | 514 | kvm_inject_gp(vcpu, 0); |
392 | return; | 515 | return; |
393 | } | 516 | } |
@@ -408,21 +531,16 @@ void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
408 | 531 | ||
409 | if (is_long_mode(vcpu)) { | 532 | if (is_long_mode(vcpu)) { |
410 | if (cr3 & CR3_L_MODE_RESERVED_BITS) { | 533 | if (cr3 & CR3_L_MODE_RESERVED_BITS) { |
411 | printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n"); | ||
412 | kvm_inject_gp(vcpu, 0); | 534 | kvm_inject_gp(vcpu, 0); |
413 | return; | 535 | return; |
414 | } | 536 | } |
415 | } else { | 537 | } else { |
416 | if (is_pae(vcpu)) { | 538 | if (is_pae(vcpu)) { |
417 | if (cr3 & CR3_PAE_RESERVED_BITS) { | 539 | if (cr3 & CR3_PAE_RESERVED_BITS) { |
418 | printk(KERN_DEBUG | ||
419 | "set_cr3: #GP, reserved bits\n"); | ||
420 | kvm_inject_gp(vcpu, 0); | 540 | kvm_inject_gp(vcpu, 0); |
421 | return; | 541 | return; |
422 | } | 542 | } |
423 | if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) { | 543 | if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) { |
424 | printk(KERN_DEBUG "set_cr3: #GP, pdptrs " | ||
425 | "reserved bits\n"); | ||
426 | kvm_inject_gp(vcpu, 0); | 544 | kvm_inject_gp(vcpu, 0); |
427 | return; | 545 | return; |
428 | } | 546 | } |
@@ -454,7 +572,6 @@ EXPORT_SYMBOL_GPL(kvm_set_cr3); | |||
454 | void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) | 572 | void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) |
455 | { | 573 | { |
456 | if (cr8 & CR8_RESERVED_BITS) { | 574 | if (cr8 & CR8_RESERVED_BITS) { |
457 | printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8); | ||
458 | kvm_inject_gp(vcpu, 0); | 575 | kvm_inject_gp(vcpu, 0); |
459 | return; | 576 | return; |
460 | } | 577 | } |
@@ -484,16 +601,21 @@ static inline u32 bit(int bitno) | |||
484 | * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. | 601 | * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. |
485 | * | 602 | * |
486 | * This list is modified at module load time to reflect the | 603 | * This list is modified at module load time to reflect the |
487 | * capabilities of the host cpu. | 604 | * capabilities of the host cpu. This capabilities test skips MSRs that are |
605 | * kvm-specific. Those are put in the beginning of the list. | ||
488 | */ | 606 | */ |
607 | |||
608 | #define KVM_SAVE_MSRS_BEGIN 5 | ||
489 | static u32 msrs_to_save[] = { | 609 | static u32 msrs_to_save[] = { |
610 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, | ||
611 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, | ||
612 | HV_X64_MSR_APIC_ASSIST_PAGE, | ||
490 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, | 613 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, |
491 | MSR_K6_STAR, | 614 | MSR_K6_STAR, |
492 | #ifdef CONFIG_X86_64 | 615 | #ifdef CONFIG_X86_64 |
493 | MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, | 616 | MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, |
494 | #endif | 617 | #endif |
495 | MSR_IA32_TSC, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, | 618 | MSR_IA32_TSC, MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA |
496 | MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA | ||
497 | }; | 619 | }; |
498 | 620 | ||
499 | static unsigned num_msrs_to_save; | 621 | static unsigned num_msrs_to_save; |
@@ -505,15 +627,12 @@ static u32 emulated_msrs[] = { | |||
505 | static void set_efer(struct kvm_vcpu *vcpu, u64 efer) | 627 | static void set_efer(struct kvm_vcpu *vcpu, u64 efer) |
506 | { | 628 | { |
507 | if (efer & efer_reserved_bits) { | 629 | if (efer & efer_reserved_bits) { |
508 | printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n", | ||
509 | efer); | ||
510 | kvm_inject_gp(vcpu, 0); | 630 | kvm_inject_gp(vcpu, 0); |
511 | return; | 631 | return; |
512 | } | 632 | } |
513 | 633 | ||
514 | if (is_paging(vcpu) | 634 | if (is_paging(vcpu) |
515 | && (vcpu->arch.shadow_efer & EFER_LME) != (efer & EFER_LME)) { | 635 | && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) { |
516 | printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n"); | ||
517 | kvm_inject_gp(vcpu, 0); | 636 | kvm_inject_gp(vcpu, 0); |
518 | return; | 637 | return; |
519 | } | 638 | } |
@@ -523,7 +642,6 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
523 | 642 | ||
524 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | 643 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); |
525 | if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) { | 644 | if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) { |
526 | printk(KERN_DEBUG "set_efer: #GP, enable FFXSR w/o CPUID capability\n"); | ||
527 | kvm_inject_gp(vcpu, 0); | 645 | kvm_inject_gp(vcpu, 0); |
528 | return; | 646 | return; |
529 | } | 647 | } |
@@ -534,7 +652,6 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
534 | 652 | ||
535 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | 653 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); |
536 | if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) { | 654 | if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) { |
537 | printk(KERN_DEBUG "set_efer: #GP, enable SVM w/o SVM\n"); | ||
538 | kvm_inject_gp(vcpu, 0); | 655 | kvm_inject_gp(vcpu, 0); |
539 | return; | 656 | return; |
540 | } | 657 | } |
@@ -543,9 +660,9 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
543 | kvm_x86_ops->set_efer(vcpu, efer); | 660 | kvm_x86_ops->set_efer(vcpu, efer); |
544 | 661 | ||
545 | efer &= ~EFER_LMA; | 662 | efer &= ~EFER_LMA; |
546 | efer |= vcpu->arch.shadow_efer & EFER_LMA; | 663 | efer |= vcpu->arch.efer & EFER_LMA; |
547 | 664 | ||
548 | vcpu->arch.shadow_efer = efer; | 665 | vcpu->arch.efer = efer; |
549 | 666 | ||
550 | vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; | 667 | vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; |
551 | kvm_mmu_reset_context(vcpu); | 668 | kvm_mmu_reset_context(vcpu); |
@@ -580,7 +697,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) | |||
580 | { | 697 | { |
581 | static int version; | 698 | static int version; |
582 | struct pvclock_wall_clock wc; | 699 | struct pvclock_wall_clock wc; |
583 | struct timespec now, sys, boot; | 700 | struct timespec boot; |
584 | 701 | ||
585 | if (!wall_clock) | 702 | if (!wall_clock) |
586 | return; | 703 | return; |
@@ -595,9 +712,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) | |||
595 | * wall clock specified here. guest system time equals host | 712 | * wall clock specified here. guest system time equals host |
596 | * system time for us, thus we must fill in host boot time here. | 713 | * system time for us, thus we must fill in host boot time here. |
597 | */ | 714 | */ |
598 | now = current_kernel_time(); | 715 | getboottime(&boot); |
599 | ktime_get_ts(&sys); | ||
600 | boot = ns_to_timespec(timespec_to_ns(&now) - timespec_to_ns(&sys)); | ||
601 | 716 | ||
602 | wc.sec = boot.tv_sec; | 717 | wc.sec = boot.tv_sec; |
603 | wc.nsec = boot.tv_nsec; | 718 | wc.nsec = boot.tv_nsec; |
@@ -672,12 +787,14 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) | |||
672 | local_irq_save(flags); | 787 | local_irq_save(flags); |
673 | kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp); | 788 | kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp); |
674 | ktime_get_ts(&ts); | 789 | ktime_get_ts(&ts); |
790 | monotonic_to_bootbased(&ts); | ||
675 | local_irq_restore(flags); | 791 | local_irq_restore(flags); |
676 | 792 | ||
677 | /* With all the info we got, fill in the values */ | 793 | /* With all the info we got, fill in the values */ |
678 | 794 | ||
679 | vcpu->hv_clock.system_time = ts.tv_nsec + | 795 | vcpu->hv_clock.system_time = ts.tv_nsec + |
680 | (NSEC_PER_SEC * (u64)ts.tv_sec); | 796 | (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset; |
797 | |||
681 | /* | 798 | /* |
682 | * The interface expects us to write an even number signaling that the | 799 | * The interface expects us to write an even number signaling that the |
683 | * update is finished. Since the guest won't see the intermediate | 800 | * update is finished. Since the guest won't see the intermediate |
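
[Editor's note: after this hunk, guest system time is boot-based host monotonic time plus the per-VM kvmclock_offset added by this series. A hypothetical standalone restatement of that computation; the helper name is illustrative.]

#include <stdint.h>
#include <time.h>

#define NSEC_PER_SEC 1000000000ULL

/* guest system time = boot-based host time + per-VM kvmclock offset */
static uint64_t guest_system_time(const struct timespec *ts,
				  int64_t kvmclock_offset)
{
	return (uint64_t)ts->tv_sec * NSEC_PER_SEC + ts->tv_nsec
		+ kvmclock_offset;
}
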
@@ -823,9 +940,13 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
823 | if (msr >= MSR_IA32_MC0_CTL && | 940 | if (msr >= MSR_IA32_MC0_CTL && |
824 | msr < MSR_IA32_MC0_CTL + 4 * bank_num) { | 941 | msr < MSR_IA32_MC0_CTL + 4 * bank_num) { |
825 | u32 offset = msr - MSR_IA32_MC0_CTL; | 942 | u32 offset = msr - MSR_IA32_MC0_CTL; |
826 | /* only 0 or all 1s can be written to IA32_MCi_CTL */ | 943 | /* only 0 or all 1s can be written to IA32_MCi_CTL; |
944 | * some Linux kernels, though, clear bit 10 in bank 4 to | ||
945 | * work around a BIOS/GART TBL issue on AMD K8s, so ignore | ||
946 | * that bit to avoid an uncaught #GP in the guest | ||
947 | */ | ||
827 | if ((offset & 0x3) == 0 && | 948 | if ((offset & 0x3) == 0 && |
828 | data != 0 && data != ~(u64)0) | 949 | data != 0 && (data | (1 << 10)) != ~(u64)0) |
829 | return -1; | 950 | return -1; |
830 | vcpu->arch.mce_banks[offset] = data; | 951 | vcpu->arch.mce_banks[offset] = data; |
831 | break; | 952 | break; |
@@ -835,6 +956,132 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
835 | return 0; | 956 | return 0; |
836 | } | 957 | } |
837 | 958 | ||
959 | static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data) | ||
960 | { | ||
961 | struct kvm *kvm = vcpu->kvm; | ||
962 | int lm = is_long_mode(vcpu); | ||
963 | u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64 | ||
964 | : (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32; | ||
965 | u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64 | ||
966 | : kvm->arch.xen_hvm_config.blob_size_32; | ||
967 | u32 page_num = data & ~PAGE_MASK; | ||
968 | u64 page_addr = data & PAGE_MASK; | ||
969 | u8 *page; | ||
970 | int r; | ||
971 | |||
972 | r = -E2BIG; | ||
973 | if (page_num >= blob_size) | ||
974 | goto out; | ||
975 | r = -ENOMEM; | ||
976 | page = kzalloc(PAGE_SIZE, GFP_KERNEL); | ||
977 | if (!page) | ||
978 | goto out; | ||
979 | r = -EFAULT; | ||
980 | if (copy_from_user(page, blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE)) | ||
981 | goto out_free; | ||
982 | if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE)) | ||
983 | goto out_free; | ||
984 | r = 0; | ||
985 | out_free: | ||
986 | kfree(page); | ||
987 | out: | ||
988 | return r; | ||
989 | } | ||
990 | |||
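
[Editor's note: the MSR handled by xen_hvm_config() above is armed from userspace via the KVM_XEN_HVM_CONFIG vm ioctl added in this series. A hedged sketch of that side; the MSR number, blob pointers, and page counts are illustrative.]

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int set_xen_hvm_config(int vm_fd, void *blob32, void *blob64)
{
	struct kvm_xen_hvm_config cfg = {
		.msr = 0x40000000,		/* MSR the guest will write */
		.blob_addr_32 = (uintptr_t)blob32,
		.blob_size_32 = 1,		/* blob sizes are in pages */
		.blob_addr_64 = (uintptr_t)blob64,
		.blob_size_64 = 1,
	};

	return ioctl(vm_fd, KVM_XEN_HVM_CONFIG, &cfg);
}
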
991 | static bool kvm_hv_hypercall_enabled(struct kvm *kvm) | ||
992 | { | ||
993 | return kvm->arch.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE; | ||
994 | } | ||
995 | |||
996 | static bool kvm_hv_msr_partition_wide(u32 msr) | ||
997 | { | ||
998 | bool r = false; | ||
999 | switch (msr) { | ||
1000 | case HV_X64_MSR_GUEST_OS_ID: | ||
1001 | case HV_X64_MSR_HYPERCALL: | ||
1002 | r = true; | ||
1003 | break; | ||
1004 | } | ||
1005 | |||
1006 | return r; | ||
1007 | } | ||
1008 | |||
1009 | static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) | ||
1010 | { | ||
1011 | struct kvm *kvm = vcpu->kvm; | ||
1012 | |||
1013 | switch (msr) { | ||
1014 | case HV_X64_MSR_GUEST_OS_ID: | ||
1015 | kvm->arch.hv_guest_os_id = data; | ||
1016 | /* setting guest os id to zero disables hypercall page */ | ||
1017 | if (!kvm->arch.hv_guest_os_id) | ||
1018 | kvm->arch.hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE; | ||
1019 | break; | ||
1020 | case HV_X64_MSR_HYPERCALL: { | ||
1021 | u64 gfn; | ||
1022 | unsigned long addr; | ||
1023 | u8 instructions[4]; | ||
1024 | |||
1025 | /* if the guest os id is not set, the hypercall page remains disabled */ | ||
1026 | if (!kvm->arch.hv_guest_os_id) | ||
1027 | break; | ||
1028 | if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) { | ||
1029 | kvm->arch.hv_hypercall = data; | ||
1030 | break; | ||
1031 | } | ||
1032 | gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT; | ||
1033 | addr = gfn_to_hva(kvm, gfn); | ||
1034 | if (kvm_is_error_hva(addr)) | ||
1035 | return 1; | ||
1036 | kvm_x86_ops->patch_hypercall(vcpu, instructions); | ||
1037 | ((unsigned char *)instructions)[3] = 0xc3; /* ret */ | ||
1038 | if (copy_to_user((void __user *)addr, instructions, 4)) | ||
1039 | return 1; | ||
1040 | kvm->arch.hv_hypercall = data; | ||
1041 | break; | ||
1042 | } | ||
1043 | default: | ||
1044 | pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x " | ||
1045 | "data 0x%llx\n", msr, data); | ||
1046 | return 1; | ||
1047 | } | ||
1048 | return 0; | ||
1049 | } | ||
1050 | |||
1051 | static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data) | ||
1052 | { | ||
1053 | switch (msr) { | ||
1054 | case HV_X64_MSR_APIC_ASSIST_PAGE: { | ||
1055 | unsigned long addr; | ||
1056 | |||
1057 | if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) { | ||
1058 | vcpu->arch.hv_vapic = data; | ||
1059 | break; | ||
1060 | } | ||
1061 | addr = gfn_to_hva(vcpu->kvm, data >> | ||
1062 | HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT); | ||
1063 | if (kvm_is_error_hva(addr)) | ||
1064 | return 1; | ||
1065 | if (clear_user((void __user *)addr, PAGE_SIZE)) | ||
1066 | return 1; | ||
1067 | vcpu->arch.hv_vapic = data; | ||
1068 | break; | ||
1069 | } | ||
1070 | case HV_X64_MSR_EOI: | ||
1071 | return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data); | ||
1072 | case HV_X64_MSR_ICR: | ||
1073 | return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data); | ||
1074 | case HV_X64_MSR_TPR: | ||
1075 | return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data); | ||
1076 | default: | ||
1077 | pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x " | ||
1078 | "data 0x%llx\n", msr, data); | ||
1079 | return 1; | ||
1080 | } | ||
1081 | |||
1082 | return 0; | ||
1083 | } | ||
1084 | |||
838 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | 1085 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) |
839 | { | 1086 | { |
840 | switch (msr) { | 1087 | switch (msr) { |
@@ -949,7 +1196,19 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
949 | pr_unimpl(vcpu, "unimplemented perfctr wrmsr: " | 1196 | pr_unimpl(vcpu, "unimplemented perfctr wrmsr: " |
950 | "0x%x data 0x%llx\n", msr, data); | 1197 | "0x%x data 0x%llx\n", msr, data); |
951 | break; | 1198 | break; |
1199 | case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: | ||
1200 | if (kvm_hv_msr_partition_wide(msr)) { | ||
1201 | int r; | ||
1202 | mutex_lock(&vcpu->kvm->lock); | ||
1203 | r = set_msr_hyperv_pw(vcpu, msr, data); | ||
1204 | mutex_unlock(&vcpu->kvm->lock); | ||
1205 | return r; | ||
1206 | } else | ||
1207 | return set_msr_hyperv(vcpu, msr, data); | ||
1208 | break; | ||
952 | default: | 1209 | default: |
1210 | if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) | ||
1211 | return xen_hvm_config(vcpu, data); | ||
953 | if (!ignore_msrs) { | 1212 | if (!ignore_msrs) { |
954 | pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", | 1213 | pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", |
955 | msr, data); | 1214 | msr, data); |
@@ -1046,6 +1305,54 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1046 | return 0; | 1305 | return 0; |
1047 | } | 1306 | } |
1048 | 1307 | ||
1308 | static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | ||
1309 | { | ||
1310 | u64 data = 0; | ||
1311 | struct kvm *kvm = vcpu->kvm; | ||
1312 | |||
1313 | switch (msr) { | ||
1314 | case HV_X64_MSR_GUEST_OS_ID: | ||
1315 | data = kvm->arch.hv_guest_os_id; | ||
1316 | break; | ||
1317 | case HV_X64_MSR_HYPERCALL: | ||
1318 | data = kvm->arch.hv_hypercall; | ||
1319 | break; | ||
1320 | default: | ||
1321 | pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); | ||
1322 | return 1; | ||
1323 | } | ||
1324 | |||
1325 | *pdata = data; | ||
1326 | return 0; | ||
1327 | } | ||
1328 | |||
1329 | static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | ||
1330 | { | ||
1331 | u64 data = 0; | ||
1332 | |||
1333 | switch (msr) { | ||
1334 | case HV_X64_MSR_VP_INDEX: { | ||
1335 | int r; | ||
1336 | struct kvm_vcpu *v; | ||
1337 | kvm_for_each_vcpu(r, v, vcpu->kvm) | ||
1338 | if (v == vcpu) | ||
1339 | data = r; | ||
1340 | break; | ||
1341 | } | ||
1342 | case HV_X64_MSR_EOI: | ||
1343 | return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata); | ||
1344 | case HV_X64_MSR_ICR: | ||
1345 | return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata); | ||
1346 | case HV_X64_MSR_TPR: | ||
1347 | return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata); | ||
1348 | default: | ||
1349 | pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); | ||
1350 | return 1; | ||
1351 | } | ||
1352 | *pdata = data; | ||
1353 | return 0; | ||
1354 | } | ||
1355 | |||
1049 | int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | 1356 | int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) |
1050 | { | 1357 | { |
1051 | u64 data; | 1358 | u64 data; |
@@ -1097,7 +1404,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1097 | data |= (((uint64_t)4ULL) << 40); | 1404 | data |= (((uint64_t)4ULL) << 40); |
1098 | break; | 1405 | break; |
1099 | case MSR_EFER: | 1406 | case MSR_EFER: |
1100 | data = vcpu->arch.shadow_efer; | 1407 | data = vcpu->arch.efer; |
1101 | break; | 1408 | break; |
1102 | case MSR_KVM_WALL_CLOCK: | 1409 | case MSR_KVM_WALL_CLOCK: |
1103 | data = vcpu->kvm->arch.wall_clock; | 1410 | data = vcpu->kvm->arch.wall_clock; |
@@ -1112,6 +1419,16 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1112 | case MSR_IA32_MCG_STATUS: | 1419 | case MSR_IA32_MCG_STATUS: |
1113 | case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: | 1420 | case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: |
1114 | return get_msr_mce(vcpu, msr, pdata); | 1421 | return get_msr_mce(vcpu, msr, pdata); |
1422 | case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: | ||
1423 | if (kvm_hv_msr_partition_wide(msr)) { | ||
1424 | int r; | ||
1425 | mutex_lock(&vcpu->kvm->lock); | ||
1426 | r = get_msr_hyperv_pw(vcpu, msr, pdata); | ||
1427 | mutex_unlock(&vcpu->kvm->lock); | ||
1428 | return r; | ||
1429 | } else | ||
1430 | return get_msr_hyperv(vcpu, msr, pdata); | ||
1431 | break; | ||
1115 | default: | 1432 | default: |
1116 | if (!ignore_msrs) { | 1433 | if (!ignore_msrs) { |
1117 | pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); | 1434 | pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); |
@@ -1137,15 +1454,15 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs, | |||
1137 | int (*do_msr)(struct kvm_vcpu *vcpu, | 1454 | int (*do_msr)(struct kvm_vcpu *vcpu, |
1138 | unsigned index, u64 *data)) | 1455 | unsigned index, u64 *data)) |
1139 | { | 1456 | { |
1140 | int i; | 1457 | int i, idx; |
1141 | 1458 | ||
1142 | vcpu_load(vcpu); | 1459 | vcpu_load(vcpu); |
1143 | 1460 | ||
1144 | down_read(&vcpu->kvm->slots_lock); | 1461 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
1145 | for (i = 0; i < msrs->nmsrs; ++i) | 1462 | for (i = 0; i < msrs->nmsrs; ++i) |
1146 | if (do_msr(vcpu, entries[i].index, &entries[i].data)) | 1463 | if (do_msr(vcpu, entries[i].index, &entries[i].data)) |
1147 | break; | 1464 | break; |
1148 | up_read(&vcpu->kvm->slots_lock); | 1465 | srcu_read_unlock(&vcpu->kvm->srcu, idx); |
1149 | 1466 | ||
1150 | vcpu_put(vcpu); | 1467 | vcpu_put(vcpu); |
1151 | 1468 | ||
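
[Editor's note: this hunk converts the MSR path from the slots_lock rwsem to SRCU, whose read side is cheap and may sleep. A minimal kernel-context sketch of the read-side pattern adopted here; 'my_srcu' and the protected data are illustrative.]

#include <linux/srcu.h>

static struct srcu_struct my_srcu;	/* init_srcu_struct(&my_srcu) at setup */

static void reader(void)
{
	/* the returned index must be handed back to srcu_read_unlock() */
	int idx = srcu_read_lock(&my_srcu);
	/* ... dereference SRCU-protected data; readers may sleep ... */
	srcu_read_unlock(&my_srcu, idx);
}
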
@@ -1224,6 +1541,14 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
1224 | case KVM_CAP_PIT2: | 1541 | case KVM_CAP_PIT2: |
1225 | case KVM_CAP_PIT_STATE2: | 1542 | case KVM_CAP_PIT_STATE2: |
1226 | case KVM_CAP_SET_IDENTITY_MAP_ADDR: | 1543 | case KVM_CAP_SET_IDENTITY_MAP_ADDR: |
1544 | case KVM_CAP_XEN_HVM: | ||
1545 | case KVM_CAP_ADJUST_CLOCK: | ||
1546 | case KVM_CAP_VCPU_EVENTS: | ||
1547 | case KVM_CAP_HYPERV: | ||
1548 | case KVM_CAP_HYPERV_VAPIC: | ||
1549 | case KVM_CAP_HYPERV_SPIN: | ||
1550 | case KVM_CAP_PCI_SEGMENT: | ||
1551 | case KVM_CAP_X86_ROBUST_SINGLESTEP: | ||
1227 | r = 1; | 1552 | r = 1; |
1228 | break; | 1553 | break; |
1229 | case KVM_CAP_COALESCED_MMIO: | 1554 | case KVM_CAP_COALESCED_MMIO: |
@@ -1238,8 +1563,8 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
1238 | case KVM_CAP_NR_MEMSLOTS: | 1563 | case KVM_CAP_NR_MEMSLOTS: |
1239 | r = KVM_MEMORY_SLOTS; | 1564 | r = KVM_MEMORY_SLOTS; |
1240 | break; | 1565 | break; |
1241 | case KVM_CAP_PV_MMU: | 1566 | case KVM_CAP_PV_MMU: /* obsolete */ |
1242 | r = !tdp_enabled; | 1567 | r = 0; |
1243 | break; | 1568 | break; |
1244 | case KVM_CAP_IOMMU: | 1569 | case KVM_CAP_IOMMU: |
1245 | r = iommu_found(); | 1570 | r = iommu_found(); |
@@ -1326,13 +1651,19 @@ out: | |||
1326 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | 1651 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
1327 | { | 1652 | { |
1328 | kvm_x86_ops->vcpu_load(vcpu, cpu); | 1653 | kvm_x86_ops->vcpu_load(vcpu, cpu); |
1654 | if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) { | ||
1655 | unsigned long khz = cpufreq_quick_get(cpu); | ||
1656 | if (!khz) | ||
1657 | khz = tsc_khz; | ||
1658 | per_cpu(cpu_tsc_khz, cpu) = khz; | ||
1659 | } | ||
1329 | kvm_request_guest_time_update(vcpu); | 1660 | kvm_request_guest_time_update(vcpu); |
1330 | } | 1661 | } |
1331 | 1662 | ||
1332 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | 1663 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) |
1333 | { | 1664 | { |
1334 | kvm_x86_ops->vcpu_put(vcpu); | ||
1335 | kvm_put_guest_fpu(vcpu); | 1665 | kvm_put_guest_fpu(vcpu); |
1666 | kvm_x86_ops->vcpu_put(vcpu); | ||
1336 | } | 1667 | } |
1337 | 1668 | ||
1338 | static int is_efer_nx(void) | 1669 | static int is_efer_nx(void) |
@@ -1381,6 +1712,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, | |||
1381 | if (copy_from_user(cpuid_entries, entries, | 1712 | if (copy_from_user(cpuid_entries, entries, |
1382 | cpuid->nent * sizeof(struct kvm_cpuid_entry))) | 1713 | cpuid->nent * sizeof(struct kvm_cpuid_entry))) |
1383 | goto out_free; | 1714 | goto out_free; |
1715 | vcpu_load(vcpu); | ||
1384 | for (i = 0; i < cpuid->nent; i++) { | 1716 | for (i = 0; i < cpuid->nent; i++) { |
1385 | vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function; | 1717 | vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function; |
1386 | vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax; | 1718 | vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax; |
@@ -1397,6 +1729,8 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, | |||
1397 | cpuid_fix_nx_cap(vcpu); | 1729 | cpuid_fix_nx_cap(vcpu); |
1398 | r = 0; | 1730 | r = 0; |
1399 | kvm_apic_set_version(vcpu); | 1731 | kvm_apic_set_version(vcpu); |
1732 | kvm_x86_ops->cpuid_update(vcpu); | ||
1733 | vcpu_put(vcpu); | ||
1400 | 1734 | ||
1401 | out_free: | 1735 | out_free: |
1402 | vfree(cpuid_entries); | 1736 | vfree(cpuid_entries); |
@@ -1417,8 +1751,11 @@ static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, | |||
1417 | if (copy_from_user(&vcpu->arch.cpuid_entries, entries, | 1751 | if (copy_from_user(&vcpu->arch.cpuid_entries, entries, |
1418 | cpuid->nent * sizeof(struct kvm_cpuid_entry2))) | 1752 | cpuid->nent * sizeof(struct kvm_cpuid_entry2))) |
1419 | goto out; | 1753 | goto out; |
1754 | vcpu_load(vcpu); | ||
1420 | vcpu->arch.cpuid_nent = cpuid->nent; | 1755 | vcpu->arch.cpuid_nent = cpuid->nent; |
1421 | kvm_apic_set_version(vcpu); | 1756 | kvm_apic_set_version(vcpu); |
1757 | kvm_x86_ops->cpuid_update(vcpu); | ||
1758 | vcpu_put(vcpu); | ||
1422 | return 0; | 1759 | return 0; |
1423 | 1760 | ||
1424 | out: | 1761 | out: |
@@ -1461,12 +1798,15 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
1461 | u32 index, int *nent, int maxnent) | 1798 | u32 index, int *nent, int maxnent) |
1462 | { | 1799 | { |
1463 | unsigned f_nx = is_efer_nx() ? F(NX) : 0; | 1800 | unsigned f_nx = is_efer_nx() ? F(NX) : 0; |
1464 | unsigned f_gbpages = kvm_x86_ops->gb_page_enable() ? F(GBPAGES) : 0; | ||
1465 | #ifdef CONFIG_X86_64 | 1801 | #ifdef CONFIG_X86_64 |
1802 | unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL) | ||
1803 | ? F(GBPAGES) : 0; | ||
1466 | unsigned f_lm = F(LM); | 1804 | unsigned f_lm = F(LM); |
1467 | #else | 1805 | #else |
1806 | unsigned f_gbpages = 0; | ||
1468 | unsigned f_lm = 0; | 1807 | unsigned f_lm = 0; |
1469 | #endif | 1808 | #endif |
1809 | unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; | ||
1470 | 1810 | ||
1471 | /* cpuid 1.edx */ | 1811 | /* cpuid 1.edx */ |
1472 | const u32 kvm_supported_word0_x86_features = | 1812 | const u32 kvm_supported_word0_x86_features = |
@@ -1486,7 +1826,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
1486 | F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | | 1826 | F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | |
1487 | F(PAT) | F(PSE36) | 0 /* Reserved */ | | 1827 | F(PAT) | F(PSE36) | 0 /* Reserved */ | |
1488 | f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | | 1828 | f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | |
1489 | F(FXSR) | F(FXSR_OPT) | f_gbpages | 0 /* RDTSCP */ | | 1829 | F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp | |
1490 | 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); | 1830 | 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); |
1491 | /* cpuid 1.ecx */ | 1831 | /* cpuid 1.ecx */ |
1492 | const u32 kvm_supported_word4_x86_features = | 1832 | const u32 kvm_supported_word4_x86_features = |
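
The f_gbpages and f_rdtscp terms above gate two host-dependent bits in CPUID leaf 0x80000001.EDX: 1GB pages are advertised only when the MMU can actually map PDPE-level (1GB) pages, and RDTSCP only when kvm_x86_ops->rdtscp_supported() says so. A standalone sketch probing the same two host bits these F() terms correspond to (bit positions per the AMD CPUID layout):

#include <cpuid.h>
#include <stdio.h>

/* sketch: probe host CPUID 0x80000001.EDX for the bits gated above */
int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid(0x80000001, &eax, &ebx, &ecx, &edx))
                return 1;
        printf("1GB pages (GBPAGES): %s\n", (edx >> 26) & 1 ? "yes" : "no");
        printf("RDTSCP             : %s\n", (edx >> 27) & 1 ? "yes" : "no");
        return 0;
}
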
@@ -1733,7 +2073,7 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu, | |||
1733 | return 0; | 2073 | return 0; |
1734 | if (mce->status & MCI_STATUS_UC) { | 2074 | if (mce->status & MCI_STATUS_UC) { |
1735 | if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) || | 2075 | if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) || |
1736 | !(vcpu->arch.cr4 & X86_CR4_MCE)) { | 2076 | !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) { |
1737 | printk(KERN_DEBUG "kvm: set_mce: " | 2077 | printk(KERN_DEBUG "kvm: set_mce: " |
1738 | "injects mce exception while " | 2078 | "injects mce exception while " |
1739 | "previous one is in progress!\n"); | 2079 | "previous one is in progress!\n"); |
@@ -1759,6 +2099,65 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu, | |||
1759 | return 0; | 2099 | return 0; |
1760 | } | 2100 | } |
1761 | 2101 | ||
2102 | static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | ||
2103 | struct kvm_vcpu_events *events) | ||
2104 | { | ||
2105 | vcpu_load(vcpu); | ||
2106 | |||
2107 | events->exception.injected = vcpu->arch.exception.pending; | ||
2108 | events->exception.nr = vcpu->arch.exception.nr; | ||
2109 | events->exception.has_error_code = vcpu->arch.exception.has_error_code; | ||
2110 | events->exception.error_code = vcpu->arch.exception.error_code; | ||
2111 | |||
2112 | events->interrupt.injected = vcpu->arch.interrupt.pending; | ||
2113 | events->interrupt.nr = vcpu->arch.interrupt.nr; | ||
2114 | events->interrupt.soft = vcpu->arch.interrupt.soft; | ||
2115 | |||
2116 | events->nmi.injected = vcpu->arch.nmi_injected; | ||
2117 | events->nmi.pending = vcpu->arch.nmi_pending; | ||
2118 | events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu); | ||
2119 | |||
2120 | events->sipi_vector = vcpu->arch.sipi_vector; | ||
2121 | |||
2122 | events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING | ||
2123 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR); | ||
2124 | |||
2125 | vcpu_put(vcpu); | ||
2126 | } | ||
2127 | |||
2128 | static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | ||
2129 | struct kvm_vcpu_events *events) | ||
2130 | { | ||
2131 | if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING | ||
2132 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR)) | ||
2133 | return -EINVAL; | ||
2134 | |||
2135 | vcpu_load(vcpu); | ||
2136 | |||
2137 | vcpu->arch.exception.pending = events->exception.injected; | ||
2138 | vcpu->arch.exception.nr = events->exception.nr; | ||
2139 | vcpu->arch.exception.has_error_code = events->exception.has_error_code; | ||
2140 | vcpu->arch.exception.error_code = events->exception.error_code; | ||
2141 | |||
2142 | vcpu->arch.interrupt.pending = events->interrupt.injected; | ||
2143 | vcpu->arch.interrupt.nr = events->interrupt.nr; | ||
2144 | vcpu->arch.interrupt.soft = events->interrupt.soft; | ||
2145 | if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm)) | ||
2146 | kvm_pic_clear_isr_ack(vcpu->kvm); | ||
2147 | |||
2148 | vcpu->arch.nmi_injected = events->nmi.injected; | ||
2149 | if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) | ||
2150 | vcpu->arch.nmi_pending = events->nmi.pending; | ||
2151 | kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked); | ||
2152 | |||
2153 | if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR) | ||
2154 | vcpu->arch.sipi_vector = events->sipi_vector; | ||
2155 | |||
2156 | vcpu_put(vcpu); | ||
2157 | |||
2158 | return 0; | ||
2159 | } | ||
2160 | |||
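
The two helpers above back the new KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS vcpu ioctls, which let userspace save and restore pending exception, interrupt, NMI and SIPI state (e.g. around live migration). A minimal sketch of the round trip, assuming an already-created vcpu fd:

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

/* sketch: save pending event state, then restore it unchanged */
static int roundtrip_vcpu_events(int vcpu_fd)
{
        struct kvm_vcpu_events events;

        memset(&events, 0, sizeof(events));
        if (ioctl(vcpu_fd, KVM_GET_VCPU_EVENTS, &events) < 0)
                return -1;

        /* only flagged fields are honoured by SET; pass back what GET set */
        events.flags &= KVM_VCPUEVENT_VALID_NMI_PENDING |
                        KVM_VCPUEVENT_VALID_SIPI_VECTOR;
        return ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &events);
}
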
1762 | long kvm_arch_vcpu_ioctl(struct file *filp, | 2161 | long kvm_arch_vcpu_ioctl(struct file *filp, |
1763 | unsigned int ioctl, unsigned long arg) | 2162 | unsigned int ioctl, unsigned long arg) |
1764 | { | 2163 | { |
@@ -1769,6 +2168,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
1769 | 2168 | ||
1770 | switch (ioctl) { | 2169 | switch (ioctl) { |
1771 | case KVM_GET_LAPIC: { | 2170 | case KVM_GET_LAPIC: { |
2171 | r = -EINVAL; | ||
2172 | if (!vcpu->arch.apic) | ||
2173 | goto out; | ||
1772 | lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); | 2174 | lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); |
1773 | 2175 | ||
1774 | r = -ENOMEM; | 2176 | r = -ENOMEM; |
@@ -1784,6 +2186,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
1784 | break; | 2186 | break; |
1785 | } | 2187 | } |
1786 | case KVM_SET_LAPIC: { | 2188 | case KVM_SET_LAPIC: { |
2189 | r = -EINVAL; | ||
2190 | if (!vcpu->arch.apic) | ||
2191 | goto out; | ||
1787 | lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); | 2192 | lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); |
1788 | r = -ENOMEM; | 2193 | r = -ENOMEM; |
1789 | if (!lapic) | 2194 | if (!lapic) |
@@ -1910,6 +2315,27 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
1910 | r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); | 2315 | r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); |
1911 | break; | 2316 | break; |
1912 | } | 2317 | } |
2318 | case KVM_GET_VCPU_EVENTS: { | ||
2319 | struct kvm_vcpu_events events; | ||
2320 | |||
2321 | kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events); | ||
2322 | |||
2323 | r = -EFAULT; | ||
2324 | if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events))) | ||
2325 | break; | ||
2326 | r = 0; | ||
2327 | break; | ||
2328 | } | ||
2329 | case KVM_SET_VCPU_EVENTS: { | ||
2330 | struct kvm_vcpu_events events; | ||
2331 | |||
2332 | r = -EFAULT; | ||
2333 | if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events))) | ||
2334 | break; | ||
2335 | |||
2336 | r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events); | ||
2337 | break; | ||
2338 | } | ||
1913 | default: | 2339 | default: |
1914 | r = -EINVAL; | 2340 | r = -EINVAL; |
1915 | } | 2341 | } |
@@ -1941,14 +2367,14 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, | |||
1941 | if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES) | 2367 | if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES) |
1942 | return -EINVAL; | 2368 | return -EINVAL; |
1943 | 2369 | ||
1944 | down_write(&kvm->slots_lock); | 2370 | mutex_lock(&kvm->slots_lock); |
1945 | spin_lock(&kvm->mmu_lock); | 2371 | spin_lock(&kvm->mmu_lock); |
1946 | 2372 | ||
1947 | kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); | 2373 | kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); |
1948 | kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; | 2374 | kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; |
1949 | 2375 | ||
1950 | spin_unlock(&kvm->mmu_lock); | 2376 | spin_unlock(&kvm->mmu_lock); |
1951 | up_write(&kvm->slots_lock); | 2377 | mutex_unlock(&kvm->slots_lock); |
1952 | return 0; | 2378 | return 0; |
1953 | } | 2379 | } |
1954 | 2380 | ||
@@ -1957,13 +2383,35 @@ static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm) | |||
1957 | return kvm->arch.n_alloc_mmu_pages; | 2383 | return kvm->arch.n_alloc_mmu_pages; |
1958 | } | 2384 | } |
1959 | 2385 | ||
2386 | gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn) | ||
2387 | { | ||
2388 | int i; | ||
2389 | struct kvm_mem_alias *alias; | ||
2390 | struct kvm_mem_aliases *aliases; | ||
2391 | |||
2392 | aliases = rcu_dereference(kvm->arch.aliases); | ||
2393 | |||
2394 | for (i = 0; i < aliases->naliases; ++i) { | ||
2395 | alias = &aliases->aliases[i]; | ||
2396 | if (alias->flags & KVM_ALIAS_INVALID) | ||
2397 | continue; | ||
2398 | if (gfn >= alias->base_gfn | ||
2399 | && gfn < alias->base_gfn + alias->npages) | ||
2400 | return alias->target_gfn + gfn - alias->base_gfn; | ||
2401 | } | ||
2402 | return gfn; | ||
2403 | } | ||
2404 | |||
1960 | gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) | 2405 | gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) |
1961 | { | 2406 | { |
1962 | int i; | 2407 | int i; |
1963 | struct kvm_mem_alias *alias; | 2408 | struct kvm_mem_alias *alias; |
2409 | struct kvm_mem_aliases *aliases; | ||
1964 | 2410 | ||
1965 | for (i = 0; i < kvm->arch.naliases; ++i) { | 2411 | aliases = rcu_dereference(kvm->arch.aliases); |
1966 | alias = &kvm->arch.aliases[i]; | 2412 | |
2413 | for (i = 0; i < aliases->naliases; ++i) { | ||
2414 | alias = &aliases->aliases[i]; | ||
1967 | if (gfn >= alias->base_gfn | 2415 | if (gfn >= alias->base_gfn |
1968 | && gfn < alias->base_gfn + alias->npages) | 2416 | && gfn < alias->base_gfn + alias->npages) |
1969 | return alias->target_gfn + gfn - alias->base_gfn; | 2417 | return alias->target_gfn + gfn - alias->base_gfn; |
@@ -1981,6 +2429,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, | |||
1981 | { | 2429 | { |
1982 | int r, n; | 2430 | int r, n; |
1983 | struct kvm_mem_alias *p; | 2431 | struct kvm_mem_alias *p; |
2432 | struct kvm_mem_aliases *aliases, *old_aliases; | ||
1984 | 2433 | ||
1985 | r = -EINVAL; | 2434 | r = -EINVAL; |
1986 | /* General sanity checks */ | 2435 | /* General sanity checks */ |
@@ -1997,26 +2446,48 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, | |||
1997 | < alias->target_phys_addr) | 2446 | < alias->target_phys_addr) |
1998 | goto out; | 2447 | goto out; |
1999 | 2448 | ||
2000 | down_write(&kvm->slots_lock); | 2449 | r = -ENOMEM; |
2001 | spin_lock(&kvm->mmu_lock); | 2450 | aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL); |
2451 | if (!aliases) | ||
2452 | goto out; | ||
2002 | 2453 | ||
2003 | p = &kvm->arch.aliases[alias->slot]; | 2454 | mutex_lock(&kvm->slots_lock); |
2455 | |||
2456 | /* invalidate any gfn reference in case of deletion/shrinking */ | ||
2457 | memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases)); | ||
2458 | aliases->aliases[alias->slot].flags |= KVM_ALIAS_INVALID; | ||
2459 | old_aliases = kvm->arch.aliases; | ||
2460 | rcu_assign_pointer(kvm->arch.aliases, aliases); | ||
2461 | synchronize_srcu_expedited(&kvm->srcu); | ||
2462 | kvm_mmu_zap_all(kvm); | ||
2463 | kfree(old_aliases); | ||
2464 | |||
2465 | r = -ENOMEM; | ||
2466 | aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL); | ||
2467 | if (!aliases) | ||
2468 | goto out_unlock; | ||
2469 | |||
2470 | memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases)); | ||
2471 | |||
2472 | p = &aliases->aliases[alias->slot]; | ||
2004 | p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; | 2473 | p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; |
2005 | p->npages = alias->memory_size >> PAGE_SHIFT; | 2474 | p->npages = alias->memory_size >> PAGE_SHIFT; |
2006 | p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT; | 2475 | p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT; |
2476 | p->flags &= ~(KVM_ALIAS_INVALID); | ||
2007 | 2477 | ||
2008 | for (n = KVM_ALIAS_SLOTS; n > 0; --n) | 2478 | for (n = KVM_ALIAS_SLOTS; n > 0; --n) |
2009 | if (kvm->arch.aliases[n - 1].npages) | 2479 | if (aliases->aliases[n - 1].npages) |
2010 | break; | 2480 | break; |
2011 | kvm->arch.naliases = n; | 2481 | aliases->naliases = n; |
2012 | |||
2013 | spin_unlock(&kvm->mmu_lock); | ||
2014 | kvm_mmu_zap_all(kvm); | ||
2015 | |||
2016 | up_write(&kvm->slots_lock); | ||
2017 | 2482 | ||
2018 | return 0; | 2483 | old_aliases = kvm->arch.aliases; |
2484 | rcu_assign_pointer(kvm->arch.aliases, aliases); | ||
2485 | synchronize_srcu_expedited(&kvm->srcu); | ||
2486 | kfree(old_aliases); | ||
2487 | r = 0; | ||
2019 | 2488 | ||
2489 | out_unlock: | ||
2490 | mutex_unlock(&kvm->slots_lock); | ||
2020 | out: | 2491 | out: |
2021 | return r; | 2492 | return r; |
2022 | } | 2493 | } |
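
The rewritten alias update above is the classic RCU publish pattern: copy the table, modify the copy, rcu_assign_pointer() the new version in, wait out readers with synchronize_srcu_expedited(), then free the old table. It runs twice here so the KVM_ALIAS_INVALID marking is globally visible before kvm_mmu_zap_all(). A hedged userspace analogue of the copy/publish/free idea, using C11 atomics in place of the RCU primitives (the grace-period wait is the part this sketch cannot reproduce):

#include <stdatomic.h>
#include <stdlib.h>
#include <string.h>

struct table { int naliases; /* ... payload elided ... */ };

static _Atomic(struct table *) current_table;

/* sketch: copy-update-publish; readers load the pointer with acquire */
static int update_table(void (*edit)(struct table *))
{
        struct table *old, *new = calloc(1, sizeof(*new));

        if (!new)
                return -1;
        old = atomic_load_explicit(&current_table, memory_order_acquire);
        memcpy(new, old, sizeof(*new));
        edit(new);                    /* modify the private copy */
        atomic_store_explicit(&current_table, new, memory_order_release);
        /* kernel: synchronize_srcu_expedited() here before kfree(old) */
        free(old);                    /* UNSAFE without a grace period */
        return 0;
}
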
@@ -2038,9 +2509,7 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) | |||
2038 | sizeof(struct kvm_pic_state)); | 2509 | sizeof(struct kvm_pic_state)); |
2039 | break; | 2510 | break; |
2040 | case KVM_IRQCHIP_IOAPIC: | 2511 | case KVM_IRQCHIP_IOAPIC: |
2041 | memcpy(&chip->chip.ioapic, | 2512 | r = kvm_get_ioapic(kvm, &chip->chip.ioapic); |
2042 | ioapic_irqchip(kvm), | ||
2043 | sizeof(struct kvm_ioapic_state)); | ||
2044 | break; | 2513 | break; |
2045 | default: | 2514 | default: |
2046 | r = -EINVAL; | 2515 | r = -EINVAL; |
@@ -2056,25 +2525,21 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) | |||
2056 | r = 0; | 2525 | r = 0; |
2057 | switch (chip->chip_id) { | 2526 | switch (chip->chip_id) { |
2058 | case KVM_IRQCHIP_PIC_MASTER: | 2527 | case KVM_IRQCHIP_PIC_MASTER: |
2059 | spin_lock(&pic_irqchip(kvm)->lock); | 2528 | raw_spin_lock(&pic_irqchip(kvm)->lock); |
2060 | memcpy(&pic_irqchip(kvm)->pics[0], | 2529 | memcpy(&pic_irqchip(kvm)->pics[0], |
2061 | &chip->chip.pic, | 2530 | &chip->chip.pic, |
2062 | sizeof(struct kvm_pic_state)); | 2531 | sizeof(struct kvm_pic_state)); |
2063 | spin_unlock(&pic_irqchip(kvm)->lock); | 2532 | raw_spin_unlock(&pic_irqchip(kvm)->lock); |
2064 | break; | 2533 | break; |
2065 | case KVM_IRQCHIP_PIC_SLAVE: | 2534 | case KVM_IRQCHIP_PIC_SLAVE: |
2066 | spin_lock(&pic_irqchip(kvm)->lock); | 2535 | raw_spin_lock(&pic_irqchip(kvm)->lock); |
2067 | memcpy(&pic_irqchip(kvm)->pics[1], | 2536 | memcpy(&pic_irqchip(kvm)->pics[1], |
2068 | &chip->chip.pic, | 2537 | &chip->chip.pic, |
2069 | sizeof(struct kvm_pic_state)); | 2538 | sizeof(struct kvm_pic_state)); |
2070 | spin_unlock(&pic_irqchip(kvm)->lock); | 2539 | raw_spin_unlock(&pic_irqchip(kvm)->lock); |
2071 | break; | 2540 | break; |
2072 | case KVM_IRQCHIP_IOAPIC: | 2541 | case KVM_IRQCHIP_IOAPIC: |
2073 | mutex_lock(&kvm->irq_lock); | 2542 | r = kvm_set_ioapic(kvm, &chip->chip.ioapic); |
2074 | memcpy(ioapic_irqchip(kvm), | ||
2075 | &chip->chip.ioapic, | ||
2076 | sizeof(struct kvm_ioapic_state)); | ||
2077 | mutex_unlock(&kvm->irq_lock); | ||
2078 | break; | 2543 | break; |
2079 | default: | 2544 | default: |
2080 | r = -EINVAL; | 2545 | r = -EINVAL; |
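
Both chip save/restore paths stay behind the same KVM_GET_IRQCHIP/KVM_SET_IRQCHIP vm ioctls; the hunks above only move the ioapic copies behind kvm_get_ioapic()/kvm_set_ioapic() and switch the PIC to a raw spinlock. A userspace sketch of reading the master PIC state through this path (vm fd assumed):

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

/* sketch: fetch the in-kernel master PIC state */
static int dump_master_pic(int vm_fd, struct kvm_pic_state *out)
{
        struct kvm_irqchip chip;

        memset(&chip, 0, sizeof(chip));
        chip.chip_id = KVM_IRQCHIP_PIC_MASTER;
        if (ioctl(vm_fd, KVM_GET_IRQCHIP, &chip) < 0)
                return -1;
        *out = chip.chip.pic;
        return 0;
}
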
@@ -2151,29 +2616,63 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm, | |||
2151 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | 2616 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, |
2152 | struct kvm_dirty_log *log) | 2617 | struct kvm_dirty_log *log) |
2153 | { | 2618 | { |
2154 | int r; | 2619 | int r, i; |
2155 | int n; | ||
2156 | struct kvm_memory_slot *memslot; | 2620 | struct kvm_memory_slot *memslot; |
2157 | int is_dirty = 0; | 2621 | unsigned long n; |
2622 | unsigned long is_dirty = 0; | ||
2623 | unsigned long *dirty_bitmap = NULL; | ||
2158 | 2624 | ||
2159 | down_write(&kvm->slots_lock); | 2625 | mutex_lock(&kvm->slots_lock); |
2160 | 2626 | ||
2161 | r = kvm_get_dirty_log(kvm, log, &is_dirty); | 2627 | r = -EINVAL; |
2162 | if (r) | 2628 | if (log->slot >= KVM_MEMORY_SLOTS) |
2629 | goto out; | ||
2630 | |||
2631 | memslot = &kvm->memslots->memslots[log->slot]; | ||
2632 | r = -ENOENT; | ||
2633 | if (!memslot->dirty_bitmap) | ||
2634 | goto out; | ||
2635 | |||
2636 | n = kvm_dirty_bitmap_bytes(memslot); | ||
2637 | |||
2638 | r = -ENOMEM; | ||
2639 | dirty_bitmap = vmalloc(n); | ||
2640 | if (!dirty_bitmap) | ||
2163 | goto out; | 2641 | goto out; |
2642 | memset(dirty_bitmap, 0, n); | ||
2643 | |||
2644 | for (i = 0; !is_dirty && i < n/sizeof(long); i++) | ||
2645 | is_dirty = memslot->dirty_bitmap[i]; | ||
2164 | 2646 | ||
2165 | /* If nothing is dirty, don't bother messing with page tables. */ | 2647 | /* If nothing is dirty, don't bother messing with page tables. */ |
2166 | if (is_dirty) { | 2648 | if (is_dirty) { |
2649 | struct kvm_memslots *slots, *old_slots; | ||
2650 | |||
2167 | spin_lock(&kvm->mmu_lock); | 2651 | spin_lock(&kvm->mmu_lock); |
2168 | kvm_mmu_slot_remove_write_access(kvm, log->slot); | 2652 | kvm_mmu_slot_remove_write_access(kvm, log->slot); |
2169 | spin_unlock(&kvm->mmu_lock); | 2653 | spin_unlock(&kvm->mmu_lock); |
2170 | memslot = &kvm->memslots[log->slot]; | 2654 | |
2171 | n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; | 2655 | slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); |
2172 | memset(memslot->dirty_bitmap, 0, n); | 2656 | if (!slots) |
2657 | goto out_free; | ||
2658 | |||
2659 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); | ||
2660 | slots->memslots[log->slot].dirty_bitmap = dirty_bitmap; | ||
2661 | |||
2662 | old_slots = kvm->memslots; | ||
2663 | rcu_assign_pointer(kvm->memslots, slots); | ||
2664 | synchronize_srcu_expedited(&kvm->srcu); | ||
2665 | dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap; | ||
2666 | kfree(old_slots); | ||
2173 | } | 2667 | } |
2668 | |||
2174 | r = 0; | 2669 | r = 0; |
2670 | if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) | ||
2671 | r = -EFAULT; | ||
2672 | out_free: | ||
2673 | vfree(dirty_bitmap); | ||
2175 | out: | 2674 | out: |
2176 | up_write(&kvm->slots_lock); | 2675 | mutex_unlock(&kvm->slots_lock); |
2177 | return r; | 2676 | return r; |
2178 | } | 2677 | } |
2179 | 2678 | ||
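
The rewritten kvm_vm_ioctl_get_dirty_log() above replaces the in-place bitmap clear with a bitmap swap under SRCU: a fresh zeroed bitmap is published in a copied memslots array, and the old bitmap, still consistent for concurrent faults, is what gets copied to userspace. The ioctl contract is unchanged; a userspace sketch with the caller sizing the buffer from the slot's page count:

#include <linux/kvm.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>

/* sketch: fetch and reset the dirty bitmap for one memory slot */
static unsigned long *get_dirty_bitmap(int vm_fd, int slot, size_t npages)
{
        struct kvm_dirty_log log;
        size_t n = ((npages + 63) / 64) * 8;   /* bytes, 64-bit aligned */
        unsigned long *bitmap = calloc(1, n);

        if (!bitmap)
                return NULL;
        memset(&log, 0, sizeof(log));
        log.slot = slot;
        log.dirty_bitmap = bitmap;
        if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log) < 0) {
                free(bitmap);
                return NULL;
        }
        return bitmap;                         /* bit set => page dirtied */
}
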
@@ -2182,7 +2681,7 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
2182 | { | 2681 | { |
2183 | struct kvm *kvm = filp->private_data; | 2682 | struct kvm *kvm = filp->private_data; |
2184 | void __user *argp = (void __user *)arg; | 2683 | void __user *argp = (void __user *)arg; |
2185 | int r = -EINVAL; | 2684 | int r = -ENOTTY; |
2186 | /* | 2685 | /* |
2187 | * This union makes it completely explicit to gcc-3.x | 2686 | * This union makes it completely explicit to gcc-3.x |
2188 | * that these two variables' stack usage should be | 2687 | * that these two variables' stack usage should be |
@@ -2244,25 +2743,39 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
2244 | if (r) | 2743 | if (r) |
2245 | goto out; | 2744 | goto out; |
2246 | break; | 2745 | break; |
2247 | case KVM_CREATE_IRQCHIP: | 2746 | case KVM_CREATE_IRQCHIP: { |
2747 | struct kvm_pic *vpic; | ||
2748 | |||
2749 | mutex_lock(&kvm->lock); | ||
2750 | r = -EEXIST; | ||
2751 | if (kvm->arch.vpic) | ||
2752 | goto create_irqchip_unlock; | ||
2248 | r = -ENOMEM; | 2753 | r = -ENOMEM; |
2249 | kvm->arch.vpic = kvm_create_pic(kvm); | 2754 | vpic = kvm_create_pic(kvm); |
2250 | if (kvm->arch.vpic) { | 2755 | if (vpic) { |
2251 | r = kvm_ioapic_init(kvm); | 2756 | r = kvm_ioapic_init(kvm); |
2252 | if (r) { | 2757 | if (r) { |
2253 | kfree(kvm->arch.vpic); | 2758 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, |
2254 | kvm->arch.vpic = NULL; | 2759 | &vpic->dev); |
2255 | goto out; | 2760 | kfree(vpic); |
2761 | goto create_irqchip_unlock; | ||
2256 | } | 2762 | } |
2257 | } else | 2763 | } else |
2258 | goto out; | 2764 | goto create_irqchip_unlock; |
2765 | smp_wmb(); | ||
2766 | kvm->arch.vpic = vpic; | ||
2767 | smp_wmb(); | ||
2259 | r = kvm_setup_default_irq_routing(kvm); | 2768 | r = kvm_setup_default_irq_routing(kvm); |
2260 | if (r) { | 2769 | if (r) { |
2261 | kfree(kvm->arch.vpic); | 2770 | mutex_lock(&kvm->irq_lock); |
2262 | kfree(kvm->arch.vioapic); | 2771 | kvm_ioapic_destroy(kvm); |
2263 | goto out; | 2772 | kvm_destroy_pic(kvm); |
2773 | mutex_unlock(&kvm->irq_lock); | ||
2264 | } | 2774 | } |
2775 | create_irqchip_unlock: | ||
2776 | mutex_unlock(&kvm->lock); | ||
2265 | break; | 2777 | break; |
2778 | } | ||
2266 | case KVM_CREATE_PIT: | 2779 | case KVM_CREATE_PIT: |
2267 | u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY; | 2780 | u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY; |
2268 | goto create_pit; | 2781 | goto create_pit; |
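
KVM_CREATE_IRQCHIP above now builds the PIC into a local pointer, publishes it with explicit write barriers, and fails with -EEXIST on a second call instead of leaking half-initialized state. From userspace it remains a one-liner; a sketch that also creates the in-kernel PIT via the slots_lock-guarded path below:

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* sketch: create the in-kernel PIC+IOAPIC, then the PIT */
static int create_kernel_irqchips(int vm_fd)
{
        if (ioctl(vm_fd, KVM_CREATE_IRQCHIP, 0) < 0)
                return -1;              /* now -EEXIST if already created */

        struct kvm_pit_config pit = { .flags = KVM_PIT_SPEAKER_DUMMY };
        return ioctl(vm_fd, KVM_CREATE_PIT2, &pit);
}
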
@@ -2272,7 +2785,7 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
2272 | sizeof(struct kvm_pit_config))) | 2785 | sizeof(struct kvm_pit_config))) |
2273 | goto out; | 2786 | goto out; |
2274 | create_pit: | 2787 | create_pit: |
2275 | down_write(&kvm->slots_lock); | 2788 | mutex_lock(&kvm->slots_lock); |
2276 | r = -EEXIST; | 2789 | r = -EEXIST; |
2277 | if (kvm->arch.vpit) | 2790 | if (kvm->arch.vpit) |
2278 | goto create_pit_unlock; | 2791 | goto create_pit_unlock; |
@@ -2281,7 +2794,7 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
2281 | if (kvm->arch.vpit) | 2794 | if (kvm->arch.vpit) |
2282 | r = 0; | 2795 | r = 0; |
2283 | create_pit_unlock: | 2796 | create_pit_unlock: |
2284 | up_write(&kvm->slots_lock); | 2797 | mutex_unlock(&kvm->slots_lock); |
2285 | break; | 2798 | break; |
2286 | case KVM_IRQ_LINE_STATUS: | 2799 | case KVM_IRQ_LINE_STATUS: |
2287 | case KVM_IRQ_LINE: { | 2800 | case KVM_IRQ_LINE: { |
@@ -2292,10 +2805,8 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
2292 | goto out; | 2805 | goto out; |
2293 | if (irqchip_in_kernel(kvm)) { | 2806 | if (irqchip_in_kernel(kvm)) { |
2294 | __s32 status; | 2807 | __s32 status; |
2295 | mutex_lock(&kvm->irq_lock); | ||
2296 | status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, | 2808 | status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, |
2297 | irq_event.irq, irq_event.level); | 2809 | irq_event.irq, irq_event.level); |
2298 | mutex_unlock(&kvm->irq_lock); | ||
2299 | if (ioctl == KVM_IRQ_LINE_STATUS) { | 2810 | if (ioctl == KVM_IRQ_LINE_STATUS) { |
2300 | irq_event.status = status; | 2811 | irq_event.status = status; |
2301 | if (copy_to_user(argp, &irq_event, | 2812 | if (copy_to_user(argp, &irq_event, |
@@ -2421,6 +2932,55 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
2421 | r = 0; | 2932 | r = 0; |
2422 | break; | 2933 | break; |
2423 | } | 2934 | } |
2935 | case KVM_XEN_HVM_CONFIG: { | ||
2936 | r = -EFAULT; | ||
2937 | if (copy_from_user(&kvm->arch.xen_hvm_config, argp, | ||
2938 | sizeof(struct kvm_xen_hvm_config))) | ||
2939 | goto out; | ||
2940 | r = -EINVAL; | ||
2941 | if (kvm->arch.xen_hvm_config.flags) | ||
2942 | goto out; | ||
2943 | r = 0; | ||
2944 | break; | ||
2945 | } | ||
2946 | case KVM_SET_CLOCK: { | ||
2947 | struct timespec now; | ||
2948 | struct kvm_clock_data user_ns; | ||
2949 | u64 now_ns; | ||
2950 | s64 delta; | ||
2951 | |||
2952 | r = -EFAULT; | ||
2953 | if (copy_from_user(&user_ns, argp, sizeof(user_ns))) | ||
2954 | goto out; | ||
2955 | |||
2956 | r = -EINVAL; | ||
2957 | if (user_ns.flags) | ||
2958 | goto out; | ||
2959 | |||
2960 | r = 0; | ||
2961 | ktime_get_ts(&now); | ||
2962 | now_ns = timespec_to_ns(&now); | ||
2963 | delta = user_ns.clock - now_ns; | ||
2964 | kvm->arch.kvmclock_offset = delta; | ||
2965 | break; | ||
2966 | } | ||
2967 | case KVM_GET_CLOCK: { | ||
2968 | struct timespec now; | ||
2969 | struct kvm_clock_data user_ns; | ||
2970 | u64 now_ns; | ||
2971 | |||
2972 | ktime_get_ts(&now); | ||
2973 | now_ns = timespec_to_ns(&now); | ||
2974 | user_ns.clock = kvm->arch.kvmclock_offset + now_ns; | ||
2975 | user_ns.flags = 0; | ||
2976 | |||
2977 | r = -EFAULT; | ||
2978 | if (copy_to_user(argp, &user_ns, sizeof(user_ns))) | ||
2979 | goto out; | ||
2980 | r = 0; | ||
2981 | break; | ||
2982 | } | ||
2983 | |||
2424 | default: | 2984 | default: |
2425 | ; | 2985 | ; |
2426 | } | 2986 | } |
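
KVM_SET_CLOCK above stores only an offset (the requested time minus the host's monotonic clock), so kvmclock keeps ticking with the host and the GET/SET pair survives save/restore cleanly. A userspace sketch that reads the VM clock and pushes it back shifted forward by one second (flags must be zero on both calls):

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

/* sketch: advance the guest's kvmclock by one second */
static int bump_vm_clock(int vm_fd)
{
        struct kvm_clock_data data;

        memset(&data, 0, sizeof(data));
        if (ioctl(vm_fd, KVM_GET_CLOCK, &data) < 0)
                return -1;
        data.clock += 1000000000ULL;    /* nanoseconds */
        data.flags = 0;                 /* no flags defined or accepted */
        return ioctl(vm_fd, KVM_SET_CLOCK, &data);
}
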
@@ -2433,7 +2993,8 @@ static void kvm_init_msr_list(void) | |||
2433 | u32 dummy[2]; | 2993 | u32 dummy[2]; |
2434 | unsigned i, j; | 2994 | unsigned i, j; |
2435 | 2995 | ||
2436 | for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) { | 2996 | /* skip the first MSRs in the list; they are KVM-specific */ |
2997 | for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) { | ||
2437 | if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0) | 2998 | if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0) |
2438 | continue; | 2999 | continue; |
2439 | if (j < i) | 3000 | if (j < i) |
@@ -2450,7 +3011,7 @@ static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, | |||
2450 | !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v)) | 3011 | !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v)) |
2451 | return 0; | 3012 | return 0; |
2452 | 3013 | ||
2453 | return kvm_io_bus_write(&vcpu->kvm->mmio_bus, addr, len, v); | 3014 | return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); |
2454 | } | 3015 | } |
2455 | 3016 | ||
2456 | static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) | 3017 | static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) |
@@ -2459,17 +3020,44 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) | |||
2459 | !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v)) | 3020 | !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v)) |
2460 | return 0; | 3021 | return 0; |
2461 | 3022 | ||
2462 | return kvm_io_bus_read(&vcpu->kvm->mmio_bus, addr, len, v); | 3023 | return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); |
2463 | } | 3024 | } |
2464 | 3025 | ||
2465 | static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, | 3026 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) |
2466 | struct kvm_vcpu *vcpu) | 3027 | { |
3028 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | ||
3029 | return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error); | ||
3030 | } | ||
3031 | |||
3032 | gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) | ||
3033 | { | ||
3034 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | ||
3035 | access |= PFERR_FETCH_MASK; | ||
3036 | return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error); | ||
3037 | } | ||
3038 | |||
3039 | gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) | ||
3040 | { | ||
3041 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | ||
3042 | access |= PFERR_WRITE_MASK; | ||
3043 | return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error); | ||
3044 | } | ||
3045 | |||
3046 | /* used to access any guest's mapped memory without checking CPL */ | ||
3047 | gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) | ||
3048 | { | ||
3049 | return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, 0, error); | ||
3050 | } | ||
3051 | |||
3052 | static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes, | ||
3053 | struct kvm_vcpu *vcpu, u32 access, | ||
3054 | u32 *error) | ||
2467 | { | 3055 | { |
2468 | void *data = val; | 3056 | void *data = val; |
2469 | int r = X86EMUL_CONTINUE; | 3057 | int r = X86EMUL_CONTINUE; |
2470 | 3058 | ||
2471 | while (bytes) { | 3059 | while (bytes) { |
2472 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); | 3060 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr, access, error); |
2473 | unsigned offset = addr & (PAGE_SIZE-1); | 3061 | unsigned offset = addr & (PAGE_SIZE-1); |
2474 | unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset); | 3062 | unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset); |
2475 | int ret; | 3063 | int ret; |
@@ -2492,14 +3080,37 @@ out: | |||
2492 | return r; | 3080 | return r; |
2493 | } | 3081 | } |
2494 | 3082 | ||
3083 | /* used for instruction fetching */ | ||
3084 | static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes, | ||
3085 | struct kvm_vcpu *vcpu, u32 *error) | ||
3086 | { | ||
3087 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | ||
3088 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, | ||
3089 | access | PFERR_FETCH_MASK, error); | ||
3090 | } | ||
3091 | |||
3092 | static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, | ||
3093 | struct kvm_vcpu *vcpu, u32 *error) | ||
3094 | { | ||
3095 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | ||
3096 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, | ||
3097 | error); | ||
3098 | } | ||
3099 | |||
3100 | static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes, | ||
3101 | struct kvm_vcpu *vcpu, u32 *error) | ||
3102 | { | ||
3103 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error); | ||
3104 | } | ||
3105 | |||
2495 | static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes, | 3106 | static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes, |
2496 | struct kvm_vcpu *vcpu) | 3107 | struct kvm_vcpu *vcpu, u32 *error) |
2497 | { | 3108 | { |
2498 | void *data = val; | 3109 | void *data = val; |
2499 | int r = X86EMUL_CONTINUE; | 3110 | int r = X86EMUL_CONTINUE; |
2500 | 3111 | ||
2501 | while (bytes) { | 3112 | while (bytes) { |
2502 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); | 3113 | gpa_t gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error); |
2503 | unsigned offset = addr & (PAGE_SIZE-1); | 3114 | unsigned offset = addr & (PAGE_SIZE-1); |
2504 | unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); | 3115 | unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); |
2505 | int ret; | 3116 | int ret; |
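
All of the new translation entry points above funnel into gva_to_gpa() with a PFERR_* access mask: user-mode callers (CPL 3) set PFERR_USER_MASK, and the fetch/write variants OR in PFERR_FETCH_MASK/PFERR_WRITE_MASK, so permission checks happen during translation instead of after it. A self-contained sketch of the mask composition (mask values match the x86 page-fault error-code bits used by the KVM headers):

#include <stdbool.h>
#include <stdint.h>

#define PFERR_WRITE_MASK (1U << 1)   /* error-code bit 1: write access   */
#define PFERR_USER_MASK  (1U << 2)   /* bit 2: access from CPL 3         */
#define PFERR_FETCH_MASK (1U << 4)   /* bit 4: instruction fetch         */

/* sketch: compose the access mask the helpers above pass down */
static uint32_t access_mask(int cpl, bool write, bool fetch)
{
        uint32_t access = (cpl == 3) ? PFERR_USER_MASK : 0;

        if (write)
                access |= PFERR_WRITE_MASK;
        if (fetch)
                access |= PFERR_FETCH_MASK;
        return access;   /* the "system" variant passes 0: no CPL check */
}
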
@@ -2529,6 +3140,7 @@ static int emulator_read_emulated(unsigned long addr, | |||
2529 | struct kvm_vcpu *vcpu) | 3140 | struct kvm_vcpu *vcpu) |
2530 | { | 3141 | { |
2531 | gpa_t gpa; | 3142 | gpa_t gpa; |
3143 | u32 error_code; | ||
2532 | 3144 | ||
2533 | if (vcpu->mmio_read_completed) { | 3145 | if (vcpu->mmio_read_completed) { |
2534 | memcpy(val, vcpu->mmio_data, bytes); | 3146 | memcpy(val, vcpu->mmio_data, bytes); |
@@ -2538,17 +3150,20 @@ static int emulator_read_emulated(unsigned long addr, | |||
2538 | return X86EMUL_CONTINUE; | 3150 | return X86EMUL_CONTINUE; |
2539 | } | 3151 | } |
2540 | 3152 | ||
2541 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); | 3153 | gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, &error_code); |
3154 | |||
3155 | if (gpa == UNMAPPED_GVA) { | ||
3156 | kvm_inject_page_fault(vcpu, addr, error_code); | ||
3157 | return X86EMUL_PROPAGATE_FAULT; | ||
3158 | } | ||
2542 | 3159 | ||
2543 | /* For APIC access vmexit */ | 3160 | /* For APIC access vmexit */ |
2544 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) | 3161 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) |
2545 | goto mmio; | 3162 | goto mmio; |
2546 | 3163 | ||
2547 | if (kvm_read_guest_virt(addr, val, bytes, vcpu) | 3164 | if (kvm_read_guest_virt(addr, val, bytes, vcpu, NULL) |
2548 | == X86EMUL_CONTINUE) | 3165 | == X86EMUL_CONTINUE) |
2549 | return X86EMUL_CONTINUE; | 3166 | return X86EMUL_CONTINUE; |
2550 | if (gpa == UNMAPPED_GVA) | ||
2551 | return X86EMUL_PROPAGATE_FAULT; | ||
2552 | 3167 | ||
2553 | mmio: | 3168 | mmio: |
2554 | /* | 3169 | /* |
@@ -2587,11 +3202,12 @@ static int emulator_write_emulated_onepage(unsigned long addr, | |||
2587 | struct kvm_vcpu *vcpu) | 3202 | struct kvm_vcpu *vcpu) |
2588 | { | 3203 | { |
2589 | gpa_t gpa; | 3204 | gpa_t gpa; |
3205 | u32 error_code; | ||
2590 | 3206 | ||
2591 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); | 3207 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, &error_code); |
2592 | 3208 | ||
2593 | if (gpa == UNMAPPED_GVA) { | 3209 | if (gpa == UNMAPPED_GVA) { |
2594 | kvm_inject_page_fault(vcpu, addr, 2); | 3210 | kvm_inject_page_fault(vcpu, addr, error_code); |
2595 | return X86EMUL_PROPAGATE_FAULT; | 3211 | return X86EMUL_PROPAGATE_FAULT; |
2596 | } | 3212 | } |
2597 | 3213 | ||
@@ -2655,7 +3271,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr, | |||
2655 | char *kaddr; | 3271 | char *kaddr; |
2656 | u64 val; | 3272 | u64 val; |
2657 | 3273 | ||
2658 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); | 3274 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL); |
2659 | 3275 | ||
2660 | if (gpa == UNMAPPED_GVA || | 3276 | if (gpa == UNMAPPED_GVA || |
2661 | (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) | 3277 | (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) |
@@ -2692,35 +3308,21 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) | |||
2692 | 3308 | ||
2693 | int emulate_clts(struct kvm_vcpu *vcpu) | 3309 | int emulate_clts(struct kvm_vcpu *vcpu) |
2694 | { | 3310 | { |
2695 | kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS); | 3311 | kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); |
3312 | kvm_x86_ops->fpu_activate(vcpu); | ||
2696 | return X86EMUL_CONTINUE; | 3313 | return X86EMUL_CONTINUE; |
2697 | } | 3314 | } |
2698 | 3315 | ||
2699 | int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) | 3316 | int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) |
2700 | { | 3317 | { |
2701 | struct kvm_vcpu *vcpu = ctxt->vcpu; | 3318 | return kvm_x86_ops->get_dr(ctxt->vcpu, dr, dest); |
2702 | |||
2703 | switch (dr) { | ||
2704 | case 0 ... 3: | ||
2705 | *dest = kvm_x86_ops->get_dr(vcpu, dr); | ||
2706 | return X86EMUL_CONTINUE; | ||
2707 | default: | ||
2708 | pr_unimpl(vcpu, "%s: unexpected dr %u\n", __func__, dr); | ||
2709 | return X86EMUL_UNHANDLEABLE; | ||
2710 | } | ||
2711 | } | 3319 | } |
2712 | 3320 | ||
2713 | int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) | 3321 | int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) |
2714 | { | 3322 | { |
2715 | unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; | 3323 | unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; |
2716 | int exception; | ||
2717 | 3324 | ||
2718 | kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception); | 3325 | return kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask); |
2719 | if (exception) { | ||
2720 | /* FIXME: better handling */ | ||
2721 | return X86EMUL_UNHANDLEABLE; | ||
2722 | } | ||
2723 | return X86EMUL_CONTINUE; | ||
2724 | } | 3326 | } |
2725 | 3327 | ||
2726 | void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) | 3328 | void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) |
@@ -2734,7 +3336,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) | |||
2734 | 3336 | ||
2735 | rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); | 3337 | rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); |
2736 | 3338 | ||
2737 | kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu); | 3339 | kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu, NULL); |
2738 | 3340 | ||
2739 | printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n", | 3341 | printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n", |
2740 | context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); | 3342 | context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); |
@@ -2742,7 +3344,8 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) | |||
2742 | EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); | 3344 | EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); |
2743 | 3345 | ||
2744 | static struct x86_emulate_ops emulate_ops = { | 3346 | static struct x86_emulate_ops emulate_ops = { |
2745 | .read_std = kvm_read_guest_virt, | 3347 | .read_std = kvm_read_guest_virt_system, |
3348 | .fetch = kvm_fetch_guest_virt, | ||
2746 | .read_emulated = emulator_read_emulated, | 3349 | .read_emulated = emulator_read_emulated, |
2747 | .write_emulated = emulator_write_emulated, | 3350 | .write_emulated = emulator_write_emulated, |
2748 | .cmpxchg_emulated = emulator_cmpxchg_emulated, | 3351 | .cmpxchg_emulated = emulator_cmpxchg_emulated, |
@@ -2757,13 +3360,13 @@ static void cache_all_regs(struct kvm_vcpu *vcpu) | |||
2757 | } | 3360 | } |
2758 | 3361 | ||
2759 | int emulate_instruction(struct kvm_vcpu *vcpu, | 3362 | int emulate_instruction(struct kvm_vcpu *vcpu, |
2760 | struct kvm_run *run, | ||
2761 | unsigned long cr2, | 3363 | unsigned long cr2, |
2762 | u16 error_code, | 3364 | u16 error_code, |
2763 | int emulation_type) | 3365 | int emulation_type) |
2764 | { | 3366 | { |
2765 | int r, shadow_mask; | 3367 | int r, shadow_mask; |
2766 | struct decode_cache *c; | 3368 | struct decode_cache *c; |
3369 | struct kvm_run *run = vcpu->run; | ||
2767 | 3370 | ||
2768 | kvm_clear_exception_queue(vcpu); | 3371 | kvm_clear_exception_queue(vcpu); |
2769 | vcpu->arch.mmio_fault_cr2 = cr2; | 3372 | vcpu->arch.mmio_fault_cr2 = cr2; |
@@ -2783,10 +3386,11 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
2783 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | 3386 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); |
2784 | 3387 | ||
2785 | vcpu->arch.emulate_ctxt.vcpu = vcpu; | 3388 | vcpu->arch.emulate_ctxt.vcpu = vcpu; |
2786 | vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); | 3389 | vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); |
2787 | vcpu->arch.emulate_ctxt.mode = | 3390 | vcpu->arch.emulate_ctxt.mode = |
3391 | (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : | ||
2788 | (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) | 3392 | (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) |
2789 | ? X86EMUL_MODE_REAL : cs_l | 3393 | ? X86EMUL_MODE_VM86 : cs_l |
2790 | ? X86EMUL_MODE_PROT64 : cs_db | 3394 | ? X86EMUL_MODE_PROT64 : cs_db |
2791 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; | 3395 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; |
2792 | 3396 | ||
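
The mode selection above now distinguishes virtual-8086 (protected mode with EFLAGS.VM set) from true real mode, which the emulator previously conflated. The same decision chain, unrolled as a standalone sketch (enum names follow kvm_emulate.h; the boolean parameters stand in for the vcpu queries):

#include <stdbool.h>

enum x86emul_mode {
        X86EMUL_MODE_REAL,    /* CR0.PE clear                */
        X86EMUL_MODE_VM86,    /* CR0.PE set, EFLAGS.VM set   */
        X86EMUL_MODE_PROT16,  /* 16-bit protected code seg   */
        X86EMUL_MODE_PROT32,  /* 32-bit protected code seg   */
        X86EMUL_MODE_PROT64,  /* long mode, CS.L set         */
};

/* sketch: the decision chain used when setting up emulate_ctxt */
static enum x86emul_mode emul_mode(bool protmode, bool eflags_vm,
                                   bool cs_l, bool cs_db)
{
        if (!protmode)
                return X86EMUL_MODE_REAL;
        if (eflags_vm)
                return X86EMUL_MODE_VM86;
        if (cs_l)
                return X86EMUL_MODE_PROT64;
        return cs_db ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
}
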
@@ -2861,7 +3465,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
2861 | return EMULATE_DO_MMIO; | 3465 | return EMULATE_DO_MMIO; |
2862 | } | 3466 | } |
2863 | 3467 | ||
2864 | kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); | 3468 | kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); |
2865 | 3469 | ||
2866 | if (vcpu->mmio_is_write) { | 3470 | if (vcpu->mmio_is_write) { |
2867 | vcpu->mmio_needed = 0; | 3471 | vcpu->mmio_needed = 0; |
@@ -2878,12 +3482,17 @@ static int pio_copy_data(struct kvm_vcpu *vcpu) | |||
2878 | gva_t q = vcpu->arch.pio.guest_gva; | 3482 | gva_t q = vcpu->arch.pio.guest_gva; |
2879 | unsigned bytes; | 3483 | unsigned bytes; |
2880 | int ret; | 3484 | int ret; |
3485 | u32 error_code; | ||
2881 | 3486 | ||
2882 | bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count; | 3487 | bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count; |
2883 | if (vcpu->arch.pio.in) | 3488 | if (vcpu->arch.pio.in) |
2884 | ret = kvm_write_guest_virt(q, p, bytes, vcpu); | 3489 | ret = kvm_write_guest_virt(q, p, bytes, vcpu, &error_code); |
2885 | else | 3490 | else |
2886 | ret = kvm_read_guest_virt(q, p, bytes, vcpu); | 3491 | ret = kvm_read_guest_virt(q, p, bytes, vcpu, &error_code); |
3492 | |||
3493 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
3494 | kvm_inject_page_fault(vcpu, q, error_code); | ||
3495 | |||
2887 | return ret; | 3496 | return ret; |
2888 | } | 3497 | } |
2889 | 3498 | ||
@@ -2904,7 +3513,7 @@ int complete_pio(struct kvm_vcpu *vcpu) | |||
2904 | if (io->in) { | 3513 | if (io->in) { |
2905 | r = pio_copy_data(vcpu); | 3514 | r = pio_copy_data(vcpu); |
2906 | if (r) | 3515 | if (r) |
2907 | return r; | 3516 | goto out; |
2908 | } | 3517 | } |
2909 | 3518 | ||
2910 | delta = 1; | 3519 | delta = 1; |
@@ -2931,7 +3540,7 @@ int complete_pio(struct kvm_vcpu *vcpu) | |||
2931 | kvm_register_write(vcpu, VCPU_REGS_RSI, val); | 3540 | kvm_register_write(vcpu, VCPU_REGS_RSI, val); |
2932 | } | 3541 | } |
2933 | } | 3542 | } |
2934 | 3543 | out: | |
2935 | io->count -= io->cur_count; | 3544 | io->count -= io->cur_count; |
2936 | io->cur_count = 0; | 3545 | io->cur_count = 0; |
2937 | 3546 | ||
@@ -2944,11 +3553,12 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) | |||
2944 | int r; | 3553 | int r; |
2945 | 3554 | ||
2946 | if (vcpu->arch.pio.in) | 3555 | if (vcpu->arch.pio.in) |
2947 | r = kvm_io_bus_read(&vcpu->kvm->pio_bus, vcpu->arch.pio.port, | 3556 | r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port, |
2948 | vcpu->arch.pio.size, pd); | 3557 | vcpu->arch.pio.size, pd); |
2949 | else | 3558 | else |
2950 | r = kvm_io_bus_write(&vcpu->kvm->pio_bus, vcpu->arch.pio.port, | 3559 | r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, |
2951 | vcpu->arch.pio.size, pd); | 3560 | vcpu->arch.pio.port, vcpu->arch.pio.size, |
3561 | pd); | ||
2952 | return r; | 3562 | return r; |
2953 | } | 3563 | } |
2954 | 3564 | ||
@@ -2959,7 +3569,7 @@ static int pio_string_write(struct kvm_vcpu *vcpu) | |||
2959 | int i, r = 0; | 3569 | int i, r = 0; |
2960 | 3570 | ||
2961 | for (i = 0; i < io->cur_count; i++) { | 3571 | for (i = 0; i < io->cur_count; i++) { |
2962 | if (kvm_io_bus_write(&vcpu->kvm->pio_bus, | 3572 | if (kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, |
2963 | io->port, io->size, pd)) { | 3573 | io->port, io->size, pd)) { |
2964 | r = -EOPNOTSUPP; | 3574 | r = -EOPNOTSUPP; |
2965 | break; | 3575 | break; |
@@ -2969,11 +3579,12 @@ static int pio_string_write(struct kvm_vcpu *vcpu) | |||
2969 | return r; | 3579 | return r; |
2970 | } | 3580 | } |
2971 | 3581 | ||
2972 | int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | 3582 | int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port) |
2973 | int size, unsigned port) | ||
2974 | { | 3583 | { |
2975 | unsigned long val; | 3584 | unsigned long val; |
2976 | 3585 | ||
3586 | trace_kvm_pio(!in, port, size, 1); | ||
3587 | |||
2977 | vcpu->run->exit_reason = KVM_EXIT_IO; | 3588 | vcpu->run->exit_reason = KVM_EXIT_IO; |
2978 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; | 3589 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; |
2979 | vcpu->run->io.size = vcpu->arch.pio.size = size; | 3590 | vcpu->run->io.size = vcpu->arch.pio.size = size; |
@@ -2985,11 +3596,10 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | |||
2985 | vcpu->arch.pio.down = 0; | 3596 | vcpu->arch.pio.down = 0; |
2986 | vcpu->arch.pio.rep = 0; | 3597 | vcpu->arch.pio.rep = 0; |
2987 | 3598 | ||
2988 | trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port, | 3599 | if (!vcpu->arch.pio.in) { |
2989 | size, 1); | 3600 | val = kvm_register_read(vcpu, VCPU_REGS_RAX); |
2990 | 3601 | memcpy(vcpu->arch.pio_data, &val, 4); | |
2991 | val = kvm_register_read(vcpu, VCPU_REGS_RAX); | 3602 | } |
2992 | memcpy(vcpu->arch.pio_data, &val, 4); | ||
2993 | 3603 | ||
2994 | if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { | 3604 | if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { |
2995 | complete_pio(vcpu); | 3605 | complete_pio(vcpu); |
@@ -2999,13 +3609,15 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | |||
2999 | } | 3609 | } |
3000 | EXPORT_SYMBOL_GPL(kvm_emulate_pio); | 3610 | EXPORT_SYMBOL_GPL(kvm_emulate_pio); |
3001 | 3611 | ||
3002 | int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | 3612 | int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, |
3003 | int size, unsigned long count, int down, | 3613 | int size, unsigned long count, int down, |
3004 | gva_t address, int rep, unsigned port) | 3614 | gva_t address, int rep, unsigned port) |
3005 | { | 3615 | { |
3006 | unsigned now, in_page; | 3616 | unsigned now, in_page; |
3007 | int ret = 0; | 3617 | int ret = 0; |
3008 | 3618 | ||
3619 | trace_kvm_pio(!in, port, size, count); | ||
3620 | |||
3009 | vcpu->run->exit_reason = KVM_EXIT_IO; | 3621 | vcpu->run->exit_reason = KVM_EXIT_IO; |
3010 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; | 3622 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; |
3011 | vcpu->run->io.size = vcpu->arch.pio.size = size; | 3623 | vcpu->run->io.size = vcpu->arch.pio.size = size; |
@@ -3017,9 +3629,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | |||
3017 | vcpu->arch.pio.down = down; | 3629 | vcpu->arch.pio.down = down; |
3018 | vcpu->arch.pio.rep = rep; | 3630 | vcpu->arch.pio.rep = rep; |
3019 | 3631 | ||
3020 | trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port, | ||
3021 | size, count); | ||
3022 | |||
3023 | if (!count) { | 3632 | if (!count) { |
3024 | kvm_x86_ops->skip_emulated_instruction(vcpu); | 3633 | kvm_x86_ops->skip_emulated_instruction(vcpu); |
3025 | return 1; | 3634 | return 1; |
@@ -3051,10 +3660,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | |||
3051 | if (!vcpu->arch.pio.in) { | 3660 | if (!vcpu->arch.pio.in) { |
3052 | /* string PIO write */ | 3661 | /* string PIO write */ |
3053 | ret = pio_copy_data(vcpu); | 3662 | ret = pio_copy_data(vcpu); |
3054 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 3663 | if (ret == X86EMUL_PROPAGATE_FAULT) |
3055 | kvm_inject_gp(vcpu, 0); | ||
3056 | return 1; | 3664 | return 1; |
3057 | } | ||
3058 | if (ret == 0 && !pio_string_write(vcpu)) { | 3665 | if (ret == 0 && !pio_string_write(vcpu)) { |
3059 | complete_pio(vcpu); | 3666 | complete_pio(vcpu); |
3060 | if (vcpu->arch.pio.count == 0) | 3667 | if (vcpu->arch.pio.count == 0) |
@@ -3072,9 +3679,6 @@ static void bounce_off(void *info) | |||
3072 | /* nothing */ | 3679 | /* nothing */ |
3073 | } | 3680 | } |
3074 | 3681 | ||
3075 | static unsigned int ref_freq; | ||
3076 | static unsigned long tsc_khz_ref; | ||
3077 | |||
3078 | static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, | 3682 | static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, |
3079 | void *data) | 3683 | void *data) |
3080 | { | 3684 | { |
@@ -3083,14 +3687,11 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va | |||
3083 | struct kvm_vcpu *vcpu; | 3687 | struct kvm_vcpu *vcpu; |
3084 | int i, send_ipi = 0; | 3688 | int i, send_ipi = 0; |
3085 | 3689 | ||
3086 | if (!ref_freq) | ||
3087 | ref_freq = freq->old; | ||
3088 | |||
3089 | if (val == CPUFREQ_PRECHANGE && freq->old > freq->new) | 3690 | if (val == CPUFREQ_PRECHANGE && freq->old > freq->new) |
3090 | return 0; | 3691 | return 0; |
3091 | if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new) | 3692 | if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new) |
3092 | return 0; | 3693 | return 0; |
3093 | per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); | 3694 | per_cpu(cpu_tsc_khz, freq->cpu) = freq->new; |
3094 | 3695 | ||
3095 | spin_lock(&kvm_lock); | 3696 | spin_lock(&kvm_lock); |
3096 | list_for_each_entry(kvm, &vm_list, vm_list) { | 3697 | list_for_each_entry(kvm, &vm_list, vm_list) { |
@@ -3127,9 +3728,28 @@ static struct notifier_block kvmclock_cpufreq_notifier_block = { | |||
3127 | .notifier_call = kvmclock_cpufreq_notifier | 3728 | .notifier_call = kvmclock_cpufreq_notifier |
3128 | }; | 3729 | }; |
3129 | 3730 | ||
3731 | static void kvm_timer_init(void) | ||
3732 | { | ||
3733 | int cpu; | ||
3734 | |||
3735 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { | ||
3736 | cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, | ||
3737 | CPUFREQ_TRANSITION_NOTIFIER); | ||
3738 | for_each_online_cpu(cpu) { | ||
3739 | unsigned long khz = cpufreq_get(cpu); | ||
3740 | if (!khz) | ||
3741 | khz = tsc_khz; | ||
3742 | per_cpu(cpu_tsc_khz, cpu) = khz; | ||
3743 | } | ||
3744 | } else { | ||
3745 | for_each_possible_cpu(cpu) | ||
3746 | per_cpu(cpu_tsc_khz, cpu) = tsc_khz; | ||
3747 | } | ||
3748 | } | ||
3749 | |||
3130 | int kvm_arch_init(void *opaque) | 3750 | int kvm_arch_init(void *opaque) |
3131 | { | 3751 | { |
3132 | int r, cpu; | 3752 | int r; |
3133 | struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque; | 3753 | struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque; |
3134 | 3754 | ||
3135 | if (kvm_x86_ops) { | 3755 | if (kvm_x86_ops) { |
@@ -3161,13 +3781,7 @@ int kvm_arch_init(void *opaque) | |||
3161 | kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, | 3781 | kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, |
3162 | PT_DIRTY_MASK, PT64_NX_MASK, 0); | 3782 | PT_DIRTY_MASK, PT64_NX_MASK, 0); |
3163 | 3783 | ||
3164 | for_each_possible_cpu(cpu) | 3784 | kvm_timer_init(); |
3165 | per_cpu(cpu_tsc_khz, cpu) = tsc_khz; | ||
3166 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { | ||
3167 | tsc_khz_ref = tsc_khz; | ||
3168 | cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, | ||
3169 | CPUFREQ_TRANSITION_NOTIFIER); | ||
3170 | } | ||
3171 | 3785 | ||
3172 | return 0; | 3786 | return 0; |
3173 | 3787 | ||
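
kvm_timer_init() above drops the lazy ref_freq/tsc_khz_ref scaling: on hosts without a constant TSC it now seeds per_cpu(cpu_tsc_khz) from cpufreq_get() directly and lets the notifier store freq->new, rather than rescaling the boot-time tsc_khz on every transition. For reference, the arithmetic the removed cpufreq_scale() call performed (a worked sketch, not kernel code):

#include <stdio.h>

/* sketch: what cpufreq_scale(tsc_khz_ref, ref_freq, new_freq) computed */
static unsigned long scale_khz(unsigned long tsc_khz_ref,
                               unsigned long ref_freq,
                               unsigned long new_freq)
{
        return (unsigned long)((unsigned long long)tsc_khz_ref *
                               new_freq / ref_freq);
}

int main(void)
{
        /* e.g. 2.0 GHz TSC measured at a 2.0 GHz reference, CPU now at 800 MHz */
        printf("%lu kHz\n", scale_khz(2000000, 2000000, 800000)); /* 800000 */
        return 0;
}
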
@@ -3206,11 +3820,76 @@ static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0, | |||
3206 | return a0 | ((gpa_t)a1 << 32); | 3820 | return a0 | ((gpa_t)a1 << 32); |
3207 | } | 3821 | } |
3208 | 3822 | ||
3823 | int kvm_hv_hypercall(struct kvm_vcpu *vcpu) | ||
3824 | { | ||
3825 | u64 param, ingpa, outgpa, ret; | ||
3826 | uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0; | ||
3827 | bool fast, longmode; | ||
3828 | int cs_db, cs_l; | ||
3829 | |||
3830 | /* | ||
3831 | * a hypercall generates #UD from non-zero CPL and from real mode, | ||
3832 | * per the Hyper-V spec | ||
3833 | */ | ||
3834 | if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) { | ||
3835 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
3836 | return 0; | ||
3837 | } | ||
3838 | |||
3839 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | ||
3840 | longmode = is_long_mode(vcpu) && cs_l == 1; | ||
3841 | |||
3842 | if (!longmode) { | ||
3843 | param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) | | ||
3844 | (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff); | ||
3845 | ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) | | ||
3846 | (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff); | ||
3847 | outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) | | ||
3848 | (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff); | ||
3849 | } | ||
3850 | #ifdef CONFIG_X86_64 | ||
3851 | else { | ||
3852 | param = kvm_register_read(vcpu, VCPU_REGS_RCX); | ||
3853 | ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX); | ||
3854 | outgpa = kvm_register_read(vcpu, VCPU_REGS_R8); | ||
3855 | } | ||
3856 | #endif | ||
3857 | |||
3858 | code = param & 0xffff; | ||
3859 | fast = (param >> 16) & 0x1; | ||
3860 | rep_cnt = (param >> 32) & 0xfff; | ||
3861 | rep_idx = (param >> 48) & 0xfff; | ||
3862 | |||
3863 | trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa); | ||
3864 | |||
3865 | switch (code) { | ||
3866 | case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT: | ||
3867 | kvm_vcpu_on_spin(vcpu); | ||
3868 | break; | ||
3869 | default: | ||
3870 | res = HV_STATUS_INVALID_HYPERCALL_CODE; | ||
3871 | break; | ||
3872 | } | ||
3873 | |||
3874 | ret = res | (((u64)rep_done & 0xfff) << 32); | ||
3875 | if (longmode) { | ||
3876 | kvm_register_write(vcpu, VCPU_REGS_RAX, ret); | ||
3877 | } else { | ||
3878 | kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32); | ||
3879 | kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff); | ||
3880 | } | ||
3881 | |||
3882 | return 1; | ||
3883 | } | ||
3884 | |||
3209 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) | 3885 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) |
3210 | { | 3886 | { |
3211 | unsigned long nr, a0, a1, a2, a3, ret; | 3887 | unsigned long nr, a0, a1, a2, a3, ret; |
3212 | int r = 1; | 3888 | int r = 1; |
3213 | 3889 | ||
3890 | if (kvm_hv_hypercall_enabled(vcpu->kvm)) | ||
3891 | return kvm_hv_hypercall(vcpu); | ||
3892 | |||
3214 | nr = kvm_register_read(vcpu, VCPU_REGS_RAX); | 3893 | nr = kvm_register_read(vcpu, VCPU_REGS_RAX); |
3215 | a0 = kvm_register_read(vcpu, VCPU_REGS_RBX); | 3894 | a0 = kvm_register_read(vcpu, VCPU_REGS_RBX); |
3216 | a1 = kvm_register_read(vcpu, VCPU_REGS_RCX); | 3895 | a1 = kvm_register_read(vcpu, VCPU_REGS_RCX); |
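
kvm_hv_hypercall() above unpacks the Hyper-V hypercall input value: bits 15:0 carry the call code, bit 16 the fast-call flag, bits 43:32 the rep count and bits 59:48 the rep start index, matching the extraction in the code. A standalone decode sketch with a made-up example value:

#include <stdint.h>
#include <stdio.h>

/* sketch: decode a Hyper-V hypercall input value as the code above does */
int main(void)
{
        uint64_t param = 0x0003000500010008ULL;   /* made-up example value */

        uint16_t code    = param & 0xffff;        /* bits 15:0  */
        int      fast    = (param >> 16) & 0x1;   /* bit  16    */
        uint16_t rep_cnt = (param >> 32) & 0xfff; /* bits 43:32 */
        uint16_t rep_idx = (param >> 48) & 0xfff; /* bits 59:48 */

        printf("code=%u fast=%d rep_cnt=%u rep_idx=%u\n",
               code, fast, rep_cnt, rep_idx);     /* code=8 fast=1 cnt=5 idx=3 */
        return 0;
}
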
@@ -3253,10 +3932,8 @@ EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); | |||
3253 | int kvm_fix_hypercall(struct kvm_vcpu *vcpu) | 3932 | int kvm_fix_hypercall(struct kvm_vcpu *vcpu) |
3254 | { | 3933 | { |
3255 | char instruction[3]; | 3934 | char instruction[3]; |
3256 | int ret = 0; | ||
3257 | unsigned long rip = kvm_rip_read(vcpu); | 3935 | unsigned long rip = kvm_rip_read(vcpu); |
3258 | 3936 | ||
3259 | |||
3260 | /* | 3937 | /* |
3261 | * Blow out the MMU to ensure that no other VCPU has an active mapping | 3938 | * Blow out the MMU to ensure that no other VCPU has an active mapping |
3262 | * to ensure that the updated hypercall appears atomically across all | 3939 | * to ensure that the updated hypercall appears atomically across all |
@@ -3265,11 +3942,8 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu) | |||
3265 | kvm_mmu_zap_all(vcpu->kvm); | 3942 | kvm_mmu_zap_all(vcpu->kvm); |
3266 | 3943 | ||
3267 | kvm_x86_ops->patch_hypercall(vcpu, instruction); | 3944 | kvm_x86_ops->patch_hypercall(vcpu, instruction); |
3268 | if (emulator_write_emulated(rip, instruction, 3, vcpu) | ||
3269 | != X86EMUL_CONTINUE) | ||
3270 | ret = -EFAULT; | ||
3271 | 3945 | ||
3272 | return ret; | 3946 | return emulator_write_emulated(rip, instruction, 3, vcpu); |
3273 | } | 3947 | } |
3274 | 3948 | ||
3275 | static u64 mk_cr_64(u64 curr_cr, u32 new_val) | 3949 | static u64 mk_cr_64(u64 curr_cr, u32 new_val) |
@@ -3295,17 +3969,16 @@ void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, | |||
3295 | unsigned long *rflags) | 3969 | unsigned long *rflags) |
3296 | { | 3970 | { |
3297 | kvm_lmsw(vcpu, msw); | 3971 | kvm_lmsw(vcpu, msw); |
3298 | *rflags = kvm_x86_ops->get_rflags(vcpu); | 3972 | *rflags = kvm_get_rflags(vcpu); |
3299 | } | 3973 | } |
3300 | 3974 | ||
3301 | unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) | 3975 | unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) |
3302 | { | 3976 | { |
3303 | unsigned long value; | 3977 | unsigned long value; |
3304 | 3978 | ||
3305 | kvm_x86_ops->decache_cr4_guest_bits(vcpu); | ||
3306 | switch (cr) { | 3979 | switch (cr) { |
3307 | case 0: | 3980 | case 0: |
3308 | value = vcpu->arch.cr0; | 3981 | value = kvm_read_cr0(vcpu); |
3309 | break; | 3982 | break; |
3310 | case 2: | 3983 | case 2: |
3311 | value = vcpu->arch.cr2; | 3984 | value = vcpu->arch.cr2; |
@@ -3314,7 +3987,7 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) | |||
3314 | value = vcpu->arch.cr3; | 3987 | value = vcpu->arch.cr3; |
3315 | break; | 3988 | break; |
3316 | case 4: | 3989 | case 4: |
3317 | value = vcpu->arch.cr4; | 3990 | value = kvm_read_cr4(vcpu); |
3318 | break; | 3991 | break; |
3319 | case 8: | 3992 | case 8: |
3320 | value = kvm_get_cr8(vcpu); | 3993 | value = kvm_get_cr8(vcpu); |
@@ -3332,8 +4005,8 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, | |||
3332 | { | 4005 | { |
3333 | switch (cr) { | 4006 | switch (cr) { |
3334 | case 0: | 4007 | case 0: |
3335 | kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val)); | 4008 | kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); |
3336 | *rflags = kvm_x86_ops->get_rflags(vcpu); | 4009 | *rflags = kvm_get_rflags(vcpu); |
3337 | break; | 4010 | break; |
3338 | case 2: | 4011 | case 2: |
3339 | vcpu->arch.cr2 = val; | 4012 | vcpu->arch.cr2 = val; |
@@ -3342,7 +4015,7 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, | |||
3342 | kvm_set_cr3(vcpu, val); | 4015 | kvm_set_cr3(vcpu, val); |
3343 | break; | 4016 | break; |
3344 | case 4: | 4017 | case 4: |
3345 | kvm_set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val)); | 4018 | kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); |
3346 | break; | 4019 | break; |
3347 | case 8: | 4020 | case 8: |
3348 | kvm_set_cr8(vcpu, val & 0xfUL); | 4021 | kvm_set_cr8(vcpu, val & 0xfUL); |
@@ -3409,6 +4082,7 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, | |||
3409 | } | 4082 | } |
3410 | return best; | 4083 | return best; |
3411 | } | 4084 | } |
4085 | EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry); | ||
3412 | 4086 | ||
3413 | int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) | 4087 | int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) |
3414 | { | 4088 | { |
@@ -3453,18 +4127,18 @@ EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); | |||
3453 | * | 4127 | * |
3454 | * No need to exit to userspace if we already have an interrupt queued. | 4128 | * No need to exit to userspace if we already have an interrupt queued. |
3455 | */ | 4129 | */ |
3456 | static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, | 4130 | static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu) |
3457 | struct kvm_run *kvm_run) | ||
3458 | { | 4131 | { |
3459 | return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) && | 4132 | return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) && |
3460 | kvm_run->request_interrupt_window && | 4133 | vcpu->run->request_interrupt_window && |
3461 | kvm_arch_interrupt_allowed(vcpu)); | 4134 | kvm_arch_interrupt_allowed(vcpu)); |
3462 | } | 4135 | } |
3463 | 4136 | ||
3464 | static void post_kvm_run_save(struct kvm_vcpu *vcpu, | 4137 | static void post_kvm_run_save(struct kvm_vcpu *vcpu) |
3465 | struct kvm_run *kvm_run) | ||
3466 | { | 4138 | { |
3467 | kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0; | 4139 | struct kvm_run *kvm_run = vcpu->run; |
4140 | |||
4141 | kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0; | ||
3468 | kvm_run->cr8 = kvm_get_cr8(vcpu); | 4142 | kvm_run->cr8 = kvm_get_cr8(vcpu); |
3469 | kvm_run->apic_base = kvm_get_apic_base(vcpu); | 4143 | kvm_run->apic_base = kvm_get_apic_base(vcpu); |
3470 | if (irqchip_in_kernel(vcpu->kvm)) | 4144 | if (irqchip_in_kernel(vcpu->kvm)) |
@@ -3492,14 +4166,15 @@ static void vapic_enter(struct kvm_vcpu *vcpu) | |||
3492 | static void vapic_exit(struct kvm_vcpu *vcpu) | 4166 | static void vapic_exit(struct kvm_vcpu *vcpu) |
3493 | { | 4167 | { |
3494 | struct kvm_lapic *apic = vcpu->arch.apic; | 4168 | struct kvm_lapic *apic = vcpu->arch.apic; |
4169 | int idx; | ||
3495 | 4170 | ||
3496 | if (!apic || !apic->vapic_addr) | 4171 | if (!apic || !apic->vapic_addr) |
3497 | return; | 4172 | return; |
3498 | 4173 | ||
3499 | down_read(&vcpu->kvm->slots_lock); | 4174 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
3500 | kvm_release_page_dirty(apic->vapic_page); | 4175 | kvm_release_page_dirty(apic->vapic_page); |
3501 | mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); | 4176 | mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); |
3502 | up_read(&vcpu->kvm->slots_lock); | 4177 | srcu_read_unlock(&vcpu->kvm->srcu, idx); |
3503 | } | 4178 | } |
3504 | 4179 | ||
3505 | static void update_cr8_intercept(struct kvm_vcpu *vcpu) | 4180 | static void update_cr8_intercept(struct kvm_vcpu *vcpu) |
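Note: vapic_exit() is one of several places in this patch where the slots_lock rwsem is replaced by an SRCU read-side critical section, so memslot readers never block memslot writers. A minimal sketch of the read-side pattern, assuming a struct kvm whose srcu member has been initialized with init_srcu_struct():

	int idx;

	idx = srcu_read_lock(&kvm->srcu);	/* enter read side; remember the index */
	/* ... dereference memslot-protected state ... */
	srcu_read_unlock(&kvm->srcu, idx);	/* leave read side with the same index */

Writers publish the new state and then call synchronize_srcu(), which waits only for readers that entered before the update.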
@@ -3525,7 +4200,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu) | |||
3525 | kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr); | 4200 | kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr); |
3526 | } | 4201 | } |
3527 | 4202 | ||
3528 | static void inject_pending_event(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 4203 | static void inject_pending_event(struct kvm_vcpu *vcpu) |
3529 | { | 4204 | { |
3530 | /* try to reinject previous events if any */ | 4205 | /* try to reinject previous events if any */ |
3531 | if (vcpu->arch.exception.pending) { | 4206 | if (vcpu->arch.exception.pending) { |
@@ -3561,11 +4236,11 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3561 | } | 4236 | } |
3562 | } | 4237 | } |
3563 | 4238 | ||
3564 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 4239 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) |
3565 | { | 4240 | { |
3566 | int r; | 4241 | int r; |
3567 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && | 4242 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && |
3568 | kvm_run->request_interrupt_window; | 4243 | vcpu->run->request_interrupt_window; |
3569 | 4244 | ||
3570 | if (vcpu->requests) | 4245 | if (vcpu->requests) |
3571 | if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) | 4246 | if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) |
@@ -3586,21 +4261,26 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3586 | kvm_x86_ops->tlb_flush(vcpu); | 4261 | kvm_x86_ops->tlb_flush(vcpu); |
3587 | if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS, | 4262 | if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS, |
3588 | &vcpu->requests)) { | 4263 | &vcpu->requests)) { |
3589 | kvm_run->exit_reason = KVM_EXIT_TPR_ACCESS; | 4264 | vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS; |
3590 | r = 0; | 4265 | r = 0; |
3591 | goto out; | 4266 | goto out; |
3592 | } | 4267 | } |
3593 | if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) { | 4268 | if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) { |
3594 | kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; | 4269 | vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; |
3595 | r = 0; | 4270 | r = 0; |
3596 | goto out; | 4271 | goto out; |
3597 | } | 4272 | } |
4273 | if (test_and_clear_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests)) { | ||
4274 | vcpu->fpu_active = 0; | ||
4275 | kvm_x86_ops->fpu_deactivate(vcpu); | ||
4276 | } | ||
3598 | } | 4277 | } |
3599 | 4278 | ||
3600 | preempt_disable(); | 4279 | preempt_disable(); |
3601 | 4280 | ||
3602 | kvm_x86_ops->prepare_guest_switch(vcpu); | 4281 | kvm_x86_ops->prepare_guest_switch(vcpu); |
3603 | kvm_load_guest_fpu(vcpu); | 4282 | if (vcpu->fpu_active) |
4283 | kvm_load_guest_fpu(vcpu); | ||
3604 | 4284 | ||
3605 | local_irq_disable(); | 4285 | local_irq_disable(); |
3606 | 4286 | ||
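The request flags consumed above follow one convention: any context may set a bit in vcpu->requests, and vcpu_enter_guest() consumes it exactly once with test_and_clear_bit() before entering the guest. A sketch of the new KVM_REQ_DEACTIVATE_FPU round trip, mirroring this hunk and the kvm_put_guest_fpu() change later in this diff:

	/* producer (kvm_put_guest_fpu): defer the expensive part to guest entry */
	set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests);

	/* consumer (vcpu_enter_guest): runs at most once per request */
	if (test_and_clear_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests)) {
		vcpu->fpu_active = 0;
		kvm_x86_ops->fpu_deactivate(vcpu);
	}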
@@ -3615,7 +4295,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3615 | goto out; | 4295 | goto out; |
3616 | } | 4296 | } |
3617 | 4297 | ||
3618 | inject_pending_event(vcpu, kvm_run); | 4298 | inject_pending_event(vcpu); |
3619 | 4299 | ||
3620 | /* enable NMI/IRQ window open exits if needed */ | 4300 | /* enable NMI/IRQ window open exits if needed */ |
3621 | if (vcpu->arch.nmi_pending) | 4301 | if (vcpu->arch.nmi_pending) |
@@ -3628,7 +4308,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3628 | kvm_lapic_sync_to_vapic(vcpu); | 4308 | kvm_lapic_sync_to_vapic(vcpu); |
3629 | } | 4309 | } |
3630 | 4310 | ||
3631 | up_read(&vcpu->kvm->slots_lock); | 4311 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); |
3632 | 4312 | ||
3633 | kvm_guest_enter(); | 4313 | kvm_guest_enter(); |
3634 | 4314 | ||
@@ -3641,16 +4321,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3641 | } | 4321 | } |
3642 | 4322 | ||
3643 | trace_kvm_entry(vcpu->vcpu_id); | 4323 | trace_kvm_entry(vcpu->vcpu_id); |
3644 | kvm_x86_ops->run(vcpu, kvm_run); | 4324 | kvm_x86_ops->run(vcpu); |
3645 | 4325 | ||
3646 | if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) { | 4326 | /* |
3647 | set_debugreg(current->thread.debugreg0, 0); | 4327 | * If the guest has used debug registers, at least dr7 |
3648 | set_debugreg(current->thread.debugreg1, 1); | 4328 | * will be disabled while returning to the host. |
3649 | set_debugreg(current->thread.debugreg2, 2); | 4329 | * If we don't have active breakpoints in the host, we don't |
3650 | set_debugreg(current->thread.debugreg3, 3); | 4330 | * care about the messed up debug address registers. But if |
3651 | set_debugreg(current->thread.debugreg6, 6); | 4331 | * we have some of them active, restore the old state. |
3652 | set_debugreg(current->thread.debugreg7, 7); | 4332 | */ |
3653 | } | 4333 | if (hw_breakpoint_active()) |
4334 | hw_breakpoint_restore(); | ||
3654 | 4335 | ||
3655 | set_bit(KVM_REQ_KICK, &vcpu->requests); | 4336 | set_bit(KVM_REQ_KICK, &vcpu->requests); |
3656 | local_irq_enable(); | 4337 | local_irq_enable(); |
@@ -3669,7 +4350,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3669 | 4350 | ||
3670 | preempt_enable(); | 4351 | preempt_enable(); |
3671 | 4352 | ||
3672 | down_read(&vcpu->kvm->slots_lock); | 4353 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); |
3673 | 4354 | ||
3674 | /* | 4355 | /* |
3675 | * Profile KVM exit RIPs: | 4356 | * Profile KVM exit RIPs: |
@@ -3682,15 +4363,16 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3682 | 4363 | ||
3683 | kvm_lapic_sync_from_vapic(vcpu); | 4364 | kvm_lapic_sync_from_vapic(vcpu); |
3684 | 4365 | ||
3685 | r = kvm_x86_ops->handle_exit(kvm_run, vcpu); | 4366 | r = kvm_x86_ops->handle_exit(vcpu); |
3686 | out: | 4367 | out: |
3687 | return r; | 4368 | return r; |
3688 | } | 4369 | } |
3689 | 4370 | ||
3690 | 4371 | ||
3691 | static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 4372 | static int __vcpu_run(struct kvm_vcpu *vcpu) |
3692 | { | 4373 | { |
3693 | int r; | 4374 | int r; |
4375 | struct kvm *kvm = vcpu->kvm; | ||
3694 | 4376 | ||
3695 | if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) { | 4377 | if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) { |
3696 | pr_debug("vcpu %d received sipi with vector # %x\n", | 4378 | pr_debug("vcpu %d received sipi with vector # %x\n", |
@@ -3702,17 +4384,17 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3702 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | 4384 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
3703 | } | 4385 | } |
3704 | 4386 | ||
3705 | down_read(&vcpu->kvm->slots_lock); | 4387 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); |
3706 | vapic_enter(vcpu); | 4388 | vapic_enter(vcpu); |
3707 | 4389 | ||
3708 | r = 1; | 4390 | r = 1; |
3709 | while (r > 0) { | 4391 | while (r > 0) { |
3710 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) | 4392 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) |
3711 | r = vcpu_enter_guest(vcpu, kvm_run); | 4393 | r = vcpu_enter_guest(vcpu); |
3712 | else { | 4394 | else { |
3713 | up_read(&vcpu->kvm->slots_lock); | 4395 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); |
3714 | kvm_vcpu_block(vcpu); | 4396 | kvm_vcpu_block(vcpu); |
3715 | down_read(&vcpu->kvm->slots_lock); | 4397 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); |
3716 | if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests)) | 4398 | if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests)) |
3717 | { | 4399 | { |
3718 | switch(vcpu->arch.mp_state) { | 4400 | switch(vcpu->arch.mp_state) { |
@@ -3736,25 +4418,25 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3736 | if (kvm_cpu_has_pending_timer(vcpu)) | 4418 | if (kvm_cpu_has_pending_timer(vcpu)) |
3737 | kvm_inject_pending_timer_irqs(vcpu); | 4419 | kvm_inject_pending_timer_irqs(vcpu); |
3738 | 4420 | ||
3739 | if (dm_request_for_irq_injection(vcpu, kvm_run)) { | 4421 | if (dm_request_for_irq_injection(vcpu)) { |
3740 | r = -EINTR; | 4422 | r = -EINTR; |
3741 | kvm_run->exit_reason = KVM_EXIT_INTR; | 4423 | vcpu->run->exit_reason = KVM_EXIT_INTR; |
3742 | ++vcpu->stat.request_irq_exits; | 4424 | ++vcpu->stat.request_irq_exits; |
3743 | } | 4425 | } |
3744 | if (signal_pending(current)) { | 4426 | if (signal_pending(current)) { |
3745 | r = -EINTR; | 4427 | r = -EINTR; |
3746 | kvm_run->exit_reason = KVM_EXIT_INTR; | 4428 | vcpu->run->exit_reason = KVM_EXIT_INTR; |
3747 | ++vcpu->stat.signal_exits; | 4429 | ++vcpu->stat.signal_exits; |
3748 | } | 4430 | } |
3749 | if (need_resched()) { | 4431 | if (need_resched()) { |
3750 | up_read(&vcpu->kvm->slots_lock); | 4432 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); |
3751 | kvm_resched(vcpu); | 4433 | kvm_resched(vcpu); |
3752 | down_read(&vcpu->kvm->slots_lock); | 4434 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); |
3753 | } | 4435 | } |
3754 | } | 4436 | } |
3755 | 4437 | ||
3756 | up_read(&vcpu->kvm->slots_lock); | 4438 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); |
3757 | post_kvm_run_save(vcpu, kvm_run); | 4439 | post_kvm_run_save(vcpu); |
3758 | 4440 | ||
3759 | vapic_exit(vcpu); | 4441 | vapic_exit(vcpu); |
3760 | 4442 | ||
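Note how __vcpu_run() now drops the SRCU read lock across every potentially long sleep (kvm_vcpu_block(), kvm_resched()); a reader parked inside the critical section would stall any memslot writer waiting in synchronize_srcu(). The bracketing, condensed:

	srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);	/* may sleep: release read side */
	kvm_vcpu_block(vcpu);
	vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);	/* reacquire before touching memslots */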
@@ -3783,21 +4465,21 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3783 | kvm_set_cr8(vcpu, kvm_run->cr8); | 4465 | kvm_set_cr8(vcpu, kvm_run->cr8); |
3784 | 4466 | ||
3785 | if (vcpu->arch.pio.cur_count) { | 4467 | if (vcpu->arch.pio.cur_count) { |
4468 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | ||
3786 | r = complete_pio(vcpu); | 4469 | r = complete_pio(vcpu); |
4470 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | ||
3787 | if (r) | 4471 | if (r) |
3788 | goto out; | 4472 | goto out; |
3789 | } | 4473 | } |
3790 | #if CONFIG_HAS_IOMEM | ||
3791 | if (vcpu->mmio_needed) { | 4474 | if (vcpu->mmio_needed) { |
3792 | memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); | 4475 | memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); |
3793 | vcpu->mmio_read_completed = 1; | 4476 | vcpu->mmio_read_completed = 1; |
3794 | vcpu->mmio_needed = 0; | 4477 | vcpu->mmio_needed = 0; |
3795 | 4478 | ||
3796 | down_read(&vcpu->kvm->slots_lock); | 4479 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); |
3797 | r = emulate_instruction(vcpu, kvm_run, | 4480 | r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0, |
3798 | vcpu->arch.mmio_fault_cr2, 0, | ||
3799 | EMULTYPE_NO_DECODE); | 4481 | EMULTYPE_NO_DECODE); |
3800 | up_read(&vcpu->kvm->slots_lock); | 4482 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); |
3801 | if (r == EMULATE_DO_MMIO) { | 4483 | if (r == EMULATE_DO_MMIO) { |
3802 | /* | 4484 | /* |
3803 | * Read-modify-write. Back to userspace. | 4485 | * Read-modify-write. Back to userspace. |
@@ -3806,12 +4488,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3806 | goto out; | 4488 | goto out; |
3807 | } | 4489 | } |
3808 | } | 4490 | } |
3809 | #endif | ||
3810 | if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) | 4491 | if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) |
3811 | kvm_register_write(vcpu, VCPU_REGS_RAX, | 4492 | kvm_register_write(vcpu, VCPU_REGS_RAX, |
3812 | kvm_run->hypercall.ret); | 4493 | kvm_run->hypercall.ret); |
3813 | 4494 | ||
3814 | r = __vcpu_run(vcpu, kvm_run); | 4495 | r = __vcpu_run(vcpu); |
3815 | 4496 | ||
3816 | out: | 4497 | out: |
3817 | if (vcpu->sigset_active) | 4498 | if (vcpu->sigset_active) |
@@ -3845,13 +4526,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
3845 | #endif | 4526 | #endif |
3846 | 4527 | ||
3847 | regs->rip = kvm_rip_read(vcpu); | 4528 | regs->rip = kvm_rip_read(vcpu); |
3848 | regs->rflags = kvm_x86_ops->get_rflags(vcpu); | 4529 | regs->rflags = kvm_get_rflags(vcpu); |
3849 | |||
3850 | /* | ||
3851 | * Don't leak debug flags in case they were set for guest debugging | ||
3852 | */ | ||
3853 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | ||
3854 | regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); | ||
3855 | 4530 | ||
3856 | vcpu_put(vcpu); | 4531 | vcpu_put(vcpu); |
3857 | 4532 | ||
@@ -3879,12 +4554,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
3879 | kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13); | 4554 | kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13); |
3880 | kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14); | 4555 | kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14); |
3881 | kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15); | 4556 | kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15); |
3882 | |||
3883 | #endif | 4557 | #endif |
3884 | 4558 | ||
3885 | kvm_rip_write(vcpu, regs->rip); | 4559 | kvm_rip_write(vcpu, regs->rip); |
3886 | kvm_x86_ops->set_rflags(vcpu, regs->rflags); | 4560 | kvm_set_rflags(vcpu, regs->rflags); |
3887 | |||
3888 | 4561 | ||
3889 | vcpu->arch.exception.pending = false; | 4562 | vcpu->arch.exception.pending = false; |
3890 | 4563 | ||
@@ -3933,13 +4606,12 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
3933 | sregs->gdt.limit = dt.limit; | 4606 | sregs->gdt.limit = dt.limit; |
3934 | sregs->gdt.base = dt.base; | 4607 | sregs->gdt.base = dt.base; |
3935 | 4608 | ||
3936 | kvm_x86_ops->decache_cr4_guest_bits(vcpu); | 4609 | sregs->cr0 = kvm_read_cr0(vcpu); |
3937 | sregs->cr0 = vcpu->arch.cr0; | ||
3938 | sregs->cr2 = vcpu->arch.cr2; | 4610 | sregs->cr2 = vcpu->arch.cr2; |
3939 | sregs->cr3 = vcpu->arch.cr3; | 4611 | sregs->cr3 = vcpu->arch.cr3; |
3940 | sregs->cr4 = vcpu->arch.cr4; | 4612 | sregs->cr4 = kvm_read_cr4(vcpu); |
3941 | sregs->cr8 = kvm_get_cr8(vcpu); | 4613 | sregs->cr8 = kvm_get_cr8(vcpu); |
3942 | sregs->efer = vcpu->arch.shadow_efer; | 4614 | sregs->efer = vcpu->arch.efer; |
3943 | sregs->apic_base = kvm_get_apic_base(vcpu); | 4615 | sregs->apic_base = kvm_get_apic_base(vcpu); |
3944 | 4616 | ||
3945 | memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap); | 4617 | memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap); |
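kvm_read_cr0()/kvm_read_cr4() replace the explicit decache_cr4_guest_bits() step: the accessor decaches guest-owned control-register bits from hardware on demand, so callers no longer need to know which bits may be stale. A sketch of a masked read, assuming the kvm_read_cr4_bits() helper from kvm_cache_regs.h that this series relies on:

	/* decaches only the requested bits, and only if they are guest-owned */
	if (kvm_read_cr4_bits(vcpu, X86_CR4_PAE))
		pr_debug("guest is using PAE paging\n");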
@@ -4027,14 +4699,23 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | |||
4027 | { | 4699 | { |
4028 | struct descriptor_table dtable; | 4700 | struct descriptor_table dtable; |
4029 | u16 index = selector >> 3; | 4701 | u16 index = selector >> 3; |
4702 | int ret; | ||
4703 | u32 err; | ||
4704 | gva_t addr; | ||
4030 | 4705 | ||
4031 | get_segment_descriptor_dtable(vcpu, selector, &dtable); | 4706 | get_segment_descriptor_dtable(vcpu, selector, &dtable); |
4032 | 4707 | ||
4033 | if (dtable.limit < index * 8 + 7) { | 4708 | if (dtable.limit < index * 8 + 7) { |
4034 | kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); | 4709 | kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); |
4035 | return 1; | 4710 | return X86EMUL_PROPAGATE_FAULT; |
4036 | } | 4711 | } |
4037 | return kvm_read_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu); | 4712 | addr = dtable.base + index * 8; |
4713 | ret = kvm_read_guest_virt_system(addr, seg_desc, sizeof(*seg_desc), | ||
4714 | vcpu, &err); | ||
4715 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
4716 | kvm_inject_page_fault(vcpu, addr, err); | ||
4717 | |||
4718 | return ret; | ||
4038 | } | 4719 | } |
4039 | 4720 | ||
4040 | /* allowed just for 8 bytes segments */ | 4721 | /* allowed just for 8 bytes segments */ |
@@ -4048,15 +4729,23 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | |||
4048 | 4729 | ||
4049 | if (dtable.limit < index * 8 + 7) | 4730 | if (dtable.limit < index * 8 + 7) |
4050 | return 1; | 4731 | return 1; |
4051 | return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu); | 4732 | return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu, NULL); |
4733 | } | ||
4734 | |||
4735 | static gpa_t get_tss_base_addr_write(struct kvm_vcpu *vcpu, | ||
4736 | struct desc_struct *seg_desc) | ||
4737 | { | ||
4738 | u32 base_addr = get_desc_base(seg_desc); | ||
4739 | |||
4740 | return kvm_mmu_gva_to_gpa_write(vcpu, base_addr, NULL); | ||
4052 | } | 4741 | } |
4053 | 4742 | ||
4054 | static gpa_t get_tss_base_addr(struct kvm_vcpu *vcpu, | 4743 | static gpa_t get_tss_base_addr_read(struct kvm_vcpu *vcpu, |
4055 | struct desc_struct *seg_desc) | 4744 | struct desc_struct *seg_desc) |
4056 | { | 4745 | { |
4057 | u32 base_addr = get_desc_base(seg_desc); | 4746 | u32 base_addr = get_desc_base(seg_desc); |
4058 | 4747 | ||
4059 | return vcpu->arch.mmu.gva_to_gpa(vcpu, base_addr); | 4748 | return kvm_mmu_gva_to_gpa_read(vcpu, base_addr, NULL); |
4060 | } | 4749 | } |
4061 | 4750 | ||
4062 | static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) | 4751 | static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) |
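get_tss_base_addr() is split into _read and _write variants so the MMU can apply the correct permission check while translating; the old mmu.gva_to_gpa() hook carried no access intent at all. A hedged sketch of the calling convention, assuming the kvm_mmu_gva_to_gpa_read() helper added elsewhere in this series:

	u32 err;
	gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, &err);

	if (gpa == UNMAPPED_GVA)			/* translation failed */
		kvm_inject_page_fault(vcpu, gva, err);	/* err carries the #PF error code */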
@@ -4067,18 +4756,6 @@ static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) | |||
4067 | return kvm_seg.selector; | 4756 | return kvm_seg.selector; |
4068 | } | 4757 | } |
4069 | 4758 | ||
4070 | static int load_segment_descriptor_to_kvm_desct(struct kvm_vcpu *vcpu, | ||
4071 | u16 selector, | ||
4072 | struct kvm_segment *kvm_seg) | ||
4073 | { | ||
4074 | struct desc_struct seg_desc; | ||
4075 | |||
4076 | if (load_guest_segment_descriptor(vcpu, selector, &seg_desc)) | ||
4077 | return 1; | ||
4078 | seg_desct_to_kvm_desct(&seg_desc, selector, kvm_seg); | ||
4079 | return 0; | ||
4080 | } | ||
4081 | |||
4082 | static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg) | 4759 | static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg) |
4083 | { | 4760 | { |
4084 | struct kvm_segment segvar = { | 4761 | struct kvm_segment segvar = { |
@@ -4096,34 +4773,122 @@ static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int se | |||
4096 | .unusable = 0, | 4773 | .unusable = 0, |
4097 | }; | 4774 | }; |
4098 | kvm_x86_ops->set_segment(vcpu, &segvar, seg); | 4775 | kvm_x86_ops->set_segment(vcpu, &segvar, seg); |
4099 | return 0; | 4776 | return X86EMUL_CONTINUE; |
4100 | } | 4777 | } |
4101 | 4778 | ||
4102 | static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg) | 4779 | static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg) |
4103 | { | 4780 | { |
4104 | return (seg != VCPU_SREG_LDTR) && | 4781 | return (seg != VCPU_SREG_LDTR) && |
4105 | (seg != VCPU_SREG_TR) && | 4782 | (seg != VCPU_SREG_TR) && |
4106 | (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_VM); | 4783 | (kvm_get_rflags(vcpu) & X86_EFLAGS_VM); |
4107 | } | 4784 | } |
4108 | 4785 | ||
4109 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | 4786 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg) |
4110 | int type_bits, int seg) | ||
4111 | { | 4787 | { |
4112 | struct kvm_segment kvm_seg; | 4788 | struct kvm_segment kvm_seg; |
4789 | struct desc_struct seg_desc; | ||
4790 | u8 dpl, rpl, cpl; | ||
4791 | unsigned err_vec = GP_VECTOR; | ||
4792 | u32 err_code = 0; | ||
4793 | bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */ | ||
4794 | int ret; | ||
4113 | 4795 | ||
4114 | if (is_vm86_segment(vcpu, seg) || !(vcpu->arch.cr0 & X86_CR0_PE)) | 4796 | if (is_vm86_segment(vcpu, seg) || !is_protmode(vcpu)) |
4115 | return kvm_load_realmode_segment(vcpu, selector, seg); | 4797 | return kvm_load_realmode_segment(vcpu, selector, seg); |
4116 | if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg)) | ||
4117 | return 1; | ||
4118 | kvm_seg.type |= type_bits; | ||
4119 | 4798 | ||
4120 | if (seg != VCPU_SREG_SS && seg != VCPU_SREG_CS && | 4799 | /* NULL selector is not valid for TR, CS and SS */ |
4121 | seg != VCPU_SREG_LDTR) | 4800 | if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR) |
4122 | if (!kvm_seg.s) | 4801 | && null_selector) |
4123 | kvm_seg.unusable = 1; | 4802 | goto exception; |
4803 | |||
4804 | /* TR should be in GDT only */ | ||
4805 | if (seg == VCPU_SREG_TR && (selector & (1 << 2))) | ||
4806 | goto exception; | ||
4807 | |||
4808 | ret = load_guest_segment_descriptor(vcpu, selector, &seg_desc); | ||
4809 | if (ret) | ||
4810 | return ret; | ||
4811 | |||
4812 | seg_desct_to_kvm_desct(&seg_desc, selector, &kvm_seg); | ||
4813 | |||
4814 | if (null_selector) { /* for NULL selector skip all following checks */ | ||
4815 | kvm_seg.unusable = 1; | ||
4816 | goto load; | ||
4817 | } | ||
4818 | |||
4819 | err_code = selector & 0xfffc; | ||
4820 | err_vec = GP_VECTOR; | ||
4124 | 4821 | ||
4822 | /* can't load system descriptor into segment selector */ | ||
4823 | if (seg <= VCPU_SREG_GS && !kvm_seg.s) | ||
4824 | goto exception; | ||
4825 | |||
4826 | if (!kvm_seg.present) { | ||
4827 | err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR; | ||
4828 | goto exception; | ||
4829 | } | ||
4830 | |||
4831 | rpl = selector & 3; | ||
4832 | dpl = kvm_seg.dpl; | ||
4833 | cpl = kvm_x86_ops->get_cpl(vcpu); | ||
4834 | |||
4835 | switch (seg) { | ||
4836 | case VCPU_SREG_SS: | ||
4837 | /* | ||
4838 | * segment is not a writable data segment or segment | ||
4839 | * selector's RPL != CPL or segment DPL != CPL | ||
4840 | */ | ||
4841 | if (rpl != cpl || (kvm_seg.type & 0xa) != 0x2 || dpl != cpl) | ||
4842 | goto exception; | ||
4843 | break; | ||
4844 | case VCPU_SREG_CS: | ||
4845 | if (!(kvm_seg.type & 8)) | ||
4846 | goto exception; | ||
4847 | |||
4848 | if (kvm_seg.type & 4) { | ||
4849 | /* conforming */ | ||
4850 | if (dpl > cpl) | ||
4851 | goto exception; | ||
4852 | } else { | ||
4853 | /* nonconforming */ | ||
4854 | if (rpl > cpl || dpl != cpl) | ||
4855 | goto exception; | ||
4856 | } | ||
4857 | /* CS(RPL) <- CPL */ | ||
4858 | selector = (selector & 0xfffc) | cpl; | ||
4859 | break; | ||
4860 | case VCPU_SREG_TR: | ||
4861 | if (kvm_seg.s || (kvm_seg.type != 1 && kvm_seg.type != 9)) | ||
4862 | goto exception; | ||
4863 | break; | ||
4864 | case VCPU_SREG_LDTR: | ||
4865 | if (kvm_seg.s || kvm_seg.type != 2) | ||
4866 | goto exception; | ||
4867 | break; | ||
4868 | default: /* DS, ES, FS, or GS */ | ||
4869 | /* | ||
4870 | * segment is not a data or readable code segment or | ||
4871 | * ((segment is a data or nonconforming code segment) | ||
4872 | * and (both RPL and CPL > DPL)) | ||
4873 | */ | ||
4874 | if ((kvm_seg.type & 0xa) == 0x8 || | ||
4875 | (((kvm_seg.type & 0xc) != 0xc) && (rpl > dpl && cpl > dpl))) | ||
4876 | goto exception; | ||
4877 | break; | ||
4878 | } | ||
4879 | |||
4880 | if (!kvm_seg.unusable && kvm_seg.s) { | ||
4881 | /* mark segment as accessed */ | ||
4882 | kvm_seg.type |= 1; | ||
4883 | seg_desc.type |= 1; | ||
4884 | save_guest_segment_descriptor(vcpu, selector, &seg_desc); | ||
4885 | } | ||
4886 | load: | ||
4125 | kvm_set_segment(vcpu, &kvm_seg, seg); | 4887 | kvm_set_segment(vcpu, &kvm_seg, seg); |
4126 | return 0; | 4888 | return X86EMUL_CONTINUE; |
4889 | exception: | ||
4890 | kvm_queue_exception_e(vcpu, err_vec, err_code); | ||
4891 | return X86EMUL_PROPAGATE_FAULT; | ||
4127 | } | 4892 | } |
4128 | 4893 | ||
4129 | static void save_state_to_tss32(struct kvm_vcpu *vcpu, | 4894 | static void save_state_to_tss32(struct kvm_vcpu *vcpu, |
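A worked example of the selector fields the checks above operate on, for a load of DS with selector 0x2b while running at CPL 3:

	u16 selector = 0x2b;		/* binary 101011 */
	u16 index = selector >> 3;	/* 5: the sixth descriptor */
	int ti = selector & (1 << 2);	/* set: look in the LDT, not the GDT */
	u8 rpl = selector & 3;		/* 3 */
	/* DS/ES/FS/GS rule above: fault only if the segment is execute-only,
	 * or it is data/nonconforming code and both rpl > dpl and cpl > dpl */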
@@ -4131,7 +4896,7 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu, | |||
4131 | { | 4896 | { |
4132 | tss->cr3 = vcpu->arch.cr3; | 4897 | tss->cr3 = vcpu->arch.cr3; |
4133 | tss->eip = kvm_rip_read(vcpu); | 4898 | tss->eip = kvm_rip_read(vcpu); |
4134 | tss->eflags = kvm_x86_ops->get_rflags(vcpu); | 4899 | tss->eflags = kvm_get_rflags(vcpu); |
4135 | tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX); | 4900 | tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX); |
4136 | tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); | 4901 | tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); |
4137 | tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX); | 4902 | tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX); |
@@ -4149,13 +4914,21 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu, | |||
4149 | tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR); | 4914 | tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR); |
4150 | } | 4915 | } |
4151 | 4916 | ||
4917 | static void kvm_load_segment_selector(struct kvm_vcpu *vcpu, u16 sel, int seg) | ||
4918 | { | ||
4919 | struct kvm_segment kvm_seg; | ||
4920 | kvm_get_segment(vcpu, &kvm_seg, seg); | ||
4921 | kvm_seg.selector = sel; | ||
4922 | kvm_set_segment(vcpu, &kvm_seg, seg); | ||
4923 | } | ||
4924 | |||
4152 | static int load_state_from_tss32(struct kvm_vcpu *vcpu, | 4925 | static int load_state_from_tss32(struct kvm_vcpu *vcpu, |
4153 | struct tss_segment_32 *tss) | 4926 | struct tss_segment_32 *tss) |
4154 | { | 4927 | { |
4155 | kvm_set_cr3(vcpu, tss->cr3); | 4928 | kvm_set_cr3(vcpu, tss->cr3); |
4156 | 4929 | ||
4157 | kvm_rip_write(vcpu, tss->eip); | 4930 | kvm_rip_write(vcpu, tss->eip); |
4158 | kvm_x86_ops->set_rflags(vcpu, tss->eflags | 2); | 4931 | kvm_set_rflags(vcpu, tss->eflags | 2); |
4159 | 4932 | ||
4160 | kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax); | 4933 | kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax); |
4161 | kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx); | 4934 | kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx); |
@@ -4166,25 +4939,41 @@ static int load_state_from_tss32(struct kvm_vcpu *vcpu, | |||
4166 | kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi); | 4939 | kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi); |
4167 | kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi); | 4940 | kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi); |
4168 | 4941 | ||
4169 | if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR)) | 4942 | /* |
4943 | * SDM says that segment selectors are loaded before segment | ||
4944 | * descriptors | ||
4945 | */ | ||
4946 | kvm_load_segment_selector(vcpu, tss->ldt_selector, VCPU_SREG_LDTR); | ||
4947 | kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES); | ||
4948 | kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS); | ||
4949 | kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS); | ||
4950 | kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS); | ||
4951 | kvm_load_segment_selector(vcpu, tss->fs, VCPU_SREG_FS); | ||
4952 | kvm_load_segment_selector(vcpu, tss->gs, VCPU_SREG_GS); | ||
4953 | |||
4954 | /* | ||
4955 | * Now load segment descriptors. If a fault happens at this stage | ||
4956 | * it is handled in the context of the new task | ||
4957 | */ | ||
4958 | if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, VCPU_SREG_LDTR)) | ||
4170 | return 1; | 4959 | return 1; |
4171 | 4960 | ||
4172 | if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) | 4961 | if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES)) |
4173 | return 1; | 4962 | return 1; |
4174 | 4963 | ||
4175 | if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) | 4964 | if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS)) |
4176 | return 1; | 4965 | return 1; |
4177 | 4966 | ||
4178 | if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) | 4967 | if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS)) |
4179 | return 1; | 4968 | return 1; |
4180 | 4969 | ||
4181 | if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) | 4970 | if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS)) |
4182 | return 1; | 4971 | return 1; |
4183 | 4972 | ||
4184 | if (kvm_load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS)) | 4973 | if (kvm_load_segment_descriptor(vcpu, tss->fs, VCPU_SREG_FS)) |
4185 | return 1; | 4974 | return 1; |
4186 | 4975 | ||
4187 | if (kvm_load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS)) | 4976 | if (kvm_load_segment_descriptor(vcpu, tss->gs, VCPU_SREG_GS)) |
4188 | return 1; | 4977 | return 1; |
4189 | return 0; | 4978 | return 0; |
4190 | } | 4979 | } |
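The two-pass structure above implements the SDM's task-switch ordering: all selector values become architecturally visible first, and only then are the descriptors validated, so that a #GP/#NP/#SS raised during validation is delivered in the context of the new task. Condensed to a single segment, the shape is:

	/* pass 1: publish the raw selector */
	kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES);

	/* pass 2: validate and load the descriptor; faults land in the new task */
	if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES))
		return 1;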
@@ -4193,7 +4982,7 @@ static void save_state_to_tss16(struct kvm_vcpu *vcpu, | |||
4193 | struct tss_segment_16 *tss) | 4982 | struct tss_segment_16 *tss) |
4194 | { | 4983 | { |
4195 | tss->ip = kvm_rip_read(vcpu); | 4984 | tss->ip = kvm_rip_read(vcpu); |
4196 | tss->flag = kvm_x86_ops->get_rflags(vcpu); | 4985 | tss->flag = kvm_get_rflags(vcpu); |
4197 | tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX); | 4986 | tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX); |
4198 | tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX); | 4987 | tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX); |
4199 | tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX); | 4988 | tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX); |
@@ -4208,14 +4997,13 @@ static void save_state_to_tss16(struct kvm_vcpu *vcpu, | |||
4208 | tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); | 4997 | tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); |
4209 | tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS); | 4998 | tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS); |
4210 | tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR); | 4999 | tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR); |
4211 | tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR); | ||
4212 | } | 5000 | } |
4213 | 5001 | ||
4214 | static int load_state_from_tss16(struct kvm_vcpu *vcpu, | 5002 | static int load_state_from_tss16(struct kvm_vcpu *vcpu, |
4215 | struct tss_segment_16 *tss) | 5003 | struct tss_segment_16 *tss) |
4216 | { | 5004 | { |
4217 | kvm_rip_write(vcpu, tss->ip); | 5005 | kvm_rip_write(vcpu, tss->ip); |
4218 | kvm_x86_ops->set_rflags(vcpu, tss->flag | 2); | 5006 | kvm_set_rflags(vcpu, tss->flag | 2); |
4219 | kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax); | 5007 | kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax); |
4220 | kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx); | 5008 | kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx); |
4221 | kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx); | 5009 | kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx); |
@@ -4225,19 +5013,33 @@ static int load_state_from_tss16(struct kvm_vcpu *vcpu, | |||
4225 | kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si); | 5013 | kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si); |
4226 | kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di); | 5014 | kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di); |
4227 | 5015 | ||
4228 | if (kvm_load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR)) | 5016 | /* |
5017 | * SDM says that segment selectors are loaded before segment | ||
5018 | * descriptors | ||
5019 | */ | ||
5020 | kvm_load_segment_selector(vcpu, tss->ldt, VCPU_SREG_LDTR); | ||
5021 | kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES); | ||
5022 | kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS); | ||
5023 | kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS); | ||
5024 | kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS); | ||
5025 | |||
5026 | /* | ||
5027 | * Now load segment descriptors. If a fault happens at this stage | ||
5028 | * it is handled in the context of the new task | ||
5029 | */ | ||
5030 | if (kvm_load_segment_descriptor(vcpu, tss->ldt, VCPU_SREG_LDTR)) | ||
4229 | return 1; | 5031 | return 1; |
4230 | 5032 | ||
4231 | if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) | 5033 | if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES)) |
4232 | return 1; | 5034 | return 1; |
4233 | 5035 | ||
4234 | if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) | 5036 | if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS)) |
4235 | return 1; | 5037 | return 1; |
4236 | 5038 | ||
4237 | if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) | 5039 | if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS)) |
4238 | return 1; | 5040 | return 1; |
4239 | 5041 | ||
4240 | if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) | 5042 | if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS)) |
4241 | return 1; | 5043 | return 1; |
4242 | return 0; | 5044 | return 0; |
4243 | } | 5045 | } |
@@ -4259,7 +5061,7 @@ static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, | |||
4259 | sizeof tss_segment_16)) | 5061 | sizeof tss_segment_16)) |
4260 | goto out; | 5062 | goto out; |
4261 | 5063 | ||
4262 | if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc), | 5064 | if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc), |
4263 | &tss_segment_16, sizeof tss_segment_16)) | 5065 | &tss_segment_16, sizeof tss_segment_16)) |
4264 | goto out; | 5066 | goto out; |
4265 | 5067 | ||
@@ -4267,7 +5069,7 @@ static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, | |||
4267 | tss_segment_16.prev_task_link = old_tss_sel; | 5069 | tss_segment_16.prev_task_link = old_tss_sel; |
4268 | 5070 | ||
4269 | if (kvm_write_guest(vcpu->kvm, | 5071 | if (kvm_write_guest(vcpu->kvm, |
4270 | get_tss_base_addr(vcpu, nseg_desc), | 5072 | get_tss_base_addr_write(vcpu, nseg_desc), |
4271 | &tss_segment_16.prev_task_link, | 5073 | &tss_segment_16.prev_task_link, |
4272 | sizeof tss_segment_16.prev_task_link)) | 5074 | sizeof tss_segment_16.prev_task_link)) |
4273 | goto out; | 5075 | goto out; |
@@ -4298,7 +5100,7 @@ static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, | |||
4298 | sizeof tss_segment_32)) | 5100 | sizeof tss_segment_32)) |
4299 | goto out; | 5101 | goto out; |
4300 | 5102 | ||
4301 | if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc), | 5103 | if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc), |
4302 | &tss_segment_32, sizeof tss_segment_32)) | 5104 | &tss_segment_32, sizeof tss_segment_32)) |
4303 | goto out; | 5105 | goto out; |
4304 | 5106 | ||
@@ -4306,7 +5108,7 @@ static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, | |||
4306 | tss_segment_32.prev_task_link = old_tss_sel; | 5108 | tss_segment_32.prev_task_link = old_tss_sel; |
4307 | 5109 | ||
4308 | if (kvm_write_guest(vcpu->kvm, | 5110 | if (kvm_write_guest(vcpu->kvm, |
4309 | get_tss_base_addr(vcpu, nseg_desc), | 5111 | get_tss_base_addr_write(vcpu, nseg_desc), |
4310 | &tss_segment_32.prev_task_link, | 5112 | &tss_segment_32.prev_task_link, |
4311 | sizeof tss_segment_32.prev_task_link)) | 5113 | sizeof tss_segment_32.prev_task_link)) |
4312 | goto out; | 5114 | goto out; |
@@ -4328,8 +5130,9 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
4328 | int ret = 0; | 5130 | int ret = 0; |
4329 | u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR); | 5131 | u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR); |
4330 | u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR); | 5132 | u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR); |
5133 | u32 desc_limit; | ||
4331 | 5134 | ||
4332 | old_tss_base = vcpu->arch.mmu.gva_to_gpa(vcpu, old_tss_base); | 5135 | old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL); |
4333 | 5136 | ||
4334 | /* FIXME: Handle errors. Failure to read either TSS or their | 5137 | /* FIXME: Handle errors. Failure to read either TSS or their |
4335 | * descriptors should generate a pagefault. | 5138 | * descriptors should generate a pagefault. |
@@ -4350,7 +5153,10 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
4350 | } | 5153 | } |
4351 | } | 5154 | } |
4352 | 5155 | ||
4353 | if (!nseg_desc.p || get_desc_limit(&nseg_desc) < 0x67) { | 5156 | desc_limit = get_desc_limit(&nseg_desc); |
5157 | if (!nseg_desc.p || | ||
5158 | ((desc_limit < 0x67 && (nseg_desc.type & 8)) || | ||
5159 | desc_limit < 0x2b)) { | ||
4354 | kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc); | 5160 | kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc); |
4355 | return 1; | 5161 | return 1; |
4356 | } | 5162 | } |
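The tightened limit check encodes the architectural minimum TSS sizes: a 32-bit TSS occupies 104 bytes, so its limit (highest valid offset) must be at least 0x67; a 16-bit TSS occupies 44 bytes, so its limit must be at least 0x2b. The compound condition above is equivalent to:

	/* fault if the descriptor is not present or the TSS is too small */
	bool too_small = (nseg_desc.type & 8) ? desc_limit < 0x67	/* 32-bit TSS */
					      : desc_limit < 0x2b;	/* 16-bit TSS */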
@@ -4361,8 +5167,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
4361 | } | 5167 | } |
4362 | 5168 | ||
4363 | if (reason == TASK_SWITCH_IRET) { | 5169 | if (reason == TASK_SWITCH_IRET) { |
4364 | u32 eflags = kvm_x86_ops->get_rflags(vcpu); | 5170 | u32 eflags = kvm_get_rflags(vcpu); |
4365 | kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT); | 5171 | kvm_set_rflags(vcpu, eflags & ~X86_EFLAGS_NT); |
4366 | } | 5172 | } |
4367 | 5173 | ||
4368 | /* set back link to prev task only if NT bit is set in eflags | 5174 | /* set back link to prev task only if NT bit is set in eflags |
@@ -4370,11 +5176,6 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
4370 | if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) | 5176 | if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) |
4371 | old_tss_sel = 0xffff; | 5177 | old_tss_sel = 0xffff; |
4372 | 5178 | ||
4373 | /* set back link to prev task only if NT bit is set in eflags | ||
4374 | note that old_tss_sel is not used after this point */ | ||
4375 | if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) | ||
4376 | old_tss_sel = 0xffff; | ||
4377 | |||
4378 | if (nseg_desc.type & 8) | 5179 | if (nseg_desc.type & 8) |
4379 | ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel, | 5180 | ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel, |
4380 | old_tss_base, &nseg_desc); | 5181 | old_tss_base, &nseg_desc); |
@@ -4383,8 +5184,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
4383 | old_tss_base, &nseg_desc); | 5184 | old_tss_base, &nseg_desc); |
4384 | 5185 | ||
4385 | if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { | 5186 | if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { |
4386 | u32 eflags = kvm_x86_ops->get_rflags(vcpu); | 5187 | u32 eflags = kvm_get_rflags(vcpu); |
4387 | kvm_x86_ops->set_rflags(vcpu, eflags | X86_EFLAGS_NT); | 5188 | kvm_set_rflags(vcpu, eflags | X86_EFLAGS_NT); |
4388 | } | 5189 | } |
4389 | 5190 | ||
4390 | if (reason != TASK_SWITCH_IRET) { | 5191 | if (reason != TASK_SWITCH_IRET) { |
@@ -4393,7 +5194,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
4393 | &nseg_desc); | 5194 | &nseg_desc); |
4394 | } | 5195 | } |
4395 | 5196 | ||
4396 | kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 | X86_CR0_TS); | 5197 | kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0(vcpu) | X86_CR0_TS); |
4397 | seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg); | 5198 | seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg); |
4398 | tr_seg.type = 11; | 5199 | tr_seg.type = 11; |
4399 | kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR); | 5200 | kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR); |
@@ -4424,20 +5225,20 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
4424 | 5225 | ||
4425 | kvm_set_cr8(vcpu, sregs->cr8); | 5226 | kvm_set_cr8(vcpu, sregs->cr8); |
4426 | 5227 | ||
4427 | mmu_reset_needed |= vcpu->arch.shadow_efer != sregs->efer; | 5228 | mmu_reset_needed |= vcpu->arch.efer != sregs->efer; |
4428 | kvm_x86_ops->set_efer(vcpu, sregs->efer); | 5229 | kvm_x86_ops->set_efer(vcpu, sregs->efer); |
4429 | kvm_set_apic_base(vcpu, sregs->apic_base); | 5230 | kvm_set_apic_base(vcpu, sregs->apic_base); |
4430 | 5231 | ||
4431 | kvm_x86_ops->decache_cr4_guest_bits(vcpu); | 5232 | mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0; |
4432 | |||
4433 | mmu_reset_needed |= vcpu->arch.cr0 != sregs->cr0; | ||
4434 | kvm_x86_ops->set_cr0(vcpu, sregs->cr0); | 5233 | kvm_x86_ops->set_cr0(vcpu, sregs->cr0); |
4435 | vcpu->arch.cr0 = sregs->cr0; | 5234 | vcpu->arch.cr0 = sregs->cr0; |
4436 | 5235 | ||
4437 | mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4; | 5236 | mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; |
4438 | kvm_x86_ops->set_cr4(vcpu, sregs->cr4); | 5237 | kvm_x86_ops->set_cr4(vcpu, sregs->cr4); |
4439 | if (!is_long_mode(vcpu) && is_pae(vcpu)) | 5238 | if (!is_long_mode(vcpu) && is_pae(vcpu)) { |
4440 | load_pdptrs(vcpu, vcpu->arch.cr3); | 5239 | load_pdptrs(vcpu, vcpu->arch.cr3); |
5240 | mmu_reset_needed = 1; | ||
5241 | } | ||
4441 | 5242 | ||
4442 | if (mmu_reset_needed) | 5243 | if (mmu_reset_needed) |
4443 | kvm_mmu_reset_context(vcpu); | 5244 | kvm_mmu_reset_context(vcpu); |
@@ -4467,7 +5268,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
4467 | /* Older userspace won't unhalt the vcpu on reset. */ | 5268 | /* Older userspace won't unhalt the vcpu on reset. */ |
4468 | if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 && | 5269 | if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 && |
4469 | sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 && | 5270 | sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 && |
4470 | !(vcpu->arch.cr0 & X86_CR0_PE)) | 5271 | !is_protmode(vcpu)) |
4471 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | 5272 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
4472 | 5273 | ||
4473 | vcpu_put(vcpu); | 5274 | vcpu_put(vcpu); |
@@ -4478,12 +5279,32 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
4478 | int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | 5279 | int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, |
4479 | struct kvm_guest_debug *dbg) | 5280 | struct kvm_guest_debug *dbg) |
4480 | { | 5281 | { |
5282 | unsigned long rflags; | ||
4481 | int i, r; | 5283 | int i, r; |
4482 | 5284 | ||
4483 | vcpu_load(vcpu); | 5285 | vcpu_load(vcpu); |
4484 | 5286 | ||
4485 | if ((dbg->control & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) == | 5287 | if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) { |
4486 | (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) { | 5288 | r = -EBUSY; |
5289 | if (vcpu->arch.exception.pending) | ||
5290 | goto unlock_out; | ||
5291 | if (dbg->control & KVM_GUESTDBG_INJECT_DB) | ||
5292 | kvm_queue_exception(vcpu, DB_VECTOR); | ||
5293 | else | ||
5294 | kvm_queue_exception(vcpu, BP_VECTOR); | ||
5295 | } | ||
5296 | |||
5297 | /* | ||
5298 | * Read rflags as long as potentially injected trace flags are still | ||
5299 | * filtered out. | ||
5300 | */ | ||
5301 | rflags = kvm_get_rflags(vcpu); | ||
5302 | |||
5303 | vcpu->guest_debug = dbg->control; | ||
5304 | if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE)) | ||
5305 | vcpu->guest_debug = 0; | ||
5306 | |||
5307 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { | ||
4487 | for (i = 0; i < KVM_NR_DB_REGS; ++i) | 5308 | for (i = 0; i < KVM_NR_DB_REGS; ++i) |
4488 | vcpu->arch.eff_db[i] = dbg->arch.debugreg[i]; | 5309 | vcpu->arch.eff_db[i] = dbg->arch.debugreg[i]; |
4489 | vcpu->arch.switch_db_regs = | 5310 | vcpu->arch.switch_db_regs = |
@@ -4494,13 +5315,23 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | |||
4494 | vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); | 5315 | vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); |
4495 | } | 5316 | } |
4496 | 5317 | ||
4497 | r = kvm_x86_ops->set_guest_debug(vcpu, dbg); | 5318 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { |
5319 | vcpu->arch.singlestep_cs = | ||
5320 | get_segment_selector(vcpu, VCPU_SREG_CS); | ||
5321 | vcpu->arch.singlestep_rip = kvm_rip_read(vcpu); | ||
5322 | } | ||
5323 | |||
5324 | /* | ||
5325 | * Trigger an rflags update that will inject or remove the trace | ||
5326 | * flags. | ||
5327 | */ | ||
5328 | kvm_set_rflags(vcpu, rflags); | ||
4498 | 5329 | ||
4499 | if (dbg->control & KVM_GUESTDBG_INJECT_DB) | 5330 | kvm_x86_ops->set_guest_debug(vcpu, dbg); |
4500 | kvm_queue_exception(vcpu, DB_VECTOR); | ||
4501 | else if (dbg->control & KVM_GUESTDBG_INJECT_BP) | ||
4502 | kvm_queue_exception(vcpu, BP_VECTOR); | ||
4503 | 5331 | ||
5332 | r = 0; | ||
5333 | |||
5334 | unlock_out: | ||
4504 | vcpu_put(vcpu); | 5335 | vcpu_put(vcpu); |
4505 | 5336 | ||
4506 | return r; | 5337 | return r; |
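Exception injection is now rejected with -EBUSY while another exception is pending, and all trace-flag handling funnels through kvm_get_rflags()/kvm_set_rflags() (defined near the end of this diff): the getter filters a previously injected TF out of the value userspace sees, and the setter re-injects TF|RF while single-stepping remains armed at the recorded CS:RIP. The ioctl's core sequence, as a sketch:

	unsigned long rflags = kvm_get_rflags(vcpu);	/* injected TF already masked */

	vcpu->guest_debug = dbg->control;
	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
		vcpu->arch.singlestep_cs = get_segment_selector(vcpu, VCPU_SREG_CS);
		vcpu->arch.singlestep_rip = kvm_rip_read(vcpu);
	}
	kvm_set_rflags(vcpu, rflags);	/* re-arms TF|RF if still single-stepping */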
@@ -4535,11 +5366,12 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, | |||
4535 | { | 5366 | { |
4536 | unsigned long vaddr = tr->linear_address; | 5367 | unsigned long vaddr = tr->linear_address; |
4537 | gpa_t gpa; | 5368 | gpa_t gpa; |
5369 | int idx; | ||
4538 | 5370 | ||
4539 | vcpu_load(vcpu); | 5371 | vcpu_load(vcpu); |
4540 | down_read(&vcpu->kvm->slots_lock); | 5372 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
4541 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, vaddr); | 5373 | gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL); |
4542 | up_read(&vcpu->kvm->slots_lock); | 5374 | srcu_read_unlock(&vcpu->kvm->srcu, idx); |
4543 | tr->physical_address = gpa; | 5375 | tr->physical_address = gpa; |
4544 | tr->valid = gpa != UNMAPPED_GVA; | 5376 | tr->valid = gpa != UNMAPPED_GVA; |
4545 | tr->writeable = 1; | 5377 | tr->writeable = 1; |
@@ -4620,14 +5452,14 @@ EXPORT_SYMBOL_GPL(fx_init); | |||
4620 | 5452 | ||
4621 | void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) | 5453 | void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) |
4622 | { | 5454 | { |
4623 | if (!vcpu->fpu_active || vcpu->guest_fpu_loaded) | 5455 | if (vcpu->guest_fpu_loaded) |
4624 | return; | 5456 | return; |
4625 | 5457 | ||
4626 | vcpu->guest_fpu_loaded = 1; | 5458 | vcpu->guest_fpu_loaded = 1; |
4627 | kvm_fx_save(&vcpu->arch.host_fx_image); | 5459 | kvm_fx_save(&vcpu->arch.host_fx_image); |
4628 | kvm_fx_restore(&vcpu->arch.guest_fx_image); | 5460 | kvm_fx_restore(&vcpu->arch.guest_fx_image); |
5461 | trace_kvm_fpu(1); | ||
4629 | } | 5462 | } |
4630 | EXPORT_SYMBOL_GPL(kvm_load_guest_fpu); | ||
4631 | 5463 | ||
4632 | void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) | 5464 | void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) |
4633 | { | 5465 | { |
@@ -4638,8 +5470,9 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) | |||
4638 | kvm_fx_save(&vcpu->arch.guest_fx_image); | 5470 | kvm_fx_save(&vcpu->arch.guest_fx_image); |
4639 | kvm_fx_restore(&vcpu->arch.host_fx_image); | 5471 | kvm_fx_restore(&vcpu->arch.host_fx_image); |
4640 | ++vcpu->stat.fpu_reload; | 5472 | ++vcpu->stat.fpu_reload; |
5473 | set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests); | ||
5474 | trace_kvm_fpu(0); | ||
4641 | } | 5475 | } |
4642 | EXPORT_SYMBOL_GPL(kvm_put_guest_fpu); | ||
4643 | 5476 | ||
4644 | void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) | 5477 | void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) |
4645 | { | 5478 | { |
@@ -4701,14 +5534,26 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) | |||
4701 | return kvm_x86_ops->vcpu_reset(vcpu); | 5534 | return kvm_x86_ops->vcpu_reset(vcpu); |
4702 | } | 5535 | } |
4703 | 5536 | ||
4704 | void kvm_arch_hardware_enable(void *garbage) | 5537 | int kvm_arch_hardware_enable(void *garbage) |
4705 | { | 5538 | { |
4706 | kvm_x86_ops->hardware_enable(garbage); | 5539 | /* |
5540 | * Since this may be called from a hotplug notification, | ||
5541 | * we can't get the CPU frequency directly. | ||
5542 | */ | ||
5543 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { | ||
5544 | int cpu = raw_smp_processor_id(); | ||
5545 | per_cpu(cpu_tsc_khz, cpu) = 0; | ||
5546 | } | ||
5547 | |||
5548 | kvm_shared_msr_cpu_online(); | ||
5549 | |||
5550 | return kvm_x86_ops->hardware_enable(garbage); | ||
4707 | } | 5551 | } |
4708 | 5552 | ||
4709 | void kvm_arch_hardware_disable(void *garbage) | 5553 | void kvm_arch_hardware_disable(void *garbage) |
4710 | { | 5554 | { |
4711 | kvm_x86_ops->hardware_disable(garbage); | 5555 | kvm_x86_ops->hardware_disable(garbage); |
5556 | drop_user_return_notifiers(garbage); | ||
4712 | } | 5557 | } |
4713 | 5558 | ||
4714 | int kvm_arch_hardware_setup(void) | 5559 | int kvm_arch_hardware_setup(void) |
@@ -4762,12 +5607,13 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
4762 | GFP_KERNEL); | 5607 | GFP_KERNEL); |
4763 | if (!vcpu->arch.mce_banks) { | 5608 | if (!vcpu->arch.mce_banks) { |
4764 | r = -ENOMEM; | 5609 | r = -ENOMEM; |
4765 | goto fail_mmu_destroy; | 5610 | goto fail_free_lapic; |
4766 | } | 5611 | } |
4767 | vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; | 5612 | vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; |
4768 | 5613 | ||
4769 | return 0; | 5614 | return 0; |
4770 | 5615 | fail_free_lapic: | |
5616 | kvm_free_lapic(vcpu); | ||
4771 | fail_mmu_destroy: | 5617 | fail_mmu_destroy: |
4772 | kvm_mmu_destroy(vcpu); | 5618 | kvm_mmu_destroy(vcpu); |
4773 | fail_free_pio_data: | 5619 | fail_free_pio_data: |
@@ -4778,10 +5624,13 @@ fail: | |||
4778 | 5624 | ||
4779 | void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) | 5625 | void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) |
4780 | { | 5626 | { |
5627 | int idx; | ||
5628 | |||
5629 | kfree(vcpu->arch.mce_banks); | ||
4781 | kvm_free_lapic(vcpu); | 5630 | kvm_free_lapic(vcpu); |
4782 | down_read(&vcpu->kvm->slots_lock); | 5631 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
4783 | kvm_mmu_destroy(vcpu); | 5632 | kvm_mmu_destroy(vcpu); |
4784 | up_read(&vcpu->kvm->slots_lock); | 5633 | srcu_read_unlock(&vcpu->kvm->srcu, idx); |
4785 | free_page((unsigned long)vcpu->arch.pio_data); | 5634 | free_page((unsigned long)vcpu->arch.pio_data); |
4786 | } | 5635 | } |
4787 | 5636 | ||
@@ -4792,6 +5641,12 @@ struct kvm *kvm_arch_create_vm(void) | |||
4792 | if (!kvm) | 5641 | if (!kvm) |
4793 | return ERR_PTR(-ENOMEM); | 5642 | return ERR_PTR(-ENOMEM); |
4794 | 5643 | ||
5644 | kvm->arch.aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL); | ||
5645 | if (!kvm->arch.aliases) { | ||
5646 | kfree(kvm); | ||
5647 | return ERR_PTR(-ENOMEM); | ||
5648 | } | ||
5649 | |||
4795 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); | 5650 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); |
4796 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); | 5651 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); |
4797 | 5652 | ||
@@ -4848,16 +5703,18 @@ void kvm_arch_destroy_vm(struct kvm *kvm) | |||
4848 | put_page(kvm->arch.apic_access_page); | 5703 | put_page(kvm->arch.apic_access_page); |
4849 | if (kvm->arch.ept_identity_pagetable) | 5704 | if (kvm->arch.ept_identity_pagetable) |
4850 | put_page(kvm->arch.ept_identity_pagetable); | 5705 | put_page(kvm->arch.ept_identity_pagetable); |
5706 | cleanup_srcu_struct(&kvm->srcu); | ||
5707 | kfree(kvm->arch.aliases); | ||
4851 | kfree(kvm); | 5708 | kfree(kvm); |
4852 | } | 5709 | } |
4853 | 5710 | ||
4854 | int kvm_arch_set_memory_region(struct kvm *kvm, | 5711 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
4855 | struct kvm_userspace_memory_region *mem, | 5712 | struct kvm_memory_slot *memslot, |
4856 | struct kvm_memory_slot old, | 5713 | struct kvm_memory_slot old, |
5714 | struct kvm_userspace_memory_region *mem, | ||
4857 | int user_alloc) | 5715 | int user_alloc) |
4858 | { | 5716 | { |
4859 | int npages = mem->memory_size >> PAGE_SHIFT; | 5717 | int npages = memslot->npages; |
4860 | struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot]; | ||
4861 | 5718 | ||
4862 | /* To keep backward compatibility with older userspace, | 5719 | /* To keep backward compatibility with older userspace, |
4863 | * x86 needs to handle the !user_alloc case. | 5720 | * x86 needs to handle the !user_alloc case. |
@@ -4877,26 +5734,35 @@ int kvm_arch_set_memory_region(struct kvm *kvm, | |||
4877 | if (IS_ERR((void *)userspace_addr)) | 5734 | if (IS_ERR((void *)userspace_addr)) |
4878 | return PTR_ERR((void *)userspace_addr); | 5735 | return PTR_ERR((void *)userspace_addr); |
4879 | 5736 | ||
4880 | /* set userspace_addr atomically for kvm_hva_to_rmapp */ | ||
4881 | spin_lock(&kvm->mmu_lock); | ||
4882 | memslot->userspace_addr = userspace_addr; | 5737 | memslot->userspace_addr = userspace_addr; |
4883 | spin_unlock(&kvm->mmu_lock); | ||
4884 | } else { | ||
4885 | if (!old.user_alloc && old.rmap) { | ||
4886 | int ret; | ||
4887 | |||
4888 | down_write(¤t->mm->mmap_sem); | ||
4889 | ret = do_munmap(current->mm, old.userspace_addr, | ||
4890 | old.npages * PAGE_SIZE); | ||
4891 | up_write(¤t->mm->mmap_sem); | ||
4892 | if (ret < 0) | ||
4893 | printk(KERN_WARNING | ||
4894 | "kvm_vm_ioctl_set_memory_region: " | ||
4895 | "failed to munmap memory\n"); | ||
4896 | } | ||
4897 | } | 5738 | } |
4898 | } | 5739 | } |
4899 | 5740 | ||
5741 | |||
5742 | return 0; | ||
5743 | } | ||
5744 | |||
5745 | void kvm_arch_commit_memory_region(struct kvm *kvm, | ||
5746 | struct kvm_userspace_memory_region *mem, | ||
5747 | struct kvm_memory_slot old, | ||
5748 | int user_alloc) | ||
5749 | { | ||
5750 | |||
5751 | int npages = mem->memory_size >> PAGE_SHIFT; | ||
5752 | |||
5753 | if (!user_alloc && !old.user_alloc && old.rmap && !npages) { | ||
5754 | int ret; | ||
5755 | |||
5756 | down_write(¤t->mm->mmap_sem); | ||
5757 | ret = do_munmap(current->mm, old.userspace_addr, | ||
5758 | old.npages * PAGE_SIZE); | ||
5759 | up_write(¤t->mm->mmap_sem); | ||
5760 | if (ret < 0) | ||
5761 | printk(KERN_WARNING | ||
5762 | "kvm_vm_ioctl_set_memory_region: " | ||
5763 | "failed to munmap memory\n"); | ||
5764 | } | ||
5765 | |||
4900 | spin_lock(&kvm->mmu_lock); | 5766 | spin_lock(&kvm->mmu_lock); |
4901 | if (!kvm->arch.n_requested_mmu_pages) { | 5767 | if (!kvm->arch.n_requested_mmu_pages) { |
4902 | unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); | 5768 | unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); |
@@ -4905,8 +5771,6 @@ int kvm_arch_set_memory_region(struct kvm *kvm, | |||
4905 | 5771 | ||
4906 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); | 5772 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); |
4907 | spin_unlock(&kvm->mmu_lock); | 5773 | spin_unlock(&kvm->mmu_lock); |
4908 | |||
4909 | return 0; | ||
4910 | } | 5774 | } |
4911 | 5775 | ||
4912 | void kvm_arch_flush_shadow(struct kvm *kvm) | 5776 | void kvm_arch_flush_shadow(struct kvm *kvm) |
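kvm_arch_set_memory_region() is split into a prepare phase, which may fail and runs before the new memslot array is published, and a commit phase, which must not fail and runs after publication. A hedged sketch of how the generic __kvm_set_memory_region() path is expected to drive the pair (the local variable names are illustrative):

	r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc);
	if (r)
		goto out_free;			/* nothing published yet; safe to abort */

	/* publish the new memslot array, then synchronize SRCU readers ... */

	kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);	/* no error path */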
@@ -4946,8 +5810,36 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) | |||
4946 | return kvm_x86_ops->interrupt_allowed(vcpu); | 5810 | return kvm_x86_ops->interrupt_allowed(vcpu); |
4947 | } | 5811 | } |
4948 | 5812 | ||
5813 | unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) | ||
5814 | { | ||
5815 | unsigned long rflags; | ||
5816 | |||
5817 | rflags = kvm_x86_ops->get_rflags(vcpu); | ||
5818 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | ||
5819 | rflags &= ~(unsigned long)(X86_EFLAGS_TF | X86_EFLAGS_RF); | ||
5820 | return rflags; | ||
5821 | } | ||
5822 | EXPORT_SYMBOL_GPL(kvm_get_rflags); | ||
5823 | |||
5824 | void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | ||
5825 | { | ||
5826 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && | ||
5827 | vcpu->arch.singlestep_cs == | ||
5828 | get_segment_selector(vcpu, VCPU_SREG_CS) && | ||
5829 | vcpu->arch.singlestep_rip == kvm_rip_read(vcpu)) | ||
5830 | rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF; | ||
5831 | kvm_x86_ops->set_rflags(vcpu, rflags); | ||
5832 | } | ||
5833 | EXPORT_SYMBOL_GPL(kvm_set_rflags); | ||
5834 | |||
4949 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); | 5835 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); |
4950 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); | 5836 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); |
4951 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); | 5837 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); |
4952 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr); | 5838 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr); |
4953 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr); | 5839 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr); |
5840 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun); | ||
5841 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit); | ||
5842 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject); | ||
5843 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit); | ||
5844 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); | ||
5845 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); | ||
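With these wrappers, every rflags consumer in the file (task switch, TSS save/restore, get_regs) sees the guest's architectural view rather than the raw hardware value. A typical call site, matching the TASK_SWITCH_IRET hunk above:

	u32 eflags = kvm_get_rflags(vcpu);		/* debugger-injected TF filtered out */
	kvm_set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);	/* setter re-adds TF|RF if needed */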
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 5eadea585d2a..2d101639bd8d 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define ARCH_X86_KVM_X86_H | 2 | #define ARCH_X86_KVM_X86_H |
3 | 3 | ||
4 | #include <linux/kvm_host.h> | 4 | #include <linux/kvm_host.h> |
5 | #include "kvm_cache_regs.h" | ||
5 | 6 | ||
6 | static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) | 7 | static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) |
7 | { | 8 | { |
@@ -35,4 +36,33 @@ static inline bool kvm_exception_is_soft(unsigned int nr) | |||
35 | struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, | 36 | struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, |
36 | u32 function, u32 index); | 37 | u32 function, u32 index); |
37 | 38 | ||
39 | static inline bool is_protmode(struct kvm_vcpu *vcpu) | ||
40 | { | ||
41 | return kvm_read_cr0_bits(vcpu, X86_CR0_PE); | ||
42 | } | ||
43 | |||
44 | static inline int is_long_mode(struct kvm_vcpu *vcpu) | ||
45 | { | ||
46 | #ifdef CONFIG_X86_64 | ||
47 | return vcpu->arch.efer & EFER_LMA; | ||
48 | #else | ||
49 | return 0; | ||
50 | #endif | ||
51 | } | ||
52 | |||
53 | static inline int is_pae(struct kvm_vcpu *vcpu) | ||
54 | { | ||
55 | return kvm_read_cr4_bits(vcpu, X86_CR4_PAE); | ||
56 | } | ||
57 | |||
58 | static inline int is_pse(struct kvm_vcpu *vcpu) | ||
59 | { | ||
60 | return kvm_read_cr4_bits(vcpu, X86_CR4_PSE); | ||
61 | } | ||
62 | |||
63 | static inline int is_paging(struct kvm_vcpu *vcpu) | ||
64 | { | ||
65 | return kvm_read_cr0_bits(vcpu, X86_CR0_PG); | ||
66 | } | ||
67 | |||
38 | #endif | 68 | #endif |
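These predicates centralize CR0/CR4/EFER mode tests that were previously open-coded against vcpu->arch, and they go through the kvm_read_cr*_bits() cache so only the needed bits are decached. Typical use, matching call sites earlier in this diff:

	if (!is_protmode(vcpu))				/* CR0.PE clear: real mode */
		return kvm_load_realmode_segment(vcpu, selector, seg);

	if (!is_long_mode(vcpu) && is_pae(vcpu))	/* 32-bit PAE paging */
		load_pdptrs(vcpu, vcpu->arch.cr3);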