Diffstat (limited to 'drivers/kvm/x86.c')
-rw-r--r-- | drivers/kvm/x86.c | 224
1 files changed, 224 insertions, 0 deletions
diff --git a/drivers/kvm/x86.c b/drivers/kvm/x86.c
index c26e3715bee8..a728af8a83e5 100644
--- a/drivers/kvm/x86.c
+++ b/drivers/kvm/x86.c
@@ -27,6 +27,17 @@
 #include <asm/uaccess.h>
 
 #define MAX_IO_MSRS 256
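+/*
+ * Bits outside these masks are architecturally reserved; a guest write
+ * that sets any of them is answered with #GP below.
+ */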
+#define CR0_RESERVED_BITS \
+        (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
+                          | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
+                          | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
+#define CR4_RESERVED_BITS \
+        (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE \
+                          | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
+                          | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \
+                          | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
+
+#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
 
 unsigned long segment_base(u16 selector)
 {
@@ -78,6 +89,219 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
 }
 EXPORT_SYMBOL_GPL(kvm_set_apic_base);
 
+static void inject_gp(struct kvm_vcpu *vcpu)
+{
+        kvm_x86_ops->inject_gp(vcpu, 0);
+}
+
+/*
+ * Load the pae pdptrs.  Return true if they are all valid.
+ */
+int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
+{
+        gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
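+        /*
+         * In PAE mode the PDPT is 32-byte aligned within its page: the
+         * in-page offset divided by 32 selects the table, and each table
+         * holds four 8-byte entries, hence the >> 5 followed by << 2.
+         */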
+        unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
+        int i;
+        int ret;
+        u64 pdpte[ARRAY_SIZE(vcpu->pdptrs)];
+
+        mutex_lock(&vcpu->kvm->lock);
+        ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte,
+                                  offset * sizeof(u64), sizeof(pdpte));
+        if (ret < 0) {
+                ret = 0;
+                goto out;
+        }
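+        /*
+         * A present PDPTE (bit 0 set) must have all reserved bits clear:
+         * bits 1-2, 5-8 and everything above bit 35.
+         */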
+        for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
+                if ((pdpte[i] & 1) && (pdpte[i] & 0xfffffff0000001e6ull)) {
+                        ret = 0;
+                        goto out;
+                }
+        }
+        ret = 1;
+
+        memcpy(vcpu->pdptrs, pdpte, sizeof(vcpu->pdptrs));
+out:
+        mutex_unlock(&vcpu->kvm->lock);
+
+        return ret;
+}
+
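+/*
+ * set_cr0 validates the incoming value, hands it to the vendor backend
+ * through kvm_x86_ops and then rebuilds the shadow MMU context.
+ */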
+void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+{
+        if (cr0 & CR0_RESERVED_BITS) {
+                printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
+                       cr0, vcpu->cr0);
+                inject_gp(vcpu);
+                return;
+        }
+
+        if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
+                printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n");
+                inject_gp(vcpu);
+                return;
+        }
+
+        if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) {
+                printk(KERN_DEBUG "set_cr0: #GP, set PG flag "
+                       "and a clear PE flag\n");
+                inject_gp(vcpu);
+                return;
+        }
+
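+        /*
+         * Paging is being switched on: in long mode (EFER.LME) this also
+         * requires PAE and a non-64-bit code segment; otherwise, if PAE is
+         * enabled, the PDPTRs must load cleanly from the current cr3.
+         */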
+        if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
+#ifdef CONFIG_X86_64
+                if ((vcpu->shadow_efer & EFER_LME)) {
+                        int cs_db, cs_l;
+
+                        if (!is_pae(vcpu)) {
+                                printk(KERN_DEBUG "set_cr0: #GP, start paging "
+                                       "in long mode while PAE is disabled\n");
+                                inject_gp(vcpu);
+                                return;
+                        }
+                        kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
+                        if (cs_l) {
+                                printk(KERN_DEBUG "set_cr0: #GP, start paging "
+                                       "in long mode while CS.L == 1\n");
+                                inject_gp(vcpu);
+                                return;
+                        }
+                } else
+#endif
+                if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->cr3)) {
+                        printk(KERN_DEBUG "set_cr0: #GP, pdptrs "
+                               "reserved bits\n");
+                        inject_gp(vcpu);
+                        return;
+                }
+
+        }
+
+        kvm_x86_ops->set_cr0(vcpu, cr0);
+        vcpu->cr0 = cr0;
+
+        mutex_lock(&vcpu->kvm->lock);
+        kvm_mmu_reset_context(vcpu);
+        mutex_unlock(&vcpu->kvm->lock);
+        return;
+}
+EXPORT_SYMBOL_GPL(set_cr0);
+
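+/* lmsw only folds the low four bits of the operand into CR0. */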
+void lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
+{
+        set_cr0(vcpu, (vcpu->cr0 & ~0x0ful) | (msw & 0x0f));
+}
+EXPORT_SYMBOL_GPL(lmsw);
+
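+/* set_cr4 mirrors set_cr0: validate, call the backend, reset the MMU. */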
+void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+{
+        if (cr4 & CR4_RESERVED_BITS) {
+                printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
+                inject_gp(vcpu);
+                return;
+        }
+
+        if (is_long_mode(vcpu)) {
+                if (!(cr4 & X86_CR4_PAE)) {
+                        printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while "
+                               "in long mode\n");
+                        inject_gp(vcpu);
+                        return;
+                }
+        } else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & X86_CR4_PAE)
+                   && !load_pdptrs(vcpu, vcpu->cr3)) {
+                printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
+                inject_gp(vcpu);
+                return;
+        }
+
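+        /* Nested virtualization is not supported, so the guest may not set VMXE. */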
+        if (cr4 & X86_CR4_VMXE) {
+                printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n");
+                inject_gp(vcpu);
+                return;
+        }
+        kvm_x86_ops->set_cr4(vcpu, cr4);
+        vcpu->cr4 = cr4;
+        mutex_lock(&vcpu->kvm->lock);
+        kvm_mmu_reset_context(vcpu);
+        mutex_unlock(&vcpu->kvm->lock);
+}
+EXPORT_SYMBOL_GPL(set_cr4);
+
+void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
+{
+        if (is_long_mode(vcpu)) {
+                if (cr3 & CR3_L_MODE_RESERVED_BITS) {
+                        printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
+                        inject_gp(vcpu);
+                        return;
+                }
+        } else {
+                if (is_pae(vcpu)) {
+                        if (cr3 & CR3_PAE_RESERVED_BITS) {
+                                printk(KERN_DEBUG
+                                       "set_cr3: #GP, reserved bits\n");
+                                inject_gp(vcpu);
+                                return;
+                        }
+                        if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) {
+                                printk(KERN_DEBUG "set_cr3: #GP, pdptrs "
+                                       "reserved bits\n");
+                                inject_gp(vcpu);
+                                return;
+                        }
+                }
+                /*
+                 * We don't check reserved bits in nonpae mode, because
+                 * this isn't enforced, and VMware depends on this.
+                 */
+        }
+
+        mutex_lock(&vcpu->kvm->lock);
+        /*
+         * Does the new cr3 value map to physical memory? (Note, we
+         * catch an invalid cr3 even in real-mode, because it would
+         * cause trouble later on when we turn on paging anyway.)
+         *
+         * A real CPU would silently accept an invalid cr3 and would
+         * attempt to use it - with largely undefined (and often hard
+         * to debug) behavior on the guest side.
+         */
+        if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT)))
+                inject_gp(vcpu);
+        else {
+                vcpu->cr3 = cr3;
+                vcpu->mmu.new_cr3(vcpu);
+        }
+        mutex_unlock(&vcpu->kvm->lock);
+}
+EXPORT_SYMBOL_GPL(set_cr3);
+
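+/*
+ * CR8 shadows the local APIC task-priority register, so accesses are
+ * routed to the in-kernel LAPIC when one exists and to vcpu->cr8 otherwise.
+ */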
+void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
+{
+        if (cr8 & CR8_RESERVED_BITS) {
+                printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
+                inject_gp(vcpu);
+                return;
+        }
+        if (irqchip_in_kernel(vcpu->kvm))
+                kvm_lapic_set_tpr(vcpu, cr8);
+        else
+                vcpu->cr8 = cr8;
+}
+EXPORT_SYMBOL_GPL(set_cr8);
+
+unsigned long get_cr8(struct kvm_vcpu *vcpu)
+{
+        if (irqchip_in_kernel(vcpu->kvm))
+                return kvm_lapic_get_cr8(vcpu);
+        else
+                return vcpu->cr8;
+}
+EXPORT_SYMBOL_GPL(get_cr8);
+
 /*
  * List of msr numbers which we expose to userspace through KVM_GET_MSRS
  * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.