Diffstat (limited to 'drivers/kvm')
-rw-r--r--  drivers/kvm/kvm_main.c | 219
-rw-r--r--  drivers/kvm/x86.c      | 224
-rw-r--r--  drivers/kvm/x86.h      |   2
3 files changed, 225 insertions(+), 220 deletions(-)
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index e8972a82544b..8f7125710d02 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -90,17 +90,6 @@ static struct kvm_stats_debugfs_item {
 
 static struct dentry *debugfs_dir;
 
-#define CR0_RESERVED_BITS \
-	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
-			  | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
-			  | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
-#define CR4_RESERVED_BITS \
-	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
-			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
-			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \
-			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
-
-#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
 #define EFER_RESERVED_BITS 0xfffffffffffff2fe
 
 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
@@ -348,214 +337,6 @@ static void inject_gp(struct kvm_vcpu *vcpu)
 	kvm_x86_ops->inject_gp(vcpu, 0);
 }
 
-/*
- * Load the pae pdptrs. Return true is they are all valid.
- */
-static int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
-{
-	gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
-	unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
-	int i;
-	int ret;
-	u64 pdpte[ARRAY_SIZE(vcpu->pdptrs)];
-
-	mutex_lock(&vcpu->kvm->lock);
-	ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte,
-				  offset * sizeof(u64), sizeof(pdpte));
-	if (ret < 0) {
-		ret = 0;
-		goto out;
-	}
-	for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
-		if ((pdpte[i] & 1) && (pdpte[i] & 0xfffffff0000001e6ull)) {
-			ret = 0;
-			goto out;
-		}
-	}
-	ret = 1;
-
-	memcpy(vcpu->pdptrs, pdpte, sizeof(vcpu->pdptrs));
-out:
-	mutex_unlock(&vcpu->kvm->lock);
-
-	return ret;
-}
-
-void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
-{
-	if (cr0 & CR0_RESERVED_BITS) {
-		printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
-		       cr0, vcpu->cr0);
-		inject_gp(vcpu);
-		return;
-	}
-
-	if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
-		printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n");
-		inject_gp(vcpu);
-		return;
-	}
-
-	if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) {
-		printk(KERN_DEBUG "set_cr0: #GP, set PG flag "
-		       "and a clear PE flag\n");
-		inject_gp(vcpu);
-		return;
-	}
-
-	if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
-#ifdef CONFIG_X86_64
-		if ((vcpu->shadow_efer & EFER_LME)) {
-			int cs_db, cs_l;
-
-			if (!is_pae(vcpu)) {
-				printk(KERN_DEBUG "set_cr0: #GP, start paging "
-				       "in long mode while PAE is disabled\n");
-				inject_gp(vcpu);
-				return;
-			}
-			kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
-			if (cs_l) {
-				printk(KERN_DEBUG "set_cr0: #GP, start paging "
-				       "in long mode while CS.L == 1\n");
-				inject_gp(vcpu);
-				return;
-
-			}
-		} else
-#endif
-		if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->cr3)) {
-			printk(KERN_DEBUG "set_cr0: #GP, pdptrs "
-			       "reserved bits\n");
-			inject_gp(vcpu);
-			return;
-		}
-
-	}
-
-	kvm_x86_ops->set_cr0(vcpu, cr0);
-	vcpu->cr0 = cr0;
-
-	mutex_lock(&vcpu->kvm->lock);
-	kvm_mmu_reset_context(vcpu);
-	mutex_unlock(&vcpu->kvm->lock);
-	return;
-}
-EXPORT_SYMBOL_GPL(set_cr0);
-
-void lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
-{
-	set_cr0(vcpu, (vcpu->cr0 & ~0x0ful) | (msw & 0x0f));
-}
-EXPORT_SYMBOL_GPL(lmsw);
-
-void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
-{
-	if (cr4 & CR4_RESERVED_BITS) {
-		printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
-		inject_gp(vcpu);
-		return;
-	}
-
-	if (is_long_mode(vcpu)) {
-		if (!(cr4 & X86_CR4_PAE)) {
-			printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while "
-			       "in long mode\n");
-			inject_gp(vcpu);
-			return;
-		}
-	} else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & X86_CR4_PAE)
-		   && !load_pdptrs(vcpu, vcpu->cr3)) {
-		printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
-		inject_gp(vcpu);
-		return;
-	}
-
-	if (cr4 & X86_CR4_VMXE) {
-		printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n");
-		inject_gp(vcpu);
-		return;
-	}
-	kvm_x86_ops->set_cr4(vcpu, cr4);
-	vcpu->cr4 = cr4;
-	mutex_lock(&vcpu->kvm->lock);
-	kvm_mmu_reset_context(vcpu);
-	mutex_unlock(&vcpu->kvm->lock);
-}
-EXPORT_SYMBOL_GPL(set_cr4);
-
-void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
-{
-	if (is_long_mode(vcpu)) {
-		if (cr3 & CR3_L_MODE_RESERVED_BITS) {
-			printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
-			inject_gp(vcpu);
-			return;
-		}
-	} else {
-		if (is_pae(vcpu)) {
-			if (cr3 & CR3_PAE_RESERVED_BITS) {
-				printk(KERN_DEBUG
-				       "set_cr3: #GP, reserved bits\n");
-				inject_gp(vcpu);
-				return;
-			}
-			if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) {
-				printk(KERN_DEBUG "set_cr3: #GP, pdptrs "
-				       "reserved bits\n");
-				inject_gp(vcpu);
-				return;
-			}
-		}
-		/*
-		 * We don't check reserved bits in nonpae mode, because
-		 * this isn't enforced, and VMware depends on this.
-		 */
-	}
-
-	mutex_lock(&vcpu->kvm->lock);
-	/*
-	 * Does the new cr3 value map to physical memory? (Note, we
-	 * catch an invalid cr3 even in real-mode, because it would
-	 * cause trouble later on when we turn on paging anyway.)
-	 *
-	 * A real CPU would silently accept an invalid cr3 and would
-	 * attempt to use it - with largely undefined (and often hard
-	 * to debug) behavior on the guest side.
-	 */
-	if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT)))
-		inject_gp(vcpu);
-	else {
-		vcpu->cr3 = cr3;
-		vcpu->mmu.new_cr3(vcpu);
-	}
-	mutex_unlock(&vcpu->kvm->lock);
-}
-EXPORT_SYMBOL_GPL(set_cr3);
-
-void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
-{
-	if (cr8 & CR8_RESERVED_BITS) {
-		printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
-		inject_gp(vcpu);
-		return;
-	}
-	if (irqchip_in_kernel(vcpu->kvm))
-		kvm_lapic_set_tpr(vcpu, cr8);
-	else
-		vcpu->cr8 = cr8;
-}
-EXPORT_SYMBOL_GPL(set_cr8);
-
-unsigned long get_cr8(struct kvm_vcpu *vcpu)
-{
-	if (irqchip_in_kernel(vcpu->kvm))
-		return kvm_lapic_get_cr8(vcpu);
-	else
-		return vcpu->cr8;
-}
-EXPORT_SYMBOL_GPL(get_cr8);
-
 void fx_init(struct kvm_vcpu *vcpu)
 {
 	unsigned after_mxcsr_mask;
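Note on the constant in load_pdptrs() being moved: 0xfffffff0000001e6ull is the set of PDPTE bits that must be clear in a present PAE page-directory-pointer entry. Bits 1-2 and 5-8 are architecturally reserved, and the high part appears to assume a 36-bit physical address width, so bits 36-63 must be zero as well. A minimal user-space sketch of that decomposition (the PDPTE_RESERVED_* names are illustrative, not from the kernel):

	#include <stdint.h>
	#include <stdio.h>

	/* Reserved PDPTE bits: 1-2, 5-8, and everything at or above
	 * bit 36 (past the assumed 36-bit physical address width). */
	#define PDPTE_RESERVED_LOW	((3ull << 1) | (0xfull << 5))	/* 0x1e6 */
	#define PDPTE_RESERVED_HIGH	(~0ull << 36)	/* 0xfffffff000000000 */

	int main(void)
	{
		uint64_t mask = PDPTE_RESERVED_LOW | PDPTE_RESERVED_HIGH;

		/* Prints 1: matches the literal used by load_pdptrs(). */
		printf("%d\n", mask == 0xfffffff0000001e6ull);
		return 0;
	}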
diff --git a/drivers/kvm/x86.c b/drivers/kvm/x86.c
index c26e3715bee8..a728af8a83e5 100644
--- a/drivers/kvm/x86.c
+++ b/drivers/kvm/x86.c
@@ -27,6 +27,17 @@
 #include <asm/uaccess.h>
 
 #define MAX_IO_MSRS 256
+#define CR0_RESERVED_BITS \
+	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
+			  | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
+			  | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
+#define CR4_RESERVED_BITS \
+	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
+			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
+			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \
+			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
+
+#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
 
 unsigned long segment_base(u16 selector)
 {
@@ -78,6 +89,219 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
 }
 EXPORT_SYMBOL_GPL(kvm_set_apic_base);
 
+static void inject_gp(struct kvm_vcpu *vcpu)
+{
+	kvm_x86_ops->inject_gp(vcpu, 0);
+}
+
+/*
+ * Load the pae pdptrs. Return true is they are all valid.
+ */
+int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
+{
+	gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
+	unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
+	int i;
+	int ret;
+	u64 pdpte[ARRAY_SIZE(vcpu->pdptrs)];
+
+	mutex_lock(&vcpu->kvm->lock);
+	ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte,
+				  offset * sizeof(u64), sizeof(pdpte));
+	if (ret < 0) {
+		ret = 0;
+		goto out;
+	}
+	for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
+		if ((pdpte[i] & 1) && (pdpte[i] & 0xfffffff0000001e6ull)) {
+			ret = 0;
+			goto out;
+		}
+	}
+	ret = 1;
+
+	memcpy(vcpu->pdptrs, pdpte, sizeof(vcpu->pdptrs));
+out:
+	mutex_unlock(&vcpu->kvm->lock);
+
+	return ret;
+}
+
+void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+{
+	if (cr0 & CR0_RESERVED_BITS) {
+		printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
+		       cr0, vcpu->cr0);
+		inject_gp(vcpu);
+		return;
+	}
+
+	if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
+		printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n");
+		inject_gp(vcpu);
+		return;
+	}
+
+	if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) {
+		printk(KERN_DEBUG "set_cr0: #GP, set PG flag "
+		       "and a clear PE flag\n");
+		inject_gp(vcpu);
+		return;
+	}
+
+	if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
+#ifdef CONFIG_X86_64
+		if ((vcpu->shadow_efer & EFER_LME)) {
+			int cs_db, cs_l;
+
+			if (!is_pae(vcpu)) {
+				printk(KERN_DEBUG "set_cr0: #GP, start paging "
+				       "in long mode while PAE is disabled\n");
+				inject_gp(vcpu);
+				return;
+			}
+			kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
+			if (cs_l) {
+				printk(KERN_DEBUG "set_cr0: #GP, start paging "
+				       "in long mode while CS.L == 1\n");
+				inject_gp(vcpu);
+				return;
+
+			}
+		} else
+#endif
+		if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->cr3)) {
+			printk(KERN_DEBUG "set_cr0: #GP, pdptrs "
+			       "reserved bits\n");
+			inject_gp(vcpu);
+			return;
+		}
+
+	}
+
+	kvm_x86_ops->set_cr0(vcpu, cr0);
+	vcpu->cr0 = cr0;
+
+	mutex_lock(&vcpu->kvm->lock);
+	kvm_mmu_reset_context(vcpu);
+	mutex_unlock(&vcpu->kvm->lock);
+	return;
+}
+EXPORT_SYMBOL_GPL(set_cr0);
+
+void lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
+{
+	set_cr0(vcpu, (vcpu->cr0 & ~0x0ful) | (msw & 0x0f));
+}
+EXPORT_SYMBOL_GPL(lmsw);
+
+void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+{
+	if (cr4 & CR4_RESERVED_BITS) {
+		printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
+		inject_gp(vcpu);
+		return;
+	}
+
+	if (is_long_mode(vcpu)) {
+		if (!(cr4 & X86_CR4_PAE)) {
+			printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while "
+			       "in long mode\n");
+			inject_gp(vcpu);
+			return;
+		}
+	} else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & X86_CR4_PAE)
+		   && !load_pdptrs(vcpu, vcpu->cr3)) {
+		printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
+		inject_gp(vcpu);
+		return;
+	}
+
+	if (cr4 & X86_CR4_VMXE) {
+		printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n");
+		inject_gp(vcpu);
+		return;
+	}
+	kvm_x86_ops->set_cr4(vcpu, cr4);
+	vcpu->cr4 = cr4;
+	mutex_lock(&vcpu->kvm->lock);
+	kvm_mmu_reset_context(vcpu);
+	mutex_unlock(&vcpu->kvm->lock);
+}
+EXPORT_SYMBOL_GPL(set_cr4);
+
+void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
+{
+	if (is_long_mode(vcpu)) {
+		if (cr3 & CR3_L_MODE_RESERVED_BITS) {
+			printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
+			inject_gp(vcpu);
+			return;
+		}
+	} else {
+		if (is_pae(vcpu)) {
+			if (cr3 & CR3_PAE_RESERVED_BITS) {
+				printk(KERN_DEBUG
+				       "set_cr3: #GP, reserved bits\n");
+				inject_gp(vcpu);
+				return;
+			}
+			if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) {
+				printk(KERN_DEBUG "set_cr3: #GP, pdptrs "
+				       "reserved bits\n");
+				inject_gp(vcpu);
+				return;
+			}
+		}
+		/*
+		 * We don't check reserved bits in nonpae mode, because
+		 * this isn't enforced, and VMware depends on this.
+		 */
+	}
+
+	mutex_lock(&vcpu->kvm->lock);
+	/*
+	 * Does the new cr3 value map to physical memory? (Note, we
+	 * catch an invalid cr3 even in real-mode, because it would
+	 * cause trouble later on when we turn on paging anyway.)
+	 *
+	 * A real CPU would silently accept an invalid cr3 and would
+	 * attempt to use it - with largely undefined (and often hard
+	 * to debug) behavior on the guest side.
+	 */
+	if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT)))
+		inject_gp(vcpu);
+	else {
+		vcpu->cr3 = cr3;
+		vcpu->mmu.new_cr3(vcpu);
+	}
+	mutex_unlock(&vcpu->kvm->lock);
+}
+EXPORT_SYMBOL_GPL(set_cr3);
+
+void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
+{
+	if (cr8 & CR8_RESERVED_BITS) {
+		printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
+		inject_gp(vcpu);
+		return;
+	}
+	if (irqchip_in_kernel(vcpu->kvm))
+		kvm_lapic_set_tpr(vcpu, cr8);
+	else
+		vcpu->cr8 = cr8;
+}
+EXPORT_SYMBOL_GPL(set_cr8);
+
+unsigned long get_cr8(struct kvm_vcpu *vcpu)
+{
+	if (irqchip_in_kernel(vcpu->kvm))
+		return kvm_lapic_get_cr8(vcpu);
+	else
+		return vcpu->cr8;
+}
+EXPORT_SYMBOL_GPL(get_cr8);
+
 /*
  * List of msr numbers which we expose to userspace through KVM_GET_MSRS
  * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
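Note on the macros moved into x86.c: all three CRx_RESERVED_BITS definitions follow the same pattern, the complement of the union of architecturally defined bits, so a guest write injects #GP iff it sets anything outside that union. A stand-alone sketch of the pattern for CR8, assuming X86_CR8_TPR is 0x0f (the 4-bit task-priority field):

	#include <stdio.h>

	#define X86_CR8_TPR		0x0fUL	/* task priority, bits 0-3 */
	#define CR8_RESERVED_BITS	(~(unsigned long)X86_CR8_TPR)

	/* Mirrors the check at the top of set_cr8(): any bit outside
	 * the TPR field is reserved. */
	static int cr8_valid(unsigned long cr8)
	{
		return (cr8 & CR8_RESERVED_BITS) == 0;
	}

	int main(void)
	{
		/* Prints "1 0": 0x0f stays inside the TPR field,
		 * 0x10 touches a reserved bit. */
		printf("%d %d\n", cr8_valid(0x0f), cr8_valid(0x10));
		return 0;
	}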
diff --git a/drivers/kvm/x86.h b/drivers/kvm/x86.h
index 20da8e9751c0..5592456c36ad 100644
--- a/drivers/kvm/x86.h
+++ b/drivers/kvm/x86.h
@@ -125,5 +125,5 @@ static inline int is_paging(struct kvm_vcpu *vcpu)
 	return vcpu->cr0 & X86_CR0_PG;
 }
 
-
+int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
 #endif
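The x86.h hunk replaces a stray blank line with a declaration, turning load_pdptrs() from a kvm_main.c-local static into a symbol shared across the x86 code. A hypothetical caller (not from this patch) could then mirror the PAE path of set_cr3() from any unit that includes x86.h:

	/* Hypothetical caller - illustrative only. Revalidates the
	 * PDPTEs after a guest CR3 change under PAE paging, as
	 * set_cr3() does, and returns 1 on success, 0 on failure. */
	#include "x86.h"

	static int pae_switch_cr3(struct kvm_vcpu *vcpu, unsigned long new_cr3)
	{
		if (!load_pdptrs(vcpu, new_cr3))
			return 0;	/* reserved bits set, or read failed */
		vcpu->cr3 = new_cr3;
		vcpu->mmu.new_cr3(vcpu);
		return 1;
	}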