Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r--  arch/x86/kvm/x86.c  897
1 file changed, 793 insertions(+), 104 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6b01552bd1f1..0ce556372a4d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -15,10 +15,12 @@
15 */ 15 */
16 16
17#include <linux/kvm_host.h> 17#include <linux/kvm_host.h>
18#include "segment_descriptor.h"
19#include "irq.h" 18#include "irq.h"
20#include "mmu.h" 19#include "mmu.h"
20#include "i8254.h"
21#include "tss.h"
21 22
23#include <linux/clocksource.h>
22#include <linux/kvm.h> 24#include <linux/kvm.h>
23#include <linux/fs.h> 25#include <linux/fs.h>
24#include <linux/vmalloc.h> 26#include <linux/vmalloc.h>
@@ -28,6 +30,7 @@
28 30
29#include <asm/uaccess.h> 31#include <asm/uaccess.h>
30#include <asm/msr.h> 32#include <asm/msr.h>
33#include <asm/desc.h>
31 34
32#define MAX_IO_MSRS 256 35#define MAX_IO_MSRS 256
33#define CR0_RESERVED_BITS \ 36#define CR0_RESERVED_BITS \
@@ -41,7 +44,15 @@
41 | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) 44 | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
42 45
43#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) 46#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
44#define EFER_RESERVED_BITS 0xfffffffffffff2fe 47/* EFER defaults:
 48 * - enable syscall by default because it's emulated by KVM
 49 * - enable LME and LMA by default on 64-bit KVM
50 */
51#ifdef CONFIG_X86_64
52static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffafeULL;
53#else
54static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL;
55#endif
45 56
46#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM 57#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
47#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU 58#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
@@ -63,6 +74,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
63 { "irq_window", VCPU_STAT(irq_window_exits) }, 74 { "irq_window", VCPU_STAT(irq_window_exits) },
64 { "halt_exits", VCPU_STAT(halt_exits) }, 75 { "halt_exits", VCPU_STAT(halt_exits) },
65 { "halt_wakeup", VCPU_STAT(halt_wakeup) }, 76 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
77 { "hypercalls", VCPU_STAT(hypercalls) },
66 { "request_irq", VCPU_STAT(request_irq_exits) }, 78 { "request_irq", VCPU_STAT(request_irq_exits) },
67 { "irq_exits", VCPU_STAT(irq_exits) }, 79 { "irq_exits", VCPU_STAT(irq_exits) },
68 { "host_state_reload", VCPU_STAT(host_state_reload) }, 80 { "host_state_reload", VCPU_STAT(host_state_reload) },
@@ -78,6 +90,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
78 { "mmu_recycled", VM_STAT(mmu_recycled) }, 90 { "mmu_recycled", VM_STAT(mmu_recycled) },
79 { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, 91 { "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
80 { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, 92 { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
93 { "largepages", VM_STAT(lpages) },
81 { NULL } 94 { NULL }
82}; 95};
83 96
@@ -85,7 +98,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
85unsigned long segment_base(u16 selector) 98unsigned long segment_base(u16 selector)
86{ 99{
87 struct descriptor_table gdt; 100 struct descriptor_table gdt;
88 struct segment_descriptor *d; 101 struct desc_struct *d;
89 unsigned long table_base; 102 unsigned long table_base;
90 unsigned long v; 103 unsigned long v;
91 104
@@ -101,13 +114,12 @@ unsigned long segment_base(u16 selector)
101 asm("sldt %0" : "=g"(ldt_selector)); 114 asm("sldt %0" : "=g"(ldt_selector));
102 table_base = segment_base(ldt_selector); 115 table_base = segment_base(ldt_selector);
103 } 116 }
104 d = (struct segment_descriptor *)(table_base + (selector & ~7)); 117 d = (struct desc_struct *)(table_base + (selector & ~7));
105 v = d->base_low | ((unsigned long)d->base_mid << 16) | 118 v = d->base0 | ((unsigned long)d->base1 << 16) |
106 ((unsigned long)d->base_high << 24); 119 ((unsigned long)d->base2 << 24);
107#ifdef CONFIG_X86_64 120#ifdef CONFIG_X86_64
108 if (d->system == 0 && (d->type == 2 || d->type == 9 || d->type == 11)) 121 if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
109 v |= ((unsigned long) \ 122 v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32;
110 ((struct segment_descriptor_64 *)d)->base_higher) << 32;
111#endif 123#endif
112 return v; 124 return v;
113} 125}
@@ -145,11 +157,16 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
145 u32 error_code) 157 u32 error_code)
146{ 158{
147 ++vcpu->stat.pf_guest; 159 ++vcpu->stat.pf_guest;
148 if (vcpu->arch.exception.pending && vcpu->arch.exception.nr == PF_VECTOR) { 160 if (vcpu->arch.exception.pending) {
149 printk(KERN_DEBUG "kvm: inject_page_fault:" 161 if (vcpu->arch.exception.nr == PF_VECTOR) {
150 " double fault 0x%lx\n", addr); 162 printk(KERN_DEBUG "kvm: inject_page_fault:"
151 vcpu->arch.exception.nr = DF_VECTOR; 163 " double fault 0x%lx\n", addr);
152 vcpu->arch.exception.error_code = 0; 164 vcpu->arch.exception.nr = DF_VECTOR;
165 vcpu->arch.exception.error_code = 0;
166 } else if (vcpu->arch.exception.nr == DF_VECTOR) {
167 /* triple fault -> shutdown */
168 set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
169 }
153 return; 170 return;
154 } 171 }
155 vcpu->arch.cr2 = addr; 172 vcpu->arch.cr2 = addr;
@@ -184,7 +201,6 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
184 int ret; 201 int ret;
185 u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)]; 202 u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)];
186 203
187 down_read(&vcpu->kvm->slots_lock);
188 ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte, 204 ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte,
189 offset * sizeof(u64), sizeof(pdpte)); 205 offset * sizeof(u64), sizeof(pdpte));
190 if (ret < 0) { 206 if (ret < 0) {
@@ -201,10 +217,10 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
201 217
202 memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs)); 218 memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs));
203out: 219out:
204 up_read(&vcpu->kvm->slots_lock);
205 220
206 return ret; 221 return ret;
207} 222}
223EXPORT_SYMBOL_GPL(load_pdptrs);
208 224
209static bool pdptrs_changed(struct kvm_vcpu *vcpu) 225static bool pdptrs_changed(struct kvm_vcpu *vcpu)
210{ 226{
@@ -215,18 +231,16 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu)
215 if (is_long_mode(vcpu) || !is_pae(vcpu)) 231 if (is_long_mode(vcpu) || !is_pae(vcpu))
216 return false; 232 return false;
217 233
218 down_read(&vcpu->kvm->slots_lock);
219 r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte)); 234 r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte));
220 if (r < 0) 235 if (r < 0)
221 goto out; 236 goto out;
222 changed = memcmp(pdpte, vcpu->arch.pdptrs, sizeof(pdpte)) != 0; 237 changed = memcmp(pdpte, vcpu->arch.pdptrs, sizeof(pdpte)) != 0;
223out: 238out:
224 up_read(&vcpu->kvm->slots_lock);
225 239
226 return changed; 240 return changed;
227} 241}
228 242
229void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 243void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
230{ 244{
231 if (cr0 & CR0_RESERVED_BITS) { 245 if (cr0 & CR0_RESERVED_BITS) {
232 printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n", 246 printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
@@ -284,15 +298,18 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
284 kvm_mmu_reset_context(vcpu); 298 kvm_mmu_reset_context(vcpu);
285 return; 299 return;
286} 300}
287EXPORT_SYMBOL_GPL(set_cr0); 301EXPORT_SYMBOL_GPL(kvm_set_cr0);
288 302
289void lmsw(struct kvm_vcpu *vcpu, unsigned long msw) 303void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
290{ 304{
291 set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)); 305 kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f));
306 KVMTRACE_1D(LMSW, vcpu,
307 (u32)((vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)),
308 handler);
292} 309}
293EXPORT_SYMBOL_GPL(lmsw); 310EXPORT_SYMBOL_GPL(kvm_lmsw);
294 311
295void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 312void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
296{ 313{
297 if (cr4 & CR4_RESERVED_BITS) { 314 if (cr4 & CR4_RESERVED_BITS) {
298 printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n"); 315 printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
@@ -323,9 +340,9 @@ void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
323 vcpu->arch.cr4 = cr4; 340 vcpu->arch.cr4 = cr4;
324 kvm_mmu_reset_context(vcpu); 341 kvm_mmu_reset_context(vcpu);
325} 342}
326EXPORT_SYMBOL_GPL(set_cr4); 343EXPORT_SYMBOL_GPL(kvm_set_cr4);
327 344
328void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) 345void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
329{ 346{
330 if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) { 347 if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) {
331 kvm_mmu_flush_tlb(vcpu); 348 kvm_mmu_flush_tlb(vcpu);
@@ -359,7 +376,6 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
359 */ 376 */
360 } 377 }
361 378
362 down_read(&vcpu->kvm->slots_lock);
363 /* 379 /*
364 * Does the new cr3 value map to physical memory? (Note, we 380 * Does the new cr3 value map to physical memory? (Note, we
365 * catch an invalid cr3 even in real-mode, because it would 381 * catch an invalid cr3 even in real-mode, because it would
@@ -375,11 +391,10 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
375 vcpu->arch.cr3 = cr3; 391 vcpu->arch.cr3 = cr3;
376 vcpu->arch.mmu.new_cr3(vcpu); 392 vcpu->arch.mmu.new_cr3(vcpu);
377 } 393 }
378 up_read(&vcpu->kvm->slots_lock);
379} 394}
380EXPORT_SYMBOL_GPL(set_cr3); 395EXPORT_SYMBOL_GPL(kvm_set_cr3);
381 396
382void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) 397void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
383{ 398{
384 if (cr8 & CR8_RESERVED_BITS) { 399 if (cr8 & CR8_RESERVED_BITS) {
385 printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8); 400 printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
@@ -391,16 +406,16 @@ void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
391 else 406 else
392 vcpu->arch.cr8 = cr8; 407 vcpu->arch.cr8 = cr8;
393} 408}
394EXPORT_SYMBOL_GPL(set_cr8); 409EXPORT_SYMBOL_GPL(kvm_set_cr8);
395 410
396unsigned long get_cr8(struct kvm_vcpu *vcpu) 411unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
397{ 412{
398 if (irqchip_in_kernel(vcpu->kvm)) 413 if (irqchip_in_kernel(vcpu->kvm))
399 return kvm_lapic_get_cr8(vcpu); 414 return kvm_lapic_get_cr8(vcpu);
400 else 415 else
401 return vcpu->arch.cr8; 416 return vcpu->arch.cr8;
402} 417}
403EXPORT_SYMBOL_GPL(get_cr8); 418EXPORT_SYMBOL_GPL(kvm_get_cr8);
404 419
405/* 420/*
406 * List of msr numbers which we expose to userspace through KVM_GET_MSRS 421 * List of msr numbers which we expose to userspace through KVM_GET_MSRS
@@ -415,7 +430,8 @@ static u32 msrs_to_save[] = {
415#ifdef CONFIG_X86_64 430#ifdef CONFIG_X86_64
416 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, 431 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
417#endif 432#endif
418 MSR_IA32_TIME_STAMP_COUNTER, 433 MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
434 MSR_IA32_PERF_STATUS,
419}; 435};
420 436
421static unsigned num_msrs_to_save; 437static unsigned num_msrs_to_save;
@@ -424,11 +440,9 @@ static u32 emulated_msrs[] = {
424 MSR_IA32_MISC_ENABLE, 440 MSR_IA32_MISC_ENABLE,
425}; 441};
426 442
427#ifdef CONFIG_X86_64
428
429static void set_efer(struct kvm_vcpu *vcpu, u64 efer) 443static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
430{ 444{
431 if (efer & EFER_RESERVED_BITS) { 445 if (efer & efer_reserved_bits) {
432 printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n", 446 printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n",
433 efer); 447 efer);
434 kvm_inject_gp(vcpu, 0); 448 kvm_inject_gp(vcpu, 0);
@@ -450,7 +464,12 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
450 vcpu->arch.shadow_efer = efer; 464 vcpu->arch.shadow_efer = efer;
451} 465}
452 466
453#endif 467void kvm_enable_efer_bits(u64 mask)
468{
469 efer_reserved_bits &= ~mask;
470}
471EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
472
454 473
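
With the EFER mask now adjustable at runtime, a vendor backend can whitelist additional bits it knows it can virtualize. A minimal sketch of such a caller — the init hook and the feature gate are assumptions for illustration, only kvm_enable_efer_bits() comes from this patch:

	/* Illustrative caller: allow guests to set EFER.NX when the host has NX. */
	static void example_vendor_hardware_setup(void)
	{
		if (boot_cpu_has(X86_FEATURE_NX))	/* hypothetical feature gate */
			kvm_enable_efer_bits(EFER_NX);	/* drop NX from the reserved mask */
	}
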
455/* 474/*
456 * Writes msr value into the appropriate "register". 475
@@ -470,26 +489,86 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
470 return kvm_set_msr(vcpu, index, *data); 489 return kvm_set_msr(vcpu, index, *data);
471} 490}
472 491
492static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
493{
494 static int version;
495 struct kvm_wall_clock wc;
496 struct timespec wc_ts;
497
498 if (!wall_clock)
499 return;
500
501 version++;
502
503 kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
504
505 wc_ts = current_kernel_time();
506 wc.wc_sec = wc_ts.tv_sec;
507 wc.wc_nsec = wc_ts.tv_nsec;
508 wc.wc_version = version;
509
510 kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
511
512 version++;
513 kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
514}
515
516static void kvm_write_guest_time(struct kvm_vcpu *v)
517{
518 struct timespec ts;
519 unsigned long flags;
520 struct kvm_vcpu_arch *vcpu = &v->arch;
521 void *shared_kaddr;
522
523 if ((!vcpu->time_page))
524 return;
525
526 /* Keep irq disabled to prevent changes to the clock */
527 local_irq_save(flags);
528 kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER,
529 &vcpu->hv_clock.tsc_timestamp);
530 ktime_get_ts(&ts);
531 local_irq_restore(flags);
532
533 /* With all the info we got, fill in the values */
534
535 vcpu->hv_clock.system_time = ts.tv_nsec +
536 (NSEC_PER_SEC * (u64)ts.tv_sec);
537 /*
538 * The interface expects us to write an even number signaling that the
539 * update is finished. Since the guest won't see the intermediate
540 * state, we just write "2" at the end
541 */
542 vcpu->hv_clock.version = 2;
543
544 shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0);
545
546 memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
547 sizeof(vcpu->hv_clock));
548
549 kunmap_atomic(shared_kaddr, KM_USER0);
550
551 mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
552}
553
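
The version field implements a seqcount-style handshake: a reader must retry if it sees an odd value (update in progress) or if the value changed across the read; because the guest can never observe the intermediate state here, the writer simply ends with version = 2. A guest-side read loop would look roughly like this — a sketch only, with the structure layout assumed to mirror the shared time structure and TSC handling omitted:

	/* Illustrative guest-side read of the shared clock structure. */
	static u64 example_read_system_time(volatile struct kvm_vcpu_time_info *ti)
	{
		u32 version;
		u64 system_time;

		do {
			version = ti->version;		/* odd => update in progress */
			rmb();
			system_time = ti->system_time;
			rmb();
		} while ((version & 1) || version != ti->version);

		return system_time;
	}
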
473 554
474int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) 555int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
475{ 556{
476 switch (msr) { 557 switch (msr) {
477#ifdef CONFIG_X86_64
478 case MSR_EFER: 558 case MSR_EFER:
479 set_efer(vcpu, data); 559 set_efer(vcpu, data);
480 break; 560 break;
481#endif
482 case MSR_IA32_MC0_STATUS: 561 case MSR_IA32_MC0_STATUS:
483 pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n", 562 pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n",
484 __FUNCTION__, data); 563 __func__, data);
485 break; 564 break;
486 case MSR_IA32_MCG_STATUS: 565 case MSR_IA32_MCG_STATUS:
487 pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n", 566 pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n",
488 __FUNCTION__, data); 567 __func__, data);
489 break; 568 break;
490 case MSR_IA32_MCG_CTL: 569 case MSR_IA32_MCG_CTL:
491 pr_unimpl(vcpu, "%s: MSR_IA32_MCG_CTL 0x%llx, nop\n", 570 pr_unimpl(vcpu, "%s: MSR_IA32_MCG_CTL 0x%llx, nop\n",
492 __FUNCTION__, data); 571 __func__, data);
493 break; 572 break;
494 case MSR_IA32_UCODE_REV: 573 case MSR_IA32_UCODE_REV:
495 case MSR_IA32_UCODE_WRITE: 574 case MSR_IA32_UCODE_WRITE:
@@ -501,6 +580,42 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
501 case MSR_IA32_MISC_ENABLE: 580 case MSR_IA32_MISC_ENABLE:
502 vcpu->arch.ia32_misc_enable_msr = data; 581 vcpu->arch.ia32_misc_enable_msr = data;
503 break; 582 break;
583 case MSR_KVM_WALL_CLOCK:
584 vcpu->kvm->arch.wall_clock = data;
585 kvm_write_wall_clock(vcpu->kvm, data);
586 break;
587 case MSR_KVM_SYSTEM_TIME: {
588 if (vcpu->arch.time_page) {
589 kvm_release_page_dirty(vcpu->arch.time_page);
590 vcpu->arch.time_page = NULL;
591 }
592
593 vcpu->arch.time = data;
594
595 /* we verify if the enable bit is set... */
596 if (!(data & 1))
597 break;
598
599 /* ...but clean it before doing the actual write */
600 vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
601
602 vcpu->arch.hv_clock.tsc_to_system_mul =
603 clocksource_khz2mult(tsc_khz, 22);
604 vcpu->arch.hv_clock.tsc_shift = 22;
605
606 down_read(&current->mm->mmap_sem);
607 vcpu->arch.time_page =
608 gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
609 up_read(&current->mm->mmap_sem);
610
611 if (is_error_page(vcpu->arch.time_page)) {
612 kvm_release_page_clean(vcpu->arch.time_page);
613 vcpu->arch.time_page = NULL;
614 }
615
616 kvm_write_guest_time(vcpu);
617 break;
618 }
504 default: 619 default:
505 pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data); 620 pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data);
506 return 1; 621 return 1;
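
The scaling pair stored above converts a raw TSC delta to nanoseconds with one multiply and one shift: clocksource_khz2mult(tsc_khz, 22) picks mult ~= (10^6 << 22) / tsc_khz. A worked example of the arithmetic (illustrative, not code from this patch):

	/* delta_ns = (tsc_delta * tsc_to_system_mul) >> tsc_shift
	 * e.g. tsc_khz = 2,000,000 (a 2 GHz TSC), shift = 22:
	 *   mult = (1,000,000 << 22) / 2,000,000 = 2,097,152  (i.e. 0.5 << 22)
	 * so each TSC tick contributes 0.5 ns, as expected for 2 GHz. */
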
@@ -540,7 +655,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
540 case MSR_IA32_MC0_MISC+12: 655 case MSR_IA32_MC0_MISC+12:
541 case MSR_IA32_MC0_MISC+16: 656 case MSR_IA32_MC0_MISC+16:
542 case MSR_IA32_UCODE_REV: 657 case MSR_IA32_UCODE_REV:
543 case MSR_IA32_PERF_STATUS:
544 case MSR_IA32_EBL_CR_POWERON: 658 case MSR_IA32_EBL_CR_POWERON:
545 /* MTRR registers */ 659 /* MTRR registers */
546 case 0xfe: 660 case 0xfe:
@@ -556,11 +670,21 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
556 case MSR_IA32_MISC_ENABLE: 670 case MSR_IA32_MISC_ENABLE:
557 data = vcpu->arch.ia32_misc_enable_msr; 671 data = vcpu->arch.ia32_misc_enable_msr;
558 break; 672 break;
559#ifdef CONFIG_X86_64 673 case MSR_IA32_PERF_STATUS:
674 /* TSC increment by tick */
675 data = 1000ULL;
676 /* CPU multiplier */
677 data |= (((uint64_t)4ULL) << 40);
678 break;
560 case MSR_EFER: 679 case MSR_EFER:
561 data = vcpu->arch.shadow_efer; 680 data = vcpu->arch.shadow_efer;
562 break; 681 break;
563#endif 682 case MSR_KVM_WALL_CLOCK:
683 data = vcpu->kvm->arch.wall_clock;
684 break;
685 case MSR_KVM_SYSTEM_TIME:
686 data = vcpu->arch.time;
687 break;
564 default: 688 default:
565 pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); 689 pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
566 return 1; 690 return 1;
@@ -584,9 +708,11 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
584 708
585 vcpu_load(vcpu); 709 vcpu_load(vcpu);
586 710
711 down_read(&vcpu->kvm->slots_lock);
587 for (i = 0; i < msrs->nmsrs; ++i) 712 for (i = 0; i < msrs->nmsrs; ++i)
588 if (do_msr(vcpu, entries[i].index, &entries[i].data)) 713 if (do_msr(vcpu, entries[i].index, &entries[i].data))
589 break; 714 break;
715 up_read(&vcpu->kvm->slots_lock);
590 716
591 vcpu_put(vcpu); 717 vcpu_put(vcpu);
592 718
@@ -688,11 +814,24 @@ int kvm_dev_ioctl_check_extension(long ext)
688 case KVM_CAP_USER_MEMORY: 814 case KVM_CAP_USER_MEMORY:
689 case KVM_CAP_SET_TSS_ADDR: 815 case KVM_CAP_SET_TSS_ADDR:
690 case KVM_CAP_EXT_CPUID: 816 case KVM_CAP_EXT_CPUID:
817 case KVM_CAP_CLOCKSOURCE:
818 case KVM_CAP_PIT:
819 case KVM_CAP_NOP_IO_DELAY:
820 case KVM_CAP_MP_STATE:
691 r = 1; 821 r = 1;
692 break; 822 break;
693 case KVM_CAP_VAPIC: 823 case KVM_CAP_VAPIC:
694 r = !kvm_x86_ops->cpu_has_accelerated_tpr(); 824 r = !kvm_x86_ops->cpu_has_accelerated_tpr();
695 break; 825 break;
826 case KVM_CAP_NR_VCPUS:
827 r = KVM_MAX_VCPUS;
828 break;
829 case KVM_CAP_NR_MEMSLOTS:
830 r = KVM_MEMORY_SLOTS;
831 break;
832 case KVM_CAP_PV_MMU:
833 r = !tdp_enabled;
834 break;
696 default: 835 default:
697 r = 0; 836 r = 0;
698 break; 837 break;
@@ -763,6 +902,7 @@ out:
763void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 902void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
764{ 903{
765 kvm_x86_ops->vcpu_load(vcpu, cpu); 904 kvm_x86_ops->vcpu_load(vcpu, cpu);
905 kvm_write_guest_time(vcpu);
766} 906}
767 907
768void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 908void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -958,32 +1098,32 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
958 } 1098 }
959 /* function 4 and 0xb have additional index. */ 1099 /* function 4 and 0xb have additional index. */
960 case 4: { 1100 case 4: {
961 int index, cache_type; 1101 int i, cache_type;
962 1102
963 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 1103 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
964 /* read more entries until cache_type is zero */ 1104 /* read more entries until cache_type is zero */
965 for (index = 1; *nent < maxnent; ++index) { 1105 for (i = 1; *nent < maxnent; ++i) {
966 cache_type = entry[index - 1].eax & 0x1f; 1106 cache_type = entry[i - 1].eax & 0x1f;
967 if (!cache_type) 1107 if (!cache_type)
968 break; 1108 break;
969 do_cpuid_1_ent(&entry[index], function, index); 1109 do_cpuid_1_ent(&entry[i], function, i);
970 entry[index].flags |= 1110 entry[i].flags |=
971 KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 1111 KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
972 ++*nent; 1112 ++*nent;
973 } 1113 }
974 break; 1114 break;
975 } 1115 }
976 case 0xb: { 1116 case 0xb: {
977 int index, level_type; 1117 int i, level_type;
978 1118
979 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 1119 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
980 /* read more entries until level_type is zero */ 1120 /* read more entries until level_type is zero */
981 for (index = 1; *nent < maxnent; ++index) { 1121 for (i = 1; *nent < maxnent; ++i) {
982 level_type = entry[index - 1].ecx & 0xff; 1122 level_type = entry[i - 1].ecx & 0xff;
983 if (!level_type) 1123 if (!level_type)
984 break; 1124 break;
985 do_cpuid_1_ent(&entry[index], function, index); 1125 do_cpuid_1_ent(&entry[i], function, i);
986 entry[index].flags |= 1126 entry[i].flags |=
987 KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 1127 KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
988 ++*nent; 1128 ++*nent;
989 } 1129 }
@@ -1365,6 +1505,23 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
1365 return r; 1505 return r;
1366} 1506}
1367 1507
1508static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
1509{
1510 int r = 0;
1511
1512 memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state));
1513 return r;
1514}
1515
1516static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
1517{
1518 int r = 0;
1519
1520 memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
1521 kvm_pit_load_count(kvm, 0, ps->channels[0].count);
1522 return r;
1523}
1524
1368/* 1525/*
1369 * Get (and clear) the dirty memory log for a memory slot. 1526 * Get (and clear) the dirty memory log for a memory slot.
1370 */ 1527 */
@@ -1457,6 +1614,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
1457 } else 1614 } else
1458 goto out; 1615 goto out;
1459 break; 1616 break;
1617 case KVM_CREATE_PIT:
1618 r = -ENOMEM;
1619 kvm->arch.vpit = kvm_create_pit(kvm);
1620 if (kvm->arch.vpit)
1621 r = 0;
1622 break;
1460 case KVM_IRQ_LINE: { 1623 case KVM_IRQ_LINE: {
1461 struct kvm_irq_level irq_event; 1624 struct kvm_irq_level irq_event;
1462 1625
@@ -1512,6 +1675,37 @@ long kvm_arch_vm_ioctl(struct file *filp,
1512 r = 0; 1675 r = 0;
1513 break; 1676 break;
1514 } 1677 }
1678 case KVM_GET_PIT: {
1679 struct kvm_pit_state ps;
1680 r = -EFAULT;
1681 if (copy_from_user(&ps, argp, sizeof ps))
1682 goto out;
1683 r = -ENXIO;
1684 if (!kvm->arch.vpit)
1685 goto out;
1686 r = kvm_vm_ioctl_get_pit(kvm, &ps);
1687 if (r)
1688 goto out;
1689 r = -EFAULT;
1690 if (copy_to_user(argp, &ps, sizeof ps))
1691 goto out;
1692 r = 0;
1693 break;
1694 }
1695 case KVM_SET_PIT: {
1696 struct kvm_pit_state ps;
1697 r = -EFAULT;
1698 if (copy_from_user(&ps, argp, sizeof ps))
1699 goto out;
1700 r = -ENXIO;
1701 if (!kvm->arch.vpit)
1702 goto out;
1703 r = kvm_vm_ioctl_set_pit(kvm, &ps);
1704 if (r)
1705 goto out;
1706 r = 0;
1707 break;
1708 }
1515 default: 1709 default:
1516 ; 1710 ;
1517 } 1711 }
@@ -1570,7 +1764,6 @@ int emulator_read_std(unsigned long addr,
1570 void *data = val; 1764 void *data = val;
1571 int r = X86EMUL_CONTINUE; 1765 int r = X86EMUL_CONTINUE;
1572 1766
1573 down_read(&vcpu->kvm->slots_lock);
1574 while (bytes) { 1767 while (bytes) {
1575 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 1768 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
1576 unsigned offset = addr & (PAGE_SIZE-1); 1769 unsigned offset = addr & (PAGE_SIZE-1);
@@ -1592,7 +1785,6 @@ int emulator_read_std(unsigned long addr,
1592 addr += tocopy; 1785 addr += tocopy;
1593 } 1786 }
1594out: 1787out:
1595 up_read(&vcpu->kvm->slots_lock);
1596 return r; 1788 return r;
1597} 1789}
1598EXPORT_SYMBOL_GPL(emulator_read_std); 1790EXPORT_SYMBOL_GPL(emulator_read_std);
@@ -1611,9 +1803,7 @@ static int emulator_read_emulated(unsigned long addr,
1611 return X86EMUL_CONTINUE; 1803 return X86EMUL_CONTINUE;
1612 } 1804 }
1613 1805
1614 down_read(&vcpu->kvm->slots_lock);
1615 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 1806 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
1616 up_read(&vcpu->kvm->slots_lock);
1617 1807
1618 /* For APIC access vmexit */ 1808 /* For APIC access vmexit */
1619 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) 1809 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
@@ -1646,19 +1836,15 @@ mmio:
1646 return X86EMUL_UNHANDLEABLE; 1836 return X86EMUL_UNHANDLEABLE;
1647} 1837}
1648 1838
1649static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, 1839int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
1650 const void *val, int bytes) 1840 const void *val, int bytes)
1651{ 1841{
1652 int ret; 1842 int ret;
1653 1843
1654 down_read(&vcpu->kvm->slots_lock);
1655 ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); 1844 ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
1656 if (ret < 0) { 1845 if (ret < 0)
1657 up_read(&vcpu->kvm->slots_lock);
1658 return 0; 1846 return 0;
1659 }
1660 kvm_mmu_pte_write(vcpu, gpa, val, bytes); 1847 kvm_mmu_pte_write(vcpu, gpa, val, bytes);
1661 up_read(&vcpu->kvm->slots_lock);
1662 return 1; 1848 return 1;
1663} 1849}
1664 1850
@@ -1670,9 +1856,7 @@ static int emulator_write_emulated_onepage(unsigned long addr,
1670 struct kvm_io_device *mmio_dev; 1856 struct kvm_io_device *mmio_dev;
1671 gpa_t gpa; 1857 gpa_t gpa;
1672 1858
1673 down_read(&vcpu->kvm->slots_lock);
1674 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 1859 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
1675 up_read(&vcpu->kvm->slots_lock);
1676 1860
1677 if (gpa == UNMAPPED_GVA) { 1861 if (gpa == UNMAPPED_GVA) {
1678 kvm_inject_page_fault(vcpu, addr, 2); 1862 kvm_inject_page_fault(vcpu, addr, 2);
@@ -1749,7 +1933,6 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
1749 char *kaddr; 1933 char *kaddr;
1750 u64 val; 1934 u64 val;
1751 1935
1752 down_read(&vcpu->kvm->slots_lock);
1753 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 1936 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
1754 1937
1755 if (gpa == UNMAPPED_GVA || 1938 if (gpa == UNMAPPED_GVA ||
@@ -1769,9 +1952,8 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
1769 set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val); 1952 set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val);
1770 kunmap_atomic(kaddr, KM_USER0); 1953 kunmap_atomic(kaddr, KM_USER0);
1771 kvm_release_page_dirty(page); 1954 kvm_release_page_dirty(page);
1772 emul_write:
1773 up_read(&vcpu->kvm->slots_lock);
1774 } 1955 }
1956emul_write:
1775#endif 1957#endif
1776 1958
1777 return emulator_write_emulated(addr, new, bytes, vcpu); 1959 return emulator_write_emulated(addr, new, bytes, vcpu);
@@ -1802,7 +1984,7 @@ int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
1802 *dest = kvm_x86_ops->get_dr(vcpu, dr); 1984 *dest = kvm_x86_ops->get_dr(vcpu, dr);
1803 return X86EMUL_CONTINUE; 1985 return X86EMUL_CONTINUE;
1804 default: 1986 default:
1805 pr_unimpl(vcpu, "%s: unexpected dr %u\n", __FUNCTION__, dr); 1987 pr_unimpl(vcpu, "%s: unexpected dr %u\n", __func__, dr);
1806 return X86EMUL_UNHANDLEABLE; 1988 return X86EMUL_UNHANDLEABLE;
1807 } 1989 }
1808} 1990}
@@ -1840,7 +2022,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
1840} 2022}
1841EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); 2023EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
1842 2024
1843struct x86_emulate_ops emulate_ops = { 2025static struct x86_emulate_ops emulate_ops = {
1844 .read_std = emulator_read_std, 2026 .read_std = emulator_read_std,
1845 .read_emulated = emulator_read_emulated, 2027 .read_emulated = emulator_read_emulated,
1846 .write_emulated = emulator_write_emulated, 2028 .write_emulated = emulator_write_emulated,
@@ -2091,6 +2273,13 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2091 vcpu->arch.pio.guest_page_offset = 0; 2273 vcpu->arch.pio.guest_page_offset = 0;
2092 vcpu->arch.pio.rep = 0; 2274 vcpu->arch.pio.rep = 0;
2093 2275
2276 if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
2277 KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size,
2278 handler);
2279 else
2280 KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
2281 handler);
2282
2094 kvm_x86_ops->cache_regs(vcpu); 2283 kvm_x86_ops->cache_regs(vcpu);
2095 memcpy(vcpu->arch.pio_data, &vcpu->arch.regs[VCPU_REGS_RAX], 4); 2284 memcpy(vcpu->arch.pio_data, &vcpu->arch.regs[VCPU_REGS_RAX], 4);
2096 kvm_x86_ops->decache_regs(vcpu); 2285 kvm_x86_ops->decache_regs(vcpu);
@@ -2129,6 +2318,13 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2129 vcpu->arch.pio.guest_page_offset = offset_in_page(address); 2318 vcpu->arch.pio.guest_page_offset = offset_in_page(address);
2130 vcpu->arch.pio.rep = rep; 2319 vcpu->arch.pio.rep = rep;
2131 2320
2321 if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
2322 KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size,
2323 handler);
2324 else
2325 KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
2326 handler);
2327
2132 if (!count) { 2328 if (!count) {
2133 kvm_x86_ops->skip_emulated_instruction(vcpu); 2329 kvm_x86_ops->skip_emulated_instruction(vcpu);
2134 return 1; 2330 return 1;
@@ -2163,10 +2359,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2163 kvm_x86_ops->skip_emulated_instruction(vcpu); 2359 kvm_x86_ops->skip_emulated_instruction(vcpu);
2164 2360
2165 for (i = 0; i < nr_pages; ++i) { 2361 for (i = 0; i < nr_pages; ++i) {
2166 down_read(&vcpu->kvm->slots_lock);
2167 page = gva_to_page(vcpu, address + i * PAGE_SIZE); 2362 page = gva_to_page(vcpu, address + i * PAGE_SIZE);
2168 vcpu->arch.pio.guest_pages[i] = page; 2363 vcpu->arch.pio.guest_pages[i] = page;
2169 up_read(&vcpu->kvm->slots_lock);
2170 if (!page) { 2364 if (!page) {
2171 kvm_inject_gp(vcpu, 0); 2365 kvm_inject_gp(vcpu, 0);
2172 free_pio_guest_pages(vcpu); 2366 free_pio_guest_pages(vcpu);
@@ -2238,10 +2432,13 @@ void kvm_arch_exit(void)
2238int kvm_emulate_halt(struct kvm_vcpu *vcpu) 2432int kvm_emulate_halt(struct kvm_vcpu *vcpu)
2239{ 2433{
2240 ++vcpu->stat.halt_exits; 2434 ++vcpu->stat.halt_exits;
2435 KVMTRACE_0D(HLT, vcpu, handler);
2241 if (irqchip_in_kernel(vcpu->kvm)) { 2436 if (irqchip_in_kernel(vcpu->kvm)) {
2242 vcpu->arch.mp_state = VCPU_MP_STATE_HALTED; 2437 vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
2438 up_read(&vcpu->kvm->slots_lock);
2243 kvm_vcpu_block(vcpu); 2439 kvm_vcpu_block(vcpu);
2244 if (vcpu->arch.mp_state != VCPU_MP_STATE_RUNNABLE) 2440 down_read(&vcpu->kvm->slots_lock);
2441 if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
2245 return -EINTR; 2442 return -EINTR;
2246 return 1; 2443 return 1;
2247 } else { 2444 } else {
@@ -2251,9 +2448,19 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
2251} 2448}
2252EXPORT_SYMBOL_GPL(kvm_emulate_halt); 2449EXPORT_SYMBOL_GPL(kvm_emulate_halt);
2253 2450
2451static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0,
2452 unsigned long a1)
2453{
2454 if (is_long_mode(vcpu))
2455 return a0;
2456 else
2457 return a0 | ((gpa_t)a1 << 32);
2458}
2459
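
For a guest that is not in long mode, a 64-bit guest-physical address arrives split across two 32-bit hypercall arguments, which hc_gpa() reassembles: the low word in its first argument, the high word in its second. Guest-side packing would look like (illustrative only):

	/* Split a 64-bit gpa across two hypercall argument registers. */
	arg_lo = (u32)gpa;		/* low 32 bits  */
	arg_hi = (u32)(gpa >> 32);	/* high 32 bits */
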
2254int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) 2460int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
2255{ 2461{
2256 unsigned long nr, a0, a1, a2, a3, ret; 2462 unsigned long nr, a0, a1, a2, a3, ret;
2463 int r = 1;
2257 2464
2258 kvm_x86_ops->cache_regs(vcpu); 2465 kvm_x86_ops->cache_regs(vcpu);
2259 2466
@@ -2263,6 +2470,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
2263 a2 = vcpu->arch.regs[VCPU_REGS_RDX]; 2470 a2 = vcpu->arch.regs[VCPU_REGS_RDX];
2264 a3 = vcpu->arch.regs[VCPU_REGS_RSI]; 2471 a3 = vcpu->arch.regs[VCPU_REGS_RSI];
2265 2472
2473 KVMTRACE_1D(VMMCALL, vcpu, (u32)nr, handler);
2474
2266 if (!is_long_mode(vcpu)) { 2475 if (!is_long_mode(vcpu)) {
2267 nr &= 0xFFFFFFFF; 2476 nr &= 0xFFFFFFFF;
2268 a0 &= 0xFFFFFFFF; 2477 a0 &= 0xFFFFFFFF;
@@ -2275,13 +2484,17 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
2275 case KVM_HC_VAPIC_POLL_IRQ: 2484 case KVM_HC_VAPIC_POLL_IRQ:
2276 ret = 0; 2485 ret = 0;
2277 break; 2486 break;
2487 case KVM_HC_MMU_OP:
2488 r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret);
2489 break;
2278 default: 2490 default:
2279 ret = -KVM_ENOSYS; 2491 ret = -KVM_ENOSYS;
2280 break; 2492 break;
2281 } 2493 }
2282 vcpu->arch.regs[VCPU_REGS_RAX] = ret; 2494 vcpu->arch.regs[VCPU_REGS_RAX] = ret;
2283 kvm_x86_ops->decache_regs(vcpu); 2495 kvm_x86_ops->decache_regs(vcpu);
2284 return 0; 2496 ++vcpu->stat.hypercalls;
2497 return r;
2285} 2498}
2286EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); 2499EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
2287 2500
@@ -2329,7 +2542,7 @@ void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
2329void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, 2542void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
2330 unsigned long *rflags) 2543 unsigned long *rflags)
2331{ 2544{
2332 lmsw(vcpu, msw); 2545 kvm_lmsw(vcpu, msw);
2333 *rflags = kvm_x86_ops->get_rflags(vcpu); 2546 *rflags = kvm_x86_ops->get_rflags(vcpu);
2334} 2547}
2335 2548
@@ -2346,9 +2559,9 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
2346 case 4: 2559 case 4:
2347 return vcpu->arch.cr4; 2560 return vcpu->arch.cr4;
2348 case 8: 2561 case 8:
2349 return get_cr8(vcpu); 2562 return kvm_get_cr8(vcpu);
2350 default: 2563 default:
2351 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr); 2564 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
2352 return 0; 2565 return 0;
2353 } 2566 }
2354} 2567}
@@ -2358,23 +2571,23 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
2358{ 2571{
2359 switch (cr) { 2572 switch (cr) {
2360 case 0: 2573 case 0:
2361 set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val)); 2574 kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val));
2362 *rflags = kvm_x86_ops->get_rflags(vcpu); 2575 *rflags = kvm_x86_ops->get_rflags(vcpu);
2363 break; 2576 break;
2364 case 2: 2577 case 2:
2365 vcpu->arch.cr2 = val; 2578 vcpu->arch.cr2 = val;
2366 break; 2579 break;
2367 case 3: 2580 case 3:
2368 set_cr3(vcpu, val); 2581 kvm_set_cr3(vcpu, val);
2369 break; 2582 break;
2370 case 4: 2583 case 4:
2371 set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val)); 2584 kvm_set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val));
2372 break; 2585 break;
2373 case 8: 2586 case 8:
2374 set_cr8(vcpu, val & 0xfUL); 2587 kvm_set_cr8(vcpu, val & 0xfUL);
2375 break; 2588 break;
2376 default: 2589 default:
2377 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr); 2590 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
2378 } 2591 }
2379} 2592}
2380 2593
@@ -2447,6 +2660,11 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
2447 } 2660 }
2448 kvm_x86_ops->decache_regs(vcpu); 2661 kvm_x86_ops->decache_regs(vcpu);
2449 kvm_x86_ops->skip_emulated_instruction(vcpu); 2662 kvm_x86_ops->skip_emulated_instruction(vcpu);
2663 KVMTRACE_5D(CPUID, vcpu, function,
2664 (u32)vcpu->arch.regs[VCPU_REGS_RAX],
2665 (u32)vcpu->arch.regs[VCPU_REGS_RBX],
2666 (u32)vcpu->arch.regs[VCPU_REGS_RCX],
2667 (u32)vcpu->arch.regs[VCPU_REGS_RDX], handler);
2450} 2668}
2451EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); 2669EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
2452 2670
@@ -2469,7 +2687,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
2469 struct kvm_run *kvm_run) 2687 struct kvm_run *kvm_run)
2470{ 2688{
2471 kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0; 2689 kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
2472 kvm_run->cr8 = get_cr8(vcpu); 2690 kvm_run->cr8 = kvm_get_cr8(vcpu);
2473 kvm_run->apic_base = kvm_get_apic_base(vcpu); 2691 kvm_run->apic_base = kvm_get_apic_base(vcpu);
2474 if (irqchip_in_kernel(vcpu->kvm)) 2692 if (irqchip_in_kernel(vcpu->kvm))
2475 kvm_run->ready_for_interrupt_injection = 1; 2693 kvm_run->ready_for_interrupt_injection = 1;
@@ -2509,16 +2727,17 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2509{ 2727{
2510 int r; 2728 int r;
2511 2729
2512 if (unlikely(vcpu->arch.mp_state == VCPU_MP_STATE_SIPI_RECEIVED)) { 2730 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
2513 pr_debug("vcpu %d received sipi with vector # %x\n", 2731 pr_debug("vcpu %d received sipi with vector # %x\n",
2514 vcpu->vcpu_id, vcpu->arch.sipi_vector); 2732 vcpu->vcpu_id, vcpu->arch.sipi_vector);
2515 kvm_lapic_reset(vcpu); 2733 kvm_lapic_reset(vcpu);
2516 r = kvm_x86_ops->vcpu_reset(vcpu); 2734 r = kvm_x86_ops->vcpu_reset(vcpu);
2517 if (r) 2735 if (r)
2518 return r; 2736 return r;
2519 vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE; 2737 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
2520 } 2738 }
2521 2739
2740 down_read(&vcpu->kvm->slots_lock);
2522 vapic_enter(vcpu); 2741 vapic_enter(vcpu);
2523 2742
2524preempted: 2743preempted:
@@ -2526,6 +2745,10 @@ preempted:
2526 kvm_x86_ops->guest_debug_pre(vcpu); 2745 kvm_x86_ops->guest_debug_pre(vcpu);
2527 2746
2528again: 2747again:
2748 if (vcpu->requests)
2749 if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
2750 kvm_mmu_unload(vcpu);
2751
2529 r = kvm_mmu_reload(vcpu); 2752 r = kvm_mmu_reload(vcpu);
2530 if (unlikely(r)) 2753 if (unlikely(r))
2531 goto out; 2754 goto out;
@@ -2539,6 +2762,11 @@ again:
2539 r = 0; 2762 r = 0;
2540 goto out; 2763 goto out;
2541 } 2764 }
2765 if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) {
2766 kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
2767 r = 0;
2768 goto out;
2769 }
2542 } 2770 }
2543 2771
2544 kvm_inject_pending_timer_irqs(vcpu); 2772 kvm_inject_pending_timer_irqs(vcpu);
@@ -2557,6 +2785,14 @@ again:
2557 goto out; 2785 goto out;
2558 } 2786 }
2559 2787
2788 if (vcpu->requests)
2789 if (test_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) {
2790 local_irq_enable();
2791 preempt_enable();
2792 r = 1;
2793 goto out;
2794 }
2795
2560 if (signal_pending(current)) { 2796 if (signal_pending(current)) {
2561 local_irq_enable(); 2797 local_irq_enable();
2562 preempt_enable(); 2798 preempt_enable();
@@ -2566,6 +2802,13 @@ again:
2566 goto out; 2802 goto out;
2567 } 2803 }
2568 2804
2805 vcpu->guest_mode = 1;
2806 /*
2807 * Make sure that guest_mode assignment won't happen after
2808 * testing the pending IRQ vector bitmap.
2809 */
2810 smp_wmb();
2811
2569 if (vcpu->arch.exception.pending) 2812 if (vcpu->arch.exception.pending)
2570 __queue_exception(vcpu); 2813 __queue_exception(vcpu);
2571 else if (irqchip_in_kernel(vcpu->kvm)) 2814 else if (irqchip_in_kernel(vcpu->kvm))
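
Setting guest_mode before the interrupt checks pairs with the reader in kvm_vcpu_kick() (updated at the end of this patch), which first queues the event and then tests guest_mode to decide whether an IPI is required. A simplified sketch of the intended ordering, with the kicker-side details assumed:

	/* vcpu thread (__vcpu_run)		kicker (kvm_vcpu_kick)
	 *   vcpu->guest_mode = 1;		  queue IRQ / set request bit;
	 *   smp_wmb();				  ...
	 *   check pending IRQs/requests;	  if (vcpu->guest_mode) send IPI;
	 *
	 * Either the vcpu notices the pending event before entering the guest,
	 * or the kicker sees guest_mode == 1 and interrupts it with an IPI.
	 */
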
@@ -2575,13 +2818,15 @@ again:
2575 2818
2576 kvm_lapic_sync_to_vapic(vcpu); 2819 kvm_lapic_sync_to_vapic(vcpu);
2577 2820
2578 vcpu->guest_mode = 1; 2821 up_read(&vcpu->kvm->slots_lock);
2822
2579 kvm_guest_enter(); 2823 kvm_guest_enter();
2580 2824
2581 if (vcpu->requests) 2825 if (vcpu->requests)
2582 if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) 2826 if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
2583 kvm_x86_ops->tlb_flush(vcpu); 2827 kvm_x86_ops->tlb_flush(vcpu);
2584 2828
2829 KVMTRACE_0D(VMENTRY, vcpu, entryexit);
2585 kvm_x86_ops->run(vcpu, kvm_run); 2830 kvm_x86_ops->run(vcpu, kvm_run);
2586 2831
2587 vcpu->guest_mode = 0; 2832 vcpu->guest_mode = 0;
@@ -2601,6 +2846,8 @@ again:
2601 2846
2602 preempt_enable(); 2847 preempt_enable();
2603 2848
2849 down_read(&vcpu->kvm->slots_lock);
2850
2604 /* 2851 /*
2605 * Profile KVM exit RIPs: 2852 * Profile KVM exit RIPs:
2606 */ 2853 */
@@ -2628,14 +2875,18 @@ again:
2628 } 2875 }
2629 2876
2630out: 2877out:
2878 up_read(&vcpu->kvm->slots_lock);
2631 if (r > 0) { 2879 if (r > 0) {
2632 kvm_resched(vcpu); 2880 kvm_resched(vcpu);
2881 down_read(&vcpu->kvm->slots_lock);
2633 goto preempted; 2882 goto preempted;
2634 } 2883 }
2635 2884
2636 post_kvm_run_save(vcpu, kvm_run); 2885 post_kvm_run_save(vcpu, kvm_run);
2637 2886
2887 down_read(&vcpu->kvm->slots_lock);
2638 vapic_exit(vcpu); 2888 vapic_exit(vcpu);
2889 up_read(&vcpu->kvm->slots_lock);
2639 2890
2640 return r; 2891 return r;
2641} 2892}
@@ -2647,7 +2898,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2647 2898
2648 vcpu_load(vcpu); 2899 vcpu_load(vcpu);
2649 2900
2650 if (unlikely(vcpu->arch.mp_state == VCPU_MP_STATE_UNINITIALIZED)) { 2901 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
2651 kvm_vcpu_block(vcpu); 2902 kvm_vcpu_block(vcpu);
2652 vcpu_put(vcpu); 2903 vcpu_put(vcpu);
2653 return -EAGAIN; 2904 return -EAGAIN;
@@ -2658,7 +2909,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2658 2909
2659 /* re-sync apic's tpr */ 2910 /* re-sync apic's tpr */
2660 if (!irqchip_in_kernel(vcpu->kvm)) 2911 if (!irqchip_in_kernel(vcpu->kvm))
2661 set_cr8(vcpu, kvm_run->cr8); 2912 kvm_set_cr8(vcpu, kvm_run->cr8);
2662 2913
2663 if (vcpu->arch.pio.cur_count) { 2914 if (vcpu->arch.pio.cur_count) {
2664 r = complete_pio(vcpu); 2915 r = complete_pio(vcpu);
@@ -2670,9 +2921,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2670 memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); 2921 memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
2671 vcpu->mmio_read_completed = 1; 2922 vcpu->mmio_read_completed = 1;
2672 vcpu->mmio_needed = 0; 2923 vcpu->mmio_needed = 0;
2924
2925 down_read(&vcpu->kvm->slots_lock);
2673 r = emulate_instruction(vcpu, kvm_run, 2926 r = emulate_instruction(vcpu, kvm_run,
2674 vcpu->arch.mmio_fault_cr2, 0, 2927 vcpu->arch.mmio_fault_cr2, 0,
2675 EMULTYPE_NO_DECODE); 2928 EMULTYPE_NO_DECODE);
2929 up_read(&vcpu->kvm->slots_lock);
2676 if (r == EMULATE_DO_MMIO) { 2930 if (r == EMULATE_DO_MMIO) {
2677 /* 2931 /*
2678 * Read-modify-write. Back to userspace. 2932 * Read-modify-write. Back to userspace.
@@ -2773,7 +3027,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2773static void get_segment(struct kvm_vcpu *vcpu, 3027static void get_segment(struct kvm_vcpu *vcpu,
2774 struct kvm_segment *var, int seg) 3028 struct kvm_segment *var, int seg)
2775{ 3029{
2776 return kvm_x86_ops->get_segment(vcpu, var, seg); 3030 kvm_x86_ops->get_segment(vcpu, var, seg);
2777} 3031}
2778 3032
2779void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) 3033void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
@@ -2816,7 +3070,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2816 sregs->cr2 = vcpu->arch.cr2; 3070 sregs->cr2 = vcpu->arch.cr2;
2817 sregs->cr3 = vcpu->arch.cr3; 3071 sregs->cr3 = vcpu->arch.cr3;
2818 sregs->cr4 = vcpu->arch.cr4; 3072 sregs->cr4 = vcpu->arch.cr4;
2819 sregs->cr8 = get_cr8(vcpu); 3073 sregs->cr8 = kvm_get_cr8(vcpu);
2820 sregs->efer = vcpu->arch.shadow_efer; 3074 sregs->efer = vcpu->arch.shadow_efer;
2821 sregs->apic_base = kvm_get_apic_base(vcpu); 3075 sregs->apic_base = kvm_get_apic_base(vcpu);
2822 3076
@@ -2836,12 +3090,438 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2836 return 0; 3090 return 0;
2837} 3091}
2838 3092
3093int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3094 struct kvm_mp_state *mp_state)
3095{
3096 vcpu_load(vcpu);
3097 mp_state->mp_state = vcpu->arch.mp_state;
3098 vcpu_put(vcpu);
3099 return 0;
3100}
3101
3102int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3103 struct kvm_mp_state *mp_state)
3104{
3105 vcpu_load(vcpu);
3106 vcpu->arch.mp_state = mp_state->mp_state;
3107 vcpu_put(vcpu);
3108 return 0;
3109}
3110
2839static void set_segment(struct kvm_vcpu *vcpu, 3111static void set_segment(struct kvm_vcpu *vcpu,
2840 struct kvm_segment *var, int seg) 3112 struct kvm_segment *var, int seg)
2841{ 3113{
2842 return kvm_x86_ops->set_segment(vcpu, var, seg); 3114 kvm_x86_ops->set_segment(vcpu, var, seg);
3115}
3116
3117static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector,
3118 struct kvm_segment *kvm_desct)
3119{
3120 kvm_desct->base = seg_desc->base0;
3121 kvm_desct->base |= seg_desc->base1 << 16;
3122 kvm_desct->base |= seg_desc->base2 << 24;
3123 kvm_desct->limit = seg_desc->limit0;
3124 kvm_desct->limit |= seg_desc->limit << 16;
3125 kvm_desct->selector = selector;
3126 kvm_desct->type = seg_desc->type;
3127 kvm_desct->present = seg_desc->p;
3128 kvm_desct->dpl = seg_desc->dpl;
3129 kvm_desct->db = seg_desc->d;
3130 kvm_desct->s = seg_desc->s;
3131 kvm_desct->l = seg_desc->l;
3132 kvm_desct->g = seg_desc->g;
3133 kvm_desct->avl = seg_desc->avl;
3134 if (!selector)
3135 kvm_desct->unusable = 1;
3136 else
3137 kvm_desct->unusable = 0;
3138 kvm_desct->padding = 0;
3139}
3140
3141static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu,
3142 u16 selector,
3143 struct descriptor_table *dtable)
3144{
3145 if (selector & 1 << 2) {
3146 struct kvm_segment kvm_seg;
3147
3148 get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR);
3149
3150 if (kvm_seg.unusable)
3151 dtable->limit = 0;
3152 else
3153 dtable->limit = kvm_seg.limit;
3154 dtable->base = kvm_seg.base;
3155 }
3156 else
3157 kvm_x86_ops->get_gdt(vcpu, dtable);
3158}
3159
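
Both this helper and the descriptor load/save routines below decode the standard x86 segment selector layout, summarized here for reference:

	/* Segment selector layout:
	 *   bits 15..3  descriptor index      -> index = selector >> 3
	 *   bit  2      table indicator (TI)  -> selector & (1 << 2): set = LDT
	 *   bits 1..0   requested priv. level -> rpl = selector & 3
	 * Each descriptor is 8 bytes, hence the index * 8 offsets below. */
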
 3160/* allowed only for 8-byte segment descriptors */
3161static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
3162 struct desc_struct *seg_desc)
3163{
3164 struct descriptor_table dtable;
3165 u16 index = selector >> 3;
3166
3167 get_segment_descritptor_dtable(vcpu, selector, &dtable);
3168
3169 if (dtable.limit < index * 8 + 7) {
3170 kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
3171 return 1;
3172 }
3173 return kvm_read_guest(vcpu->kvm, dtable.base + index * 8, seg_desc, 8);
3174}
3175
 3176/* allowed only for 8-byte segment descriptors */
3177static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
3178 struct desc_struct *seg_desc)
3179{
3180 struct descriptor_table dtable;
3181 u16 index = selector >> 3;
3182
3183 get_segment_descritptor_dtable(vcpu, selector, &dtable);
3184
3185 if (dtable.limit < index * 8 + 7)
3186 return 1;
3187 return kvm_write_guest(vcpu->kvm, dtable.base + index * 8, seg_desc, 8);
3188}
3189
3190static u32 get_tss_base_addr(struct kvm_vcpu *vcpu,
3191 struct desc_struct *seg_desc)
3192{
3193 u32 base_addr;
3194
3195 base_addr = seg_desc->base0;
3196 base_addr |= (seg_desc->base1 << 16);
3197 base_addr |= (seg_desc->base2 << 24);
3198
3199 return base_addr;
3200}
3201
3202static int load_tss_segment32(struct kvm_vcpu *vcpu,
3203 struct desc_struct *seg_desc,
3204 struct tss_segment_32 *tss)
3205{
3206 u32 base_addr;
3207
3208 base_addr = get_tss_base_addr(vcpu, seg_desc);
3209
3210 return kvm_read_guest(vcpu->kvm, base_addr, tss,
3211 sizeof(struct tss_segment_32));
3212}
3213
3214static int save_tss_segment32(struct kvm_vcpu *vcpu,
3215 struct desc_struct *seg_desc,
3216 struct tss_segment_32 *tss)
3217{
3218 u32 base_addr;
3219
3220 base_addr = get_tss_base_addr(vcpu, seg_desc);
3221
3222 return kvm_write_guest(vcpu->kvm, base_addr, tss,
3223 sizeof(struct tss_segment_32));
3224}
3225
3226static int load_tss_segment16(struct kvm_vcpu *vcpu,
3227 struct desc_struct *seg_desc,
3228 struct tss_segment_16 *tss)
3229{
3230 u32 base_addr;
3231
3232 base_addr = get_tss_base_addr(vcpu, seg_desc);
3233
3234 return kvm_read_guest(vcpu->kvm, base_addr, tss,
3235 sizeof(struct tss_segment_16));
3236}
3237
3238static int save_tss_segment16(struct kvm_vcpu *vcpu,
3239 struct desc_struct *seg_desc,
3240 struct tss_segment_16 *tss)
3241{
3242 u32 base_addr;
3243
3244 base_addr = get_tss_base_addr(vcpu, seg_desc);
3245
3246 return kvm_write_guest(vcpu->kvm, base_addr, tss,
3247 sizeof(struct tss_segment_16));
3248}
3249
3250static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg)
3251{
3252 struct kvm_segment kvm_seg;
3253
3254 get_segment(vcpu, &kvm_seg, seg);
3255 return kvm_seg.selector;
3256}
3257
3258static int load_segment_descriptor_to_kvm_desct(struct kvm_vcpu *vcpu,
3259 u16 selector,
3260 struct kvm_segment *kvm_seg)
3261{
3262 struct desc_struct seg_desc;
3263
3264 if (load_guest_segment_descriptor(vcpu, selector, &seg_desc))
3265 return 1;
3266 seg_desct_to_kvm_desct(&seg_desc, selector, kvm_seg);
3267 return 0;
3268}
3269
3270static int load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
3271 int type_bits, int seg)
3272{
3273 struct kvm_segment kvm_seg;
3274
3275 if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg))
3276 return 1;
3277 kvm_seg.type |= type_bits;
3278
3279 if (seg != VCPU_SREG_SS && seg != VCPU_SREG_CS &&
3280 seg != VCPU_SREG_LDTR)
3281 if (!kvm_seg.s)
3282 kvm_seg.unusable = 1;
3283
3284 set_segment(vcpu, &kvm_seg, seg);
3285 return 0;
3286}
3287
3288static void save_state_to_tss32(struct kvm_vcpu *vcpu,
3289 struct tss_segment_32 *tss)
3290{
3291 tss->cr3 = vcpu->arch.cr3;
3292 tss->eip = vcpu->arch.rip;
3293 tss->eflags = kvm_x86_ops->get_rflags(vcpu);
3294 tss->eax = vcpu->arch.regs[VCPU_REGS_RAX];
3295 tss->ecx = vcpu->arch.regs[VCPU_REGS_RCX];
3296 tss->edx = vcpu->arch.regs[VCPU_REGS_RDX];
3297 tss->ebx = vcpu->arch.regs[VCPU_REGS_RBX];
3298 tss->esp = vcpu->arch.regs[VCPU_REGS_RSP];
3299 tss->ebp = vcpu->arch.regs[VCPU_REGS_RBP];
3300 tss->esi = vcpu->arch.regs[VCPU_REGS_RSI];
3301 tss->edi = vcpu->arch.regs[VCPU_REGS_RDI];
3302
3303 tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
3304 tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
3305 tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
3306 tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
3307 tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS);
3308 tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS);
3309 tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR);
3310 tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR);
3311}
3312
3313static int load_state_from_tss32(struct kvm_vcpu *vcpu,
3314 struct tss_segment_32 *tss)
3315{
3316 kvm_set_cr3(vcpu, tss->cr3);
3317
3318 vcpu->arch.rip = tss->eip;
3319 kvm_x86_ops->set_rflags(vcpu, tss->eflags | 2);
3320
3321 vcpu->arch.regs[VCPU_REGS_RAX] = tss->eax;
3322 vcpu->arch.regs[VCPU_REGS_RCX] = tss->ecx;
3323 vcpu->arch.regs[VCPU_REGS_RDX] = tss->edx;
3324 vcpu->arch.regs[VCPU_REGS_RBX] = tss->ebx;
3325 vcpu->arch.regs[VCPU_REGS_RSP] = tss->esp;
3326 vcpu->arch.regs[VCPU_REGS_RBP] = tss->ebp;
3327 vcpu->arch.regs[VCPU_REGS_RSI] = tss->esi;
3328 vcpu->arch.regs[VCPU_REGS_RDI] = tss->edi;
3329
3330 if (load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR))
3331 return 1;
3332
3333 if (load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES))
3334 return 1;
3335
3336 if (load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS))
3337 return 1;
3338
3339 if (load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS))
3340 return 1;
3341
3342 if (load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS))
3343 return 1;
3344
3345 if (load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS))
3346 return 1;
3347
3348 if (load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS))
3349 return 1;
3350 return 0;
3351}
3352
3353static void save_state_to_tss16(struct kvm_vcpu *vcpu,
3354 struct tss_segment_16 *tss)
3355{
3356 tss->ip = vcpu->arch.rip;
3357 tss->flag = kvm_x86_ops->get_rflags(vcpu);
3358 tss->ax = vcpu->arch.regs[VCPU_REGS_RAX];
3359 tss->cx = vcpu->arch.regs[VCPU_REGS_RCX];
3360 tss->dx = vcpu->arch.regs[VCPU_REGS_RDX];
3361 tss->bx = vcpu->arch.regs[VCPU_REGS_RBX];
3362 tss->sp = vcpu->arch.regs[VCPU_REGS_RSP];
3363 tss->bp = vcpu->arch.regs[VCPU_REGS_RBP];
3364 tss->si = vcpu->arch.regs[VCPU_REGS_RSI];
3365 tss->di = vcpu->arch.regs[VCPU_REGS_RDI];
3366
3367 tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
3368 tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
3369 tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
3370 tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
3371 tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR);
3372 tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR);
3373}
3374
3375static int load_state_from_tss16(struct kvm_vcpu *vcpu,
3376 struct tss_segment_16 *tss)
3377{
3378 vcpu->arch.rip = tss->ip;
3379 kvm_x86_ops->set_rflags(vcpu, tss->flag | 2);
3380 vcpu->arch.regs[VCPU_REGS_RAX] = tss->ax;
3381 vcpu->arch.regs[VCPU_REGS_RCX] = tss->cx;
3382 vcpu->arch.regs[VCPU_REGS_RDX] = tss->dx;
3383 vcpu->arch.regs[VCPU_REGS_RBX] = tss->bx;
3384 vcpu->arch.regs[VCPU_REGS_RSP] = tss->sp;
3385 vcpu->arch.regs[VCPU_REGS_RBP] = tss->bp;
3386 vcpu->arch.regs[VCPU_REGS_RSI] = tss->si;
3387 vcpu->arch.regs[VCPU_REGS_RDI] = tss->di;
3388
3389 if (load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR))
3390 return 1;
3391
3392 if (load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES))
3393 return 1;
3394
3395 if (load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS))
3396 return 1;
3397
3398 if (load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS))
3399 return 1;
3400
3401 if (load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS))
3402 return 1;
3403 return 0;
3404}
3405
3406int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
3407 struct desc_struct *cseg_desc,
3408 struct desc_struct *nseg_desc)
3409{
3410 struct tss_segment_16 tss_segment_16;
3411 int ret = 0;
3412
3413 if (load_tss_segment16(vcpu, cseg_desc, &tss_segment_16))
3414 goto out;
3415
3416 save_state_to_tss16(vcpu, &tss_segment_16);
3417 save_tss_segment16(vcpu, cseg_desc, &tss_segment_16);
3418
3419 if (load_tss_segment16(vcpu, nseg_desc, &tss_segment_16))
3420 goto out;
3421 if (load_state_from_tss16(vcpu, &tss_segment_16))
3422 goto out;
3423
3424 ret = 1;
3425out:
3426 return ret;
3427}
3428
3429int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
3430 struct desc_struct *cseg_desc,
3431 struct desc_struct *nseg_desc)
3432{
3433 struct tss_segment_32 tss_segment_32;
3434 int ret = 0;
3435
3436 if (load_tss_segment32(vcpu, cseg_desc, &tss_segment_32))
3437 goto out;
3438
3439 save_state_to_tss32(vcpu, &tss_segment_32);
3440 save_tss_segment32(vcpu, cseg_desc, &tss_segment_32);
3441
3442 if (load_tss_segment32(vcpu, nseg_desc, &tss_segment_32))
3443 goto out;
3444 if (load_state_from_tss32(vcpu, &tss_segment_32))
3445 goto out;
3446
3447 ret = 1;
3448out:
3449 return ret;
2843} 3450}
2844 3451
3452int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
3453{
3454 struct kvm_segment tr_seg;
3455 struct desc_struct cseg_desc;
3456 struct desc_struct nseg_desc;
3457 int ret = 0;
3458
3459 get_segment(vcpu, &tr_seg, VCPU_SREG_TR);
3460
3461 if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc))
3462 goto out;
3463
3464 if (load_guest_segment_descriptor(vcpu, tr_seg.selector, &cseg_desc))
3465 goto out;
3466
3467
3468 if (reason != TASK_SWITCH_IRET) {
3469 int cpl;
3470
3471 cpl = kvm_x86_ops->get_cpl(vcpu);
3472 if ((tss_selector & 3) > nseg_desc.dpl || cpl > nseg_desc.dpl) {
3473 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
3474 return 1;
3475 }
3476 }
3477
3478 if (!nseg_desc.p || (nseg_desc.limit0 | nseg_desc.limit << 16) < 0x67) {
3479 kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc);
3480 return 1;
3481 }
3482
3483 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
 3484 cseg_desc.type &= ~(1 << 8); /* clear the B flag */
3485 save_guest_segment_descriptor(vcpu, tr_seg.selector,
3486 &cseg_desc);
3487 }
3488
3489 if (reason == TASK_SWITCH_IRET) {
3490 u32 eflags = kvm_x86_ops->get_rflags(vcpu);
3491 kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);
3492 }
3493
3494 kvm_x86_ops->skip_emulated_instruction(vcpu);
3495 kvm_x86_ops->cache_regs(vcpu);
3496
3497 if (nseg_desc.type & 8)
3498 ret = kvm_task_switch_32(vcpu, tss_selector, &cseg_desc,
3499 &nseg_desc);
3500 else
3501 ret = kvm_task_switch_16(vcpu, tss_selector, &cseg_desc,
3502 &nseg_desc);
3503
3504 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) {
3505 u32 eflags = kvm_x86_ops->get_rflags(vcpu);
3506 kvm_x86_ops->set_rflags(vcpu, eflags | X86_EFLAGS_NT);
3507 }
3508
3509 if (reason != TASK_SWITCH_IRET) {
3510 nseg_desc.type |= (1 << 8);
3511 save_guest_segment_descriptor(vcpu, tss_selector,
3512 &nseg_desc);
3513 }
3514
3515 kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 | X86_CR0_TS);
3516 seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg);
3517 tr_seg.type = 11;
3518 set_segment(vcpu, &tr_seg, VCPU_SREG_TR);
3519out:
3520 kvm_x86_ops->decache_regs(vcpu);
3521 return ret;
3522}
3523EXPORT_SYMBOL_GPL(kvm_task_switch);
3524
2845int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 3525int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2846 struct kvm_sregs *sregs) 3526 struct kvm_sregs *sregs)
2847{ 3527{
@@ -2862,12 +3542,10 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2862 mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3; 3542 mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3;
2863 vcpu->arch.cr3 = sregs->cr3; 3543 vcpu->arch.cr3 = sregs->cr3;
2864 3544
2865 set_cr8(vcpu, sregs->cr8); 3545 kvm_set_cr8(vcpu, sregs->cr8);
2866 3546
2867 mmu_reset_needed |= vcpu->arch.shadow_efer != sregs->efer; 3547 mmu_reset_needed |= vcpu->arch.shadow_efer != sregs->efer;
2868#ifdef CONFIG_X86_64
2869 kvm_x86_ops->set_efer(vcpu, sregs->efer); 3548 kvm_x86_ops->set_efer(vcpu, sregs->efer);
2870#endif
2871 kvm_set_apic_base(vcpu, sregs->apic_base); 3549 kvm_set_apic_base(vcpu, sregs->apic_base);
2872 3550
2873 kvm_x86_ops->decache_cr4_guest_bits(vcpu); 3551 kvm_x86_ops->decache_cr4_guest_bits(vcpu);
@@ -3141,9 +3819,9 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
3141 3819
3142 vcpu->arch.mmu.root_hpa = INVALID_PAGE; 3820 vcpu->arch.mmu.root_hpa = INVALID_PAGE;
3143 if (!irqchip_in_kernel(kvm) || vcpu->vcpu_id == 0) 3821 if (!irqchip_in_kernel(kvm) || vcpu->vcpu_id == 0)
3144 vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE; 3822 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
3145 else 3823 else
3146 vcpu->arch.mp_state = VCPU_MP_STATE_UNINITIALIZED; 3824 vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
3147 3825
3148 page = alloc_page(GFP_KERNEL | __GFP_ZERO); 3826 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
3149 if (!page) { 3827 if (!page) {
@@ -3175,7 +3853,9 @@ fail:
3175void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) 3853void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
3176{ 3854{
3177 kvm_free_lapic(vcpu); 3855 kvm_free_lapic(vcpu);
3856 down_read(&vcpu->kvm->slots_lock);
3178 kvm_mmu_destroy(vcpu); 3857 kvm_mmu_destroy(vcpu);
3858 up_read(&vcpu->kvm->slots_lock);
3179 free_page((unsigned long)vcpu->arch.pio_data); 3859 free_page((unsigned long)vcpu->arch.pio_data);
3180} 3860}
3181 3861
@@ -3219,10 +3899,13 @@ static void kvm_free_vcpus(struct kvm *kvm)
3219 3899
3220void kvm_arch_destroy_vm(struct kvm *kvm) 3900void kvm_arch_destroy_vm(struct kvm *kvm)
3221{ 3901{
3902 kvm_free_pit(kvm);
3222 kfree(kvm->arch.vpic); 3903 kfree(kvm->arch.vpic);
3223 kfree(kvm->arch.vioapic); 3904 kfree(kvm->arch.vioapic);
3224 kvm_free_vcpus(kvm); 3905 kvm_free_vcpus(kvm);
3225 kvm_free_physmem(kvm); 3906 kvm_free_physmem(kvm);
3907 if (kvm->arch.apic_access_page)
3908 put_page(kvm->arch.apic_access_page);
3226 kfree(kvm); 3909 kfree(kvm);
3227} 3910}
3228 3911
@@ -3278,8 +3961,8 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
3278 3961
3279int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 3962int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3280{ 3963{
3281 return vcpu->arch.mp_state == VCPU_MP_STATE_RUNNABLE 3964 return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
3282 || vcpu->arch.mp_state == VCPU_MP_STATE_SIPI_RECEIVED; 3965 || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED;
3283} 3966}
3284 3967
3285static void vcpu_kick_intr(void *info) 3968static void vcpu_kick_intr(void *info)
@@ -3293,11 +3976,17 @@ static void vcpu_kick_intr(void *info)
3293void kvm_vcpu_kick(struct kvm_vcpu *vcpu) 3976void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
3294{ 3977{
3295 int ipi_pcpu = vcpu->cpu; 3978 int ipi_pcpu = vcpu->cpu;
3979 int cpu = get_cpu();
3296 3980
3297 if (waitqueue_active(&vcpu->wq)) { 3981 if (waitqueue_active(&vcpu->wq)) {
3298 wake_up_interruptible(&vcpu->wq); 3982 wake_up_interruptible(&vcpu->wq);
3299 ++vcpu->stat.halt_wakeup; 3983 ++vcpu->stat.halt_wakeup;
3300 } 3984 }
3301 if (vcpu->guest_mode) 3985 /*
3986 * We may be called synchronously with irqs disabled in guest mode,
3987 * So need not to call smp_call_function_single() in that case.
3988 */
3989 if (vcpu->guest_mode && vcpu->cpu != cpu)
3302 smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0); 3990 smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0);
3991 put_cpu();
3303} 3992}