author    Ingo Molnar <mingo@elte.hu>  2008-10-28 11:26:12 -0400
committer Ingo Molnar <mingo@elte.hu>  2008-10-28 11:26:12 -0400
commit    7a9787e1eba95a166265e6a260cf30af04ef0a99 (patch)
tree      e730a4565e0318140d2fbd2f0415d18a339d7336 /arch/x86/kvm/x86.c
parent    41b9eb264c8407655db57b60b4457fe1b2ec9977 (diff)
parent    0173a3265b228da319ceb9c1ec6a5682fd1b2d92 (diff)
Merge commit 'v2.6.28-rc2' into x86/pci-ioapic-boot-irq-quirks
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r--  arch/x86/kvm/x86.c  984
1 file changed, 544 insertions(+), 440 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0faa2546b1cd..4f0677d1eae8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4,10 +4,14 @@
4 * derived from drivers/kvm/kvm_main.c 4 * derived from drivers/kvm/kvm_main.c
5 * 5 *
6 * Copyright (C) 2006 Qumranet, Inc. 6 * Copyright (C) 2006 Qumranet, Inc.
7 * Copyright (C) 2008 Qumranet, Inc.
8 * Copyright IBM Corporation, 2008
7 * 9 *
8 * Authors: 10 * Authors:
9 * Avi Kivity <avi@qumranet.com> 11 * Avi Kivity <avi@qumranet.com>
10 * Yaniv Kamay <yaniv@qumranet.com> 12 * Yaniv Kamay <yaniv@qumranet.com>
13 * Amit Shah <amit.shah@qumranet.com>
14 * Ben-Ami Yassour <benami@il.ibm.com>
11 * 15 *
12 * This work is licensed under the terms of the GNU GPL, version 2. See 16 * This work is licensed under the terms of the GNU GPL, version 2. See
13 * the COPYING file in the top-level directory. 17 * the COPYING file in the top-level directory.
@@ -19,14 +23,18 @@
19#include "mmu.h" 23#include "mmu.h"
20#include "i8254.h" 24#include "i8254.h"
21#include "tss.h" 25#include "tss.h"
26#include "kvm_cache_regs.h"
27#include "x86.h"
22 28
23#include <linux/clocksource.h> 29#include <linux/clocksource.h>
30#include <linux/interrupt.h>
24#include <linux/kvm.h> 31#include <linux/kvm.h>
25#include <linux/fs.h> 32#include <linux/fs.h>
26#include <linux/vmalloc.h> 33#include <linux/vmalloc.h>
27#include <linux/module.h> 34#include <linux/module.h>
28#include <linux/mman.h> 35#include <linux/mman.h>
29#include <linux/highmem.h> 36#include <linux/highmem.h>
37#include <linux/intel-iommu.h>
30 38
31#include <asm/uaccess.h> 39#include <asm/uaccess.h>
32#include <asm/msr.h> 40#include <asm/msr.h>
@@ -61,6 +69,7 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
61 struct kvm_cpuid_entry2 __user *entries); 69 struct kvm_cpuid_entry2 __user *entries);
62 70
63struct kvm_x86_ops *kvm_x86_ops; 71struct kvm_x86_ops *kvm_x86_ops;
72EXPORT_SYMBOL_GPL(kvm_x86_ops);
64 73
65struct kvm_stats_debugfs_item debugfs_entries[] = { 74struct kvm_stats_debugfs_item debugfs_entries[] = {
66 { "pf_fixed", VCPU_STAT(pf_fixed) }, 75 { "pf_fixed", VCPU_STAT(pf_fixed) },
@@ -72,6 +81,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
72 { "mmio_exits", VCPU_STAT(mmio_exits) }, 81 { "mmio_exits", VCPU_STAT(mmio_exits) },
73 { "signal_exits", VCPU_STAT(signal_exits) }, 82 { "signal_exits", VCPU_STAT(signal_exits) },
74 { "irq_window", VCPU_STAT(irq_window_exits) }, 83 { "irq_window", VCPU_STAT(irq_window_exits) },
84 { "nmi_window", VCPU_STAT(nmi_window_exits) },
75 { "halt_exits", VCPU_STAT(halt_exits) }, 85 { "halt_exits", VCPU_STAT(halt_exits) },
76 { "halt_wakeup", VCPU_STAT(halt_wakeup) }, 86 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
77 { "hypercalls", VCPU_STAT(hypercalls) }, 87 { "hypercalls", VCPU_STAT(hypercalls) },
@@ -82,6 +92,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
82 { "fpu_reload", VCPU_STAT(fpu_reload) }, 92 { "fpu_reload", VCPU_STAT(fpu_reload) },
83 { "insn_emulation", VCPU_STAT(insn_emulation) }, 93 { "insn_emulation", VCPU_STAT(insn_emulation) },
84 { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) }, 94 { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
95 { "irq_injections", VCPU_STAT(irq_injections) },
85 { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) }, 96 { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
86 { "mmu_pte_write", VM_STAT(mmu_pte_write) }, 97 { "mmu_pte_write", VM_STAT(mmu_pte_write) },
87 { "mmu_pte_updated", VM_STAT(mmu_pte_updated) }, 98 { "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
@@ -89,12 +100,12 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
89 { "mmu_flooded", VM_STAT(mmu_flooded) }, 100 { "mmu_flooded", VM_STAT(mmu_flooded) },
90 { "mmu_recycled", VM_STAT(mmu_recycled) }, 101 { "mmu_recycled", VM_STAT(mmu_recycled) },
91 { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, 102 { "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
103 { "mmu_unsync", VM_STAT(mmu_unsync) },
92 { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, 104 { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
93 { "largepages", VM_STAT(lpages) }, 105 { "largepages", VM_STAT(lpages) },
94 { NULL } 106 { NULL }
95}; 107};
96 108
97
98unsigned long segment_base(u16 selector) 109unsigned long segment_base(u16 selector)
99{ 110{
100 struct descriptor_table gdt; 111 struct descriptor_table gdt;
@@ -173,6 +184,12 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
173 kvm_queue_exception_e(vcpu, PF_VECTOR, error_code); 184 kvm_queue_exception_e(vcpu, PF_VECTOR, error_code);
174} 185}
175 186
187void kvm_inject_nmi(struct kvm_vcpu *vcpu)
188{
189 vcpu->arch.nmi_pending = 1;
190}
191EXPORT_SYMBOL_GPL(kvm_inject_nmi);
192
176void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) 193void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
177{ 194{
178 WARN_ON(vcpu->arch.exception.pending); 195 WARN_ON(vcpu->arch.exception.pending);
@@ -345,6 +362,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cr4);
345void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) 362void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
346{ 363{
347 if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) { 364 if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) {
365 kvm_mmu_sync_roots(vcpu);
348 kvm_mmu_flush_tlb(vcpu); 366 kvm_mmu_flush_tlb(vcpu);
349 return; 367 return;
350 } 368 }
@@ -557,7 +575,7 @@ static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info *
557 hv_clock->tsc_to_system_mul = div_frac(nsecs, tps32); 575 hv_clock->tsc_to_system_mul = div_frac(nsecs, tps32);
558 576
559 pr_debug("%s: tsc_khz %u, tsc_shift %d, tsc_mul %u\n", 577 pr_debug("%s: tsc_khz %u, tsc_shift %d, tsc_mul %u\n",
560 __FUNCTION__, tsc_khz, hv_clock->tsc_shift, 578 __func__, tsc_khz, hv_clock->tsc_shift,
561 hv_clock->tsc_to_system_mul); 579 hv_clock->tsc_to_system_mul);
562} 580}
563 581
@@ -604,6 +622,38 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
604 mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT); 622 mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
605} 623}
606 624
625static bool msr_mtrr_valid(unsigned msr)
626{
627 switch (msr) {
628 case 0x200 ... 0x200 + 2 * KVM_NR_VAR_MTRR - 1:
629 case MSR_MTRRfix64K_00000:
630 case MSR_MTRRfix16K_80000:
631 case MSR_MTRRfix16K_A0000:
632 case MSR_MTRRfix4K_C0000:
633 case MSR_MTRRfix4K_C8000:
634 case MSR_MTRRfix4K_D0000:
635 case MSR_MTRRfix4K_D8000:
636 case MSR_MTRRfix4K_E0000:
637 case MSR_MTRRfix4K_E8000:
638 case MSR_MTRRfix4K_F0000:
639 case MSR_MTRRfix4K_F8000:
640 case MSR_MTRRdefType:
641 case MSR_IA32_CR_PAT:
642 return true;
643 case 0x2f8:
644 return true;
645 }
646 return false;
647}
648
649static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
650{
651 if (!msr_mtrr_valid(msr))
652 return 1;
653
654 vcpu->arch.mtrr[msr - 0x200] = data;
655 return 0;
656}
607 657
608int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) 658int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
609{ 659{
@@ -623,10 +673,23 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
623 pr_unimpl(vcpu, "%s: MSR_IA32_MCG_CTL 0x%llx, nop\n", 673 pr_unimpl(vcpu, "%s: MSR_IA32_MCG_CTL 0x%llx, nop\n",
624 __func__, data); 674 __func__, data);
625 break; 675 break;
676 case MSR_IA32_DEBUGCTLMSR:
677 if (!data) {
678 /* We support the non-activated case already */
679 break;
680 } else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
681 /* Values other than LBR and BTF are vendor-specific,
682 thus reserved and should throw a #GP */
683 return 1;
684 }
685 pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
686 __func__, data);
687 break;
626 case MSR_IA32_UCODE_REV: 688 case MSR_IA32_UCODE_REV:
627 case MSR_IA32_UCODE_WRITE: 689 case MSR_IA32_UCODE_WRITE:
628 case 0x200 ... 0x2ff: /* MTRRs */
629 break; 690 break;
691 case 0x200 ... 0x2ff:
692 return set_msr_mtrr(vcpu, msr, data);
630 case MSR_IA32_APICBASE: 693 case MSR_IA32_APICBASE:
631 kvm_set_apic_base(vcpu, data); 694 kvm_set_apic_base(vcpu, data);
632 break; 695 break;
@@ -652,10 +715,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
652 /* ...but clean it before doing the actual write */ 715 /* ...but clean it before doing the actual write */
653 vcpu->arch.time_offset = data & ~(PAGE_MASK | 1); 716 vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
654 717
655 down_read(&current->mm->mmap_sem);
656 vcpu->arch.time_page = 718 vcpu->arch.time_page =
657 gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT); 719 gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
658 up_read(&current->mm->mmap_sem);
659 720
660 if (is_error_page(vcpu->arch.time_page)) { 721 if (is_error_page(vcpu->arch.time_page)) {
661 kvm_release_page_clean(vcpu->arch.time_page); 722 kvm_release_page_clean(vcpu->arch.time_page);
@@ -684,6 +745,15 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
684 return kvm_x86_ops->get_msr(vcpu, msr_index, pdata); 745 return kvm_x86_ops->get_msr(vcpu, msr_index, pdata);
685} 746}
686 747
748static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
749{
750 if (!msr_mtrr_valid(msr))
751 return 1;
752
753 *pdata = vcpu->arch.mtrr[msr - 0x200];
754 return 0;
755}
756
687int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) 757int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
688{ 758{
689 u64 data; 759 u64 data;
@@ -703,13 +773,21 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
703 case MSR_IA32_MC0_MISC+8: 773 case MSR_IA32_MC0_MISC+8:
704 case MSR_IA32_MC0_MISC+12: 774 case MSR_IA32_MC0_MISC+12:
705 case MSR_IA32_MC0_MISC+16: 775 case MSR_IA32_MC0_MISC+16:
776 case MSR_IA32_MC0_MISC+20:
706 case MSR_IA32_UCODE_REV: 777 case MSR_IA32_UCODE_REV:
707 case MSR_IA32_EBL_CR_POWERON: 778 case MSR_IA32_EBL_CR_POWERON:
708 /* MTRR registers */ 779 case MSR_IA32_DEBUGCTLMSR:
709 case 0xfe: 780 case MSR_IA32_LASTBRANCHFROMIP:
710 case 0x200 ... 0x2ff: 781 case MSR_IA32_LASTBRANCHTOIP:
782 case MSR_IA32_LASTINTFROMIP:
783 case MSR_IA32_LASTINTTOIP:
711 data = 0; 784 data = 0;
712 break; 785 break;
786 case MSR_MTRRcap:
787 data = 0x500 | KVM_NR_VAR_MTRR;
788 break;
789 case 0x200 ... 0x2ff:
790 return get_msr_mtrr(vcpu, msr, pdata);
713 case 0xcd: /* fsb frequency */ 791 case 0xcd: /* fsb frequency */
714 data = 3; 792 data = 3;
715 break; 793 break;
@@ -817,41 +895,6 @@ out:
817 return r; 895 return r;
818} 896}
819 897
820/*
821 * Make sure that a cpu that is being hot-unplugged does not have any vcpus
822 * cached on it.
823 */
824void decache_vcpus_on_cpu(int cpu)
825{
826 struct kvm *vm;
827 struct kvm_vcpu *vcpu;
828 int i;
829
830 spin_lock(&kvm_lock);
831 list_for_each_entry(vm, &vm_list, vm_list)
832 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
833 vcpu = vm->vcpus[i];
834 if (!vcpu)
835 continue;
836 /*
837 * If the vcpu is locked, then it is running on some
838 * other cpu and therefore it is not cached on the
839 * cpu in question.
840 *
841 * If it's not locked, check the last cpu it executed
842 * on.
843 */
844 if (mutex_trylock(&vcpu->mutex)) {
845 if (vcpu->cpu == cpu) {
846 kvm_x86_ops->vcpu_decache(vcpu);
847 vcpu->cpu = -1;
848 }
849 mutex_unlock(&vcpu->mutex);
850 }
851 }
852 spin_unlock(&kvm_lock);
853}
854
855int kvm_dev_ioctl_check_extension(long ext) 898int kvm_dev_ioctl_check_extension(long ext)
856{ 899{
857 int r; 900 int r;
@@ -867,8 +910,12 @@ int kvm_dev_ioctl_check_extension(long ext)
867 case KVM_CAP_PIT: 910 case KVM_CAP_PIT:
868 case KVM_CAP_NOP_IO_DELAY: 911 case KVM_CAP_NOP_IO_DELAY:
869 case KVM_CAP_MP_STATE: 912 case KVM_CAP_MP_STATE:
913 case KVM_CAP_SYNC_MMU:
870 r = 1; 914 r = 1;
871 break; 915 break;
916 case KVM_CAP_COALESCED_MMIO:
917 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
918 break;
872 case KVM_CAP_VAPIC: 919 case KVM_CAP_VAPIC:
873 r = !kvm_x86_ops->cpu_has_accelerated_tpr(); 920 r = !kvm_x86_ops->cpu_has_accelerated_tpr();
874 break; 921 break;
@@ -881,6 +928,9 @@ int kvm_dev_ioctl_check_extension(long ext)
881 case KVM_CAP_PV_MMU: 928 case KVM_CAP_PV_MMU:
882 r = !tdp_enabled; 929 r = !tdp_enabled;
883 break; 930 break;
931 case KVM_CAP_IOMMU:
932 r = intel_iommu_found();
933 break;
884 default: 934 default:
885 r = 0; 935 r = 0;
886 break; 936 break;
@@ -1283,28 +1333,33 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
1283 struct kvm_vcpu *vcpu = filp->private_data; 1333 struct kvm_vcpu *vcpu = filp->private_data;
1284 void __user *argp = (void __user *)arg; 1334 void __user *argp = (void __user *)arg;
1285 int r; 1335 int r;
1336 struct kvm_lapic_state *lapic = NULL;
1286 1337
1287 switch (ioctl) { 1338 switch (ioctl) {
1288 case KVM_GET_LAPIC: { 1339 case KVM_GET_LAPIC: {
1289 struct kvm_lapic_state lapic; 1340 lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
1290 1341
1291 memset(&lapic, 0, sizeof lapic); 1342 r = -ENOMEM;
1292 r = kvm_vcpu_ioctl_get_lapic(vcpu, &lapic); 1343 if (!lapic)
1344 goto out;
1345 r = kvm_vcpu_ioctl_get_lapic(vcpu, lapic);
1293 if (r) 1346 if (r)
1294 goto out; 1347 goto out;
1295 r = -EFAULT; 1348 r = -EFAULT;
1296 if (copy_to_user(argp, &lapic, sizeof lapic)) 1349 if (copy_to_user(argp, lapic, sizeof(struct kvm_lapic_state)))
1297 goto out; 1350 goto out;
1298 r = 0; 1351 r = 0;
1299 break; 1352 break;
1300 } 1353 }
1301 case KVM_SET_LAPIC: { 1354 case KVM_SET_LAPIC: {
1302 struct kvm_lapic_state lapic; 1355 lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
1303 1356 r = -ENOMEM;
1357 if (!lapic)
1358 goto out;
1304 r = -EFAULT; 1359 r = -EFAULT;
1305 if (copy_from_user(&lapic, argp, sizeof lapic)) 1360 if (copy_from_user(lapic, argp, sizeof(struct kvm_lapic_state)))
1306 goto out; 1361 goto out;
1307 r = kvm_vcpu_ioctl_set_lapic(vcpu, &lapic);; 1362 r = kvm_vcpu_ioctl_set_lapic(vcpu, lapic);
1308 if (r) 1363 if (r)
1309 goto out; 1364 goto out;
1310 r = 0; 1365 r = 0;
@@ -1402,6 +1457,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
1402 r = -EINVAL; 1457 r = -EINVAL;
1403 } 1458 }
1404out: 1459out:
1460 if (lapic)
1461 kfree(lapic);
1405 return r; 1462 return r;
1406} 1463}
1407 1464
@@ -1476,6 +1533,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
1476 goto out; 1533 goto out;
1477 1534
1478 down_write(&kvm->slots_lock); 1535 down_write(&kvm->slots_lock);
1536 spin_lock(&kvm->mmu_lock);
1479 1537
1480 p = &kvm->arch.aliases[alias->slot]; 1538 p = &kvm->arch.aliases[alias->slot];
1481 p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; 1539 p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
@@ -1487,6 +1545,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
1487 break; 1545 break;
1488 kvm->arch.naliases = n; 1546 kvm->arch.naliases = n;
1489 1547
1548 spin_unlock(&kvm->mmu_lock);
1490 kvm_mmu_zap_all(kvm); 1549 kvm_mmu_zap_all(kvm);
1491 1550
1492 up_write(&kvm->slots_lock); 1551 up_write(&kvm->slots_lock);
@@ -1608,6 +1667,15 @@ long kvm_arch_vm_ioctl(struct file *filp,
1608 struct kvm *kvm = filp->private_data; 1667 struct kvm *kvm = filp->private_data;
1609 void __user *argp = (void __user *)arg; 1668 void __user *argp = (void __user *)arg;
1610 int r = -EINVAL; 1669 int r = -EINVAL;
1670 /*
1671 * This union makes it completely explicit to gcc-3.x
1672 * that these two variables' stack usage should be
1673 * combined, not added together.
1674 */
1675 union {
1676 struct kvm_pit_state ps;
1677 struct kvm_memory_alias alias;
1678 } u;
1611 1679
1612 switch (ioctl) { 1680 switch (ioctl) {
1613 case KVM_SET_TSS_ADDR: 1681 case KVM_SET_TSS_ADDR:
@@ -1639,17 +1707,14 @@ long kvm_arch_vm_ioctl(struct file *filp,
1639 case KVM_GET_NR_MMU_PAGES: 1707 case KVM_GET_NR_MMU_PAGES:
1640 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm); 1708 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
1641 break; 1709 break;
1642 case KVM_SET_MEMORY_ALIAS: { 1710 case KVM_SET_MEMORY_ALIAS:
1643 struct kvm_memory_alias alias;
1644
1645 r = -EFAULT; 1711 r = -EFAULT;
1646 if (copy_from_user(&alias, argp, sizeof alias)) 1712 if (copy_from_user(&u.alias, argp, sizeof(struct kvm_memory_alias)))
1647 goto out; 1713 goto out;
1648 r = kvm_vm_ioctl_set_memory_alias(kvm, &alias); 1714 r = kvm_vm_ioctl_set_memory_alias(kvm, &u.alias);
1649 if (r) 1715 if (r)
1650 goto out; 1716 goto out;
1651 break; 1717 break;
1652 }
1653 case KVM_CREATE_IRQCHIP: 1718 case KVM_CREATE_IRQCHIP:
1654 r = -ENOMEM; 1719 r = -ENOMEM;
1655 kvm->arch.vpic = kvm_create_pic(kvm); 1720 kvm->arch.vpic = kvm_create_pic(kvm);
@@ -1677,13 +1742,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
1677 goto out; 1742 goto out;
1678 if (irqchip_in_kernel(kvm)) { 1743 if (irqchip_in_kernel(kvm)) {
1679 mutex_lock(&kvm->lock); 1744 mutex_lock(&kvm->lock);
1680 if (irq_event.irq < 16) 1745 kvm_set_irq(kvm, irq_event.irq, irq_event.level);
1681 kvm_pic_set_irq(pic_irqchip(kvm),
1682 irq_event.irq,
1683 irq_event.level);
1684 kvm_ioapic_set_irq(kvm->arch.vioapic,
1685 irq_event.irq,
1686 irq_event.level);
1687 mutex_unlock(&kvm->lock); 1746 mutex_unlock(&kvm->lock);
1688 r = 0; 1747 r = 0;
1689 } 1748 }
@@ -1691,65 +1750,77 @@ long kvm_arch_vm_ioctl(struct file *filp,
1691 } 1750 }
1692 case KVM_GET_IRQCHIP: { 1751 case KVM_GET_IRQCHIP: {
1693 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ 1752 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
1694 struct kvm_irqchip chip; 1753 struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL);
1695 1754
1696 r = -EFAULT; 1755 r = -ENOMEM;
1697 if (copy_from_user(&chip, argp, sizeof chip)) 1756 if (!chip)
1698 goto out; 1757 goto out;
1758 r = -EFAULT;
1759 if (copy_from_user(chip, argp, sizeof *chip))
1760 goto get_irqchip_out;
1699 r = -ENXIO; 1761 r = -ENXIO;
1700 if (!irqchip_in_kernel(kvm)) 1762 if (!irqchip_in_kernel(kvm))
1701 goto out; 1763 goto get_irqchip_out;
1702 r = kvm_vm_ioctl_get_irqchip(kvm, &chip); 1764 r = kvm_vm_ioctl_get_irqchip(kvm, chip);
1703 if (r) 1765 if (r)
1704 goto out; 1766 goto get_irqchip_out;
1705 r = -EFAULT; 1767 r = -EFAULT;
1706 if (copy_to_user(argp, &chip, sizeof chip)) 1768 if (copy_to_user(argp, chip, sizeof *chip))
1707 goto out; 1769 goto get_irqchip_out;
1708 r = 0; 1770 r = 0;
1771 get_irqchip_out:
1772 kfree(chip);
1773 if (r)
1774 goto out;
1709 break; 1775 break;
1710 } 1776 }
1711 case KVM_SET_IRQCHIP: { 1777 case KVM_SET_IRQCHIP: {
1712 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ 1778 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
1713 struct kvm_irqchip chip; 1779 struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL);
1714 1780
1715 r = -EFAULT; 1781 r = -ENOMEM;
1716 if (copy_from_user(&chip, argp, sizeof chip)) 1782 if (!chip)
1717 goto out; 1783 goto out;
1784 r = -EFAULT;
1785 if (copy_from_user(chip, argp, sizeof *chip))
1786 goto set_irqchip_out;
1718 r = -ENXIO; 1787 r = -ENXIO;
1719 if (!irqchip_in_kernel(kvm)) 1788 if (!irqchip_in_kernel(kvm))
1720 goto out; 1789 goto set_irqchip_out;
1721 r = kvm_vm_ioctl_set_irqchip(kvm, &chip); 1790 r = kvm_vm_ioctl_set_irqchip(kvm, chip);
1722 if (r) 1791 if (r)
1723 goto out; 1792 goto set_irqchip_out;
1724 r = 0; 1793 r = 0;
1794 set_irqchip_out:
1795 kfree(chip);
1796 if (r)
1797 goto out;
1725 break; 1798 break;
1726 } 1799 }
1727 case KVM_GET_PIT: { 1800 case KVM_GET_PIT: {
1728 struct kvm_pit_state ps;
1729 r = -EFAULT; 1801 r = -EFAULT;
1730 if (copy_from_user(&ps, argp, sizeof ps)) 1802 if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state)))
1731 goto out; 1803 goto out;
1732 r = -ENXIO; 1804 r = -ENXIO;
1733 if (!kvm->arch.vpit) 1805 if (!kvm->arch.vpit)
1734 goto out; 1806 goto out;
1735 r = kvm_vm_ioctl_get_pit(kvm, &ps); 1807 r = kvm_vm_ioctl_get_pit(kvm, &u.ps);
1736 if (r) 1808 if (r)
1737 goto out; 1809 goto out;
1738 r = -EFAULT; 1810 r = -EFAULT;
1739 if (copy_to_user(argp, &ps, sizeof ps)) 1811 if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state)))
1740 goto out; 1812 goto out;
1741 r = 0; 1813 r = 0;
1742 break; 1814 break;
1743 } 1815 }
1744 case KVM_SET_PIT: { 1816 case KVM_SET_PIT: {
1745 struct kvm_pit_state ps;
1746 r = -EFAULT; 1817 r = -EFAULT;
1747 if (copy_from_user(&ps, argp, sizeof ps)) 1818 if (copy_from_user(&u.ps, argp, sizeof u.ps))
1748 goto out; 1819 goto out;
1749 r = -ENXIO; 1820 r = -ENXIO;
1750 if (!kvm->arch.vpit) 1821 if (!kvm->arch.vpit)
1751 goto out; 1822 goto out;
1752 r = kvm_vm_ioctl_set_pit(kvm, &ps); 1823 r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
1753 if (r) 1824 if (r)
1754 goto out; 1825 goto out;
1755 r = 0; 1826 r = 0;
@@ -1781,13 +1852,14 @@ static void kvm_init_msr_list(void)
1781 * Only apic need an MMIO device hook, so shortcut now.. 1852 * Only apic need an MMIO device hook, so shortcut now..
1782 */ 1853 */
1783static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu, 1854static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu,
1784 gpa_t addr) 1855 gpa_t addr, int len,
1856 int is_write)
1785{ 1857{
1786 struct kvm_io_device *dev; 1858 struct kvm_io_device *dev;
1787 1859
1788 if (vcpu->arch.apic) { 1860 if (vcpu->arch.apic) {
1789 dev = &vcpu->arch.apic->dev; 1861 dev = &vcpu->arch.apic->dev;
1790 if (dev->in_range(dev, addr)) 1862 if (dev->in_range(dev, addr, len, is_write))
1791 return dev; 1863 return dev;
1792 } 1864 }
1793 return NULL; 1865 return NULL;
@@ -1795,13 +1867,15 @@ static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu,
1795 1867
1796 1868
1797static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, 1869static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
1798 gpa_t addr) 1870 gpa_t addr, int len,
1871 int is_write)
1799{ 1872{
1800 struct kvm_io_device *dev; 1873 struct kvm_io_device *dev;
1801 1874
1802 dev = vcpu_find_pervcpu_dev(vcpu, addr); 1875 dev = vcpu_find_pervcpu_dev(vcpu, addr, len, is_write);
1803 if (dev == NULL) 1876 if (dev == NULL)
1804 dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr); 1877 dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr, len,
1878 is_write);
1805 return dev; 1879 return dev;
1806} 1880}
1807 1881
@@ -1869,7 +1943,7 @@ mmio:
1869 * Is this MMIO handled locally? 1943 * Is this MMIO handled locally?
1870 */ 1944 */
1871 mutex_lock(&vcpu->kvm->lock); 1945 mutex_lock(&vcpu->kvm->lock);
1872 mmio_dev = vcpu_find_mmio_dev(vcpu, gpa); 1946 mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 0);
1873 if (mmio_dev) { 1947 if (mmio_dev) {
1874 kvm_iodevice_read(mmio_dev, gpa, bytes, val); 1948 kvm_iodevice_read(mmio_dev, gpa, bytes, val);
1875 mutex_unlock(&vcpu->kvm->lock); 1949 mutex_unlock(&vcpu->kvm->lock);
@@ -1924,7 +1998,7 @@ mmio:
1924 * Is this MMIO handled locally? 1998 * Is this MMIO handled locally?
1925 */ 1999 */
1926 mutex_lock(&vcpu->kvm->lock); 2000 mutex_lock(&vcpu->kvm->lock);
1927 mmio_dev = vcpu_find_mmio_dev(vcpu, gpa); 2001 mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 1);
1928 if (mmio_dev) { 2002 if (mmio_dev) {
1929 kvm_iodevice_write(mmio_dev, gpa, bytes, val); 2003 kvm_iodevice_write(mmio_dev, gpa, bytes, val);
1930 mutex_unlock(&vcpu->kvm->lock); 2004 mutex_unlock(&vcpu->kvm->lock);
@@ -1993,9 +2067,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
1993 2067
1994 val = *(u64 *)new; 2068 val = *(u64 *)new;
1995 2069
1996 down_read(&current->mm->mmap_sem);
1997 page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); 2070 page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
1998 up_read(&current->mm->mmap_sem);
1999 2071
2000 kaddr = kmap_atomic(page, KM_USER0); 2072 kaddr = kmap_atomic(page, KM_USER0);
2001 set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val); 2073 set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val);
@@ -2015,11 +2087,13 @@ static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
2015 2087
2016int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) 2088int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
2017{ 2089{
2090 kvm_mmu_invlpg(vcpu, address);
2018 return X86EMUL_CONTINUE; 2091 return X86EMUL_CONTINUE;
2019} 2092}
2020 2093
2021int emulate_clts(struct kvm_vcpu *vcpu) 2094int emulate_clts(struct kvm_vcpu *vcpu)
2022{ 2095{
2096 KVMTRACE_0D(CLTS, vcpu, handler);
2023 kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS); 2097 kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS);
2024 return X86EMUL_CONTINUE; 2098 return X86EMUL_CONTINUE;
2025} 2099}
@@ -2053,21 +2127,19 @@ int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
2053 2127
2054void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) 2128void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
2055{ 2129{
2056 static int reported;
2057 u8 opcodes[4]; 2130 u8 opcodes[4];
2058 unsigned long rip = vcpu->arch.rip; 2131 unsigned long rip = kvm_rip_read(vcpu);
2059 unsigned long rip_linear; 2132 unsigned long rip_linear;
2060 2133
2061 rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); 2134 if (!printk_ratelimit())
2062
2063 if (reported)
2064 return; 2135 return;
2065 2136
2137 rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
2138
2066 emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu); 2139 emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu);
2067 2140
2068 printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n", 2141 printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
2069 context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); 2142 context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
2070 reported = 1;
2071} 2143}
2072EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); 2144EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
2073 2145
@@ -2078,6 +2150,14 @@ static struct x86_emulate_ops emulate_ops = {
2078 .cmpxchg_emulated = emulator_cmpxchg_emulated, 2150 .cmpxchg_emulated = emulator_cmpxchg_emulated,
2079}; 2151};
2080 2152
2153static void cache_all_regs(struct kvm_vcpu *vcpu)
2154{
2155 kvm_register_read(vcpu, VCPU_REGS_RAX);
2156 kvm_register_read(vcpu, VCPU_REGS_RSP);
2157 kvm_register_read(vcpu, VCPU_REGS_RIP);
2158 vcpu->arch.regs_dirty = ~0;
2159}
2160
2081int emulate_instruction(struct kvm_vcpu *vcpu, 2161int emulate_instruction(struct kvm_vcpu *vcpu,
2082 struct kvm_run *run, 2162 struct kvm_run *run,
2083 unsigned long cr2, 2163 unsigned long cr2,
@@ -2087,8 +2167,15 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
2087 int r; 2167 int r;
2088 struct decode_cache *c; 2168 struct decode_cache *c;
2089 2169
2170 kvm_clear_exception_queue(vcpu);
2090 vcpu->arch.mmio_fault_cr2 = cr2; 2171 vcpu->arch.mmio_fault_cr2 = cr2;
2091 kvm_x86_ops->cache_regs(vcpu); 2172 /*
2173 * TODO: fix x86_emulate.c to use guest_read/write_register
2174 * instead of direct ->regs accesses, can save hundred cycles
2175 * on Intel for instructions that don't read/change RSP, for
2176 * for example.
2177 */
2178 cache_all_regs(vcpu);
2092 2179
2093 vcpu->mmio_is_write = 0; 2180 vcpu->mmio_is_write = 0;
2094 vcpu->arch.pio.string = 0; 2181 vcpu->arch.pio.string = 0;
@@ -2105,27 +2192,6 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
2105 ? X86EMUL_MODE_PROT64 : cs_db 2192 ? X86EMUL_MODE_PROT64 : cs_db
2106 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; 2193 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
2107 2194
2108 if (vcpu->arch.emulate_ctxt.mode == X86EMUL_MODE_PROT64) {
2109 vcpu->arch.emulate_ctxt.cs_base = 0;
2110 vcpu->arch.emulate_ctxt.ds_base = 0;
2111 vcpu->arch.emulate_ctxt.es_base = 0;
2112 vcpu->arch.emulate_ctxt.ss_base = 0;
2113 } else {
2114 vcpu->arch.emulate_ctxt.cs_base =
2115 get_segment_base(vcpu, VCPU_SREG_CS);
2116 vcpu->arch.emulate_ctxt.ds_base =
2117 get_segment_base(vcpu, VCPU_SREG_DS);
2118 vcpu->arch.emulate_ctxt.es_base =
2119 get_segment_base(vcpu, VCPU_SREG_ES);
2120 vcpu->arch.emulate_ctxt.ss_base =
2121 get_segment_base(vcpu, VCPU_SREG_SS);
2122 }
2123
2124 vcpu->arch.emulate_ctxt.gs_base =
2125 get_segment_base(vcpu, VCPU_SREG_GS);
2126 vcpu->arch.emulate_ctxt.fs_base =
2127 get_segment_base(vcpu, VCPU_SREG_FS);
2128
2129 r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); 2195 r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
2130 2196
2131 /* Reject the instructions other than VMCALL/VMMCALL when 2197 /* Reject the instructions other than VMCALL/VMMCALL when
@@ -2169,7 +2235,6 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
2169 return EMULATE_DO_MMIO; 2235 return EMULATE_DO_MMIO;
2170 } 2236 }
2171 2237
2172 kvm_x86_ops->decache_regs(vcpu);
2173 kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); 2238 kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
2174 2239
2175 if (vcpu->mmio_is_write) { 2240 if (vcpu->mmio_is_write) {
@@ -2222,20 +2287,19 @@ int complete_pio(struct kvm_vcpu *vcpu)
2222 struct kvm_pio_request *io = &vcpu->arch.pio; 2287 struct kvm_pio_request *io = &vcpu->arch.pio;
2223 long delta; 2288 long delta;
2224 int r; 2289 int r;
2225 2290 unsigned long val;
2226 kvm_x86_ops->cache_regs(vcpu);
2227 2291
2228 if (!io->string) { 2292 if (!io->string) {
2229 if (io->in) 2293 if (io->in) {
2230 memcpy(&vcpu->arch.regs[VCPU_REGS_RAX], vcpu->arch.pio_data, 2294 val = kvm_register_read(vcpu, VCPU_REGS_RAX);
2231 io->size); 2295 memcpy(&val, vcpu->arch.pio_data, io->size);
2296 kvm_register_write(vcpu, VCPU_REGS_RAX, val);
2297 }
2232 } else { 2298 } else {
2233 if (io->in) { 2299 if (io->in) {
2234 r = pio_copy_data(vcpu); 2300 r = pio_copy_data(vcpu);
2235 if (r) { 2301 if (r)
2236 kvm_x86_ops->cache_regs(vcpu);
2237 return r; 2302 return r;
2238 }
2239 } 2303 }
2240 2304
2241 delta = 1; 2305 delta = 1;
@@ -2245,19 +2309,24 @@ int complete_pio(struct kvm_vcpu *vcpu)
2245 * The size of the register should really depend on 2309 * The size of the register should really depend on
2246 * current address size. 2310 * current address size.
2247 */ 2311 */
2248 vcpu->arch.regs[VCPU_REGS_RCX] -= delta; 2312 val = kvm_register_read(vcpu, VCPU_REGS_RCX);
2313 val -= delta;
2314 kvm_register_write(vcpu, VCPU_REGS_RCX, val);
2249 } 2315 }
2250 if (io->down) 2316 if (io->down)
2251 delta = -delta; 2317 delta = -delta;
2252 delta *= io->size; 2318 delta *= io->size;
2253 if (io->in) 2319 if (io->in) {
2254 vcpu->arch.regs[VCPU_REGS_RDI] += delta; 2320 val = kvm_register_read(vcpu, VCPU_REGS_RDI);
2255 else 2321 val += delta;
2256 vcpu->arch.regs[VCPU_REGS_RSI] += delta; 2322 kvm_register_write(vcpu, VCPU_REGS_RDI, val);
2323 } else {
2324 val = kvm_register_read(vcpu, VCPU_REGS_RSI);
2325 val += delta;
2326 kvm_register_write(vcpu, VCPU_REGS_RSI, val);
2327 }
2257 } 2328 }
2258 2329
2259 kvm_x86_ops->decache_regs(vcpu);
2260
2261 io->count -= io->cur_count; 2330 io->count -= io->cur_count;
2262 io->cur_count = 0; 2331 io->cur_count = 0;
2263 2332
@@ -2300,15 +2369,17 @@ static void pio_string_write(struct kvm_io_device *pio_dev,
2300} 2369}
2301 2370
2302static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu, 2371static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
2303 gpa_t addr) 2372 gpa_t addr, int len,
2373 int is_write)
2304{ 2374{
2305 return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr); 2375 return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr, len, is_write);
2306} 2376}
2307 2377
2308int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, 2378int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2309 int size, unsigned port) 2379 int size, unsigned port)
2310{ 2380{
2311 struct kvm_io_device *pio_dev; 2381 struct kvm_io_device *pio_dev;
2382 unsigned long val;
2312 2383
2313 vcpu->run->exit_reason = KVM_EXIT_IO; 2384 vcpu->run->exit_reason = KVM_EXIT_IO;
2314 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; 2385 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
@@ -2329,13 +2400,12 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2329 KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size, 2400 KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
2330 handler); 2401 handler);
2331 2402
2332 kvm_x86_ops->cache_regs(vcpu); 2403 val = kvm_register_read(vcpu, VCPU_REGS_RAX);
2333 memcpy(vcpu->arch.pio_data, &vcpu->arch.regs[VCPU_REGS_RAX], 4); 2404 memcpy(vcpu->arch.pio_data, &val, 4);
2334 kvm_x86_ops->decache_regs(vcpu);
2335 2405
2336 kvm_x86_ops->skip_emulated_instruction(vcpu); 2406 kvm_x86_ops->skip_emulated_instruction(vcpu);
2337 2407
2338 pio_dev = vcpu_find_pio_dev(vcpu, port); 2408 pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in);
2339 if (pio_dev) { 2409 if (pio_dev) {
2340 kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data); 2410 kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
2341 complete_pio(vcpu); 2411 complete_pio(vcpu);
@@ -2417,7 +2487,9 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2417 } 2487 }
2418 } 2488 }
2419 2489
2420 pio_dev = vcpu_find_pio_dev(vcpu, port); 2490 pio_dev = vcpu_find_pio_dev(vcpu, port,
2491 vcpu->arch.pio.cur_count,
2492 !vcpu->arch.pio.in);
2421 if (!vcpu->arch.pio.in) { 2493 if (!vcpu->arch.pio.in) {
2422 /* string PIO write */ 2494 /* string PIO write */
2423 ret = pio_copy_data(vcpu); 2495 ret = pio_copy_data(vcpu);
@@ -2487,11 +2559,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
2487 KVMTRACE_0D(HLT, vcpu, handler); 2559 KVMTRACE_0D(HLT, vcpu, handler);
2488 if (irqchip_in_kernel(vcpu->kvm)) { 2560 if (irqchip_in_kernel(vcpu->kvm)) {
2489 vcpu->arch.mp_state = KVM_MP_STATE_HALTED; 2561 vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
2490 up_read(&vcpu->kvm->slots_lock);
2491 kvm_vcpu_block(vcpu);
2492 down_read(&vcpu->kvm->slots_lock);
2493 if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
2494 return -EINTR;
2495 return 1; 2562 return 1;
2496 } else { 2563 } else {
2497 vcpu->run->exit_reason = KVM_EXIT_HLT; 2564 vcpu->run->exit_reason = KVM_EXIT_HLT;
@@ -2514,13 +2581,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
2514 unsigned long nr, a0, a1, a2, a3, ret; 2581 unsigned long nr, a0, a1, a2, a3, ret;
2515 int r = 1; 2582 int r = 1;
2516 2583
2517 kvm_x86_ops->cache_regs(vcpu); 2584 nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
2518 2585 a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
2519 nr = vcpu->arch.regs[VCPU_REGS_RAX]; 2586 a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
2520 a0 = vcpu->arch.regs[VCPU_REGS_RBX]; 2587 a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
2521 a1 = vcpu->arch.regs[VCPU_REGS_RCX]; 2588 a3 = kvm_register_read(vcpu, VCPU_REGS_RSI);
2522 a2 = vcpu->arch.regs[VCPU_REGS_RDX];
2523 a3 = vcpu->arch.regs[VCPU_REGS_RSI];
2524 2589
2525 KVMTRACE_1D(VMMCALL, vcpu, (u32)nr, handler); 2590 KVMTRACE_1D(VMMCALL, vcpu, (u32)nr, handler);
2526 2591
@@ -2543,8 +2608,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
2543 ret = -KVM_ENOSYS; 2608 ret = -KVM_ENOSYS;
2544 break; 2609 break;
2545 } 2610 }
2546 vcpu->arch.regs[VCPU_REGS_RAX] = ret; 2611 kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
2547 kvm_x86_ops->decache_regs(vcpu);
2548 ++vcpu->stat.hypercalls; 2612 ++vcpu->stat.hypercalls;
2549 return r; 2613 return r;
2550} 2614}
@@ -2554,6 +2618,7 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
2554{ 2618{
2555 char instruction[3]; 2619 char instruction[3];
2556 int ret = 0; 2620 int ret = 0;
2621 unsigned long rip = kvm_rip_read(vcpu);
2557 2622
2558 2623
2559 /* 2624 /*
@@ -2563,9 +2628,8 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
2563 */ 2628 */
2564 kvm_mmu_zap_all(vcpu->kvm); 2629 kvm_mmu_zap_all(vcpu->kvm);
2565 2630
2566 kvm_x86_ops->cache_regs(vcpu);
2567 kvm_x86_ops->patch_hypercall(vcpu, instruction); 2631 kvm_x86_ops->patch_hypercall(vcpu, instruction);
2568 if (emulator_write_emulated(vcpu->arch.rip, instruction, 3, vcpu) 2632 if (emulator_write_emulated(rip, instruction, 3, vcpu)
2569 != X86EMUL_CONTINUE) 2633 != X86EMUL_CONTINUE)
2570 ret = -EFAULT; 2634 ret = -EFAULT;
2571 2635
@@ -2600,27 +2664,41 @@ void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
2600 2664
2601unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) 2665unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
2602{ 2666{
2667 unsigned long value;
2668
2603 kvm_x86_ops->decache_cr4_guest_bits(vcpu); 2669 kvm_x86_ops->decache_cr4_guest_bits(vcpu);
2604 switch (cr) { 2670 switch (cr) {
2605 case 0: 2671 case 0:
2606 return vcpu->arch.cr0; 2672 value = vcpu->arch.cr0;
2673 break;
2607 case 2: 2674 case 2:
2608 return vcpu->arch.cr2; 2675 value = vcpu->arch.cr2;
2676 break;
2609 case 3: 2677 case 3:
2610 return vcpu->arch.cr3; 2678 value = vcpu->arch.cr3;
2679 break;
2611 case 4: 2680 case 4:
2612 return vcpu->arch.cr4; 2681 value = vcpu->arch.cr4;
2682 break;
2613 case 8: 2683 case 8:
2614 return kvm_get_cr8(vcpu); 2684 value = kvm_get_cr8(vcpu);
2685 break;
2615 default: 2686 default:
2616 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); 2687 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
2617 return 0; 2688 return 0;
2618 } 2689 }
2690 KVMTRACE_3D(CR_READ, vcpu, (u32)cr, (u32)value,
2691 (u32)((u64)value >> 32), handler);
2692
2693 return value;
2619} 2694}
2620 2695
2621void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, 2696void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
2622 unsigned long *rflags) 2697 unsigned long *rflags)
2623{ 2698{
2699 KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr, (u32)val,
2700 (u32)((u64)val >> 32), handler);
2701
2624 switch (cr) { 2702 switch (cr) {
2625 case 0: 2703 case 0:
2626 kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val)); 2704 kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val));
@@ -2681,13 +2759,12 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
2681 u32 function, index; 2759 u32 function, index;
2682 struct kvm_cpuid_entry2 *e, *best; 2760 struct kvm_cpuid_entry2 *e, *best;
2683 2761
2684 kvm_x86_ops->cache_regs(vcpu); 2762 function = kvm_register_read(vcpu, VCPU_REGS_RAX);
2685 function = vcpu->arch.regs[VCPU_REGS_RAX]; 2763 index = kvm_register_read(vcpu, VCPU_REGS_RCX);
2686 index = vcpu->arch.regs[VCPU_REGS_RCX]; 2764 kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
2687 vcpu->arch.regs[VCPU_REGS_RAX] = 0; 2765 kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
2688 vcpu->arch.regs[VCPU_REGS_RBX] = 0; 2766 kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
2689 vcpu->arch.regs[VCPU_REGS_RCX] = 0; 2767 kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
2690 vcpu->arch.regs[VCPU_REGS_RDX] = 0;
2691 best = NULL; 2768 best = NULL;
2692 for (i = 0; i < vcpu->arch.cpuid_nent; ++i) { 2769 for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
2693 e = &vcpu->arch.cpuid_entries[i]; 2770 e = &vcpu->arch.cpuid_entries[i];
@@ -2705,18 +2782,17 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
2705 best = e; 2782 best = e;
2706 } 2783 }
2707 if (best) { 2784 if (best) {
2708 vcpu->arch.regs[VCPU_REGS_RAX] = best->eax; 2785 kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax);
2709 vcpu->arch.regs[VCPU_REGS_RBX] = best->ebx; 2786 kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx);
2710 vcpu->arch.regs[VCPU_REGS_RCX] = best->ecx; 2787 kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx);
2711 vcpu->arch.regs[VCPU_REGS_RDX] = best->edx; 2788 kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx);
2712 } 2789 }
2713 kvm_x86_ops->decache_regs(vcpu);
2714 kvm_x86_ops->skip_emulated_instruction(vcpu); 2790 kvm_x86_ops->skip_emulated_instruction(vcpu);
2715 KVMTRACE_5D(CPUID, vcpu, function, 2791 KVMTRACE_5D(CPUID, vcpu, function,
2716 (u32)vcpu->arch.regs[VCPU_REGS_RAX], 2792 (u32)kvm_register_read(vcpu, VCPU_REGS_RAX),
2717 (u32)vcpu->arch.regs[VCPU_REGS_RBX], 2793 (u32)kvm_register_read(vcpu, VCPU_REGS_RBX),
2718 (u32)vcpu->arch.regs[VCPU_REGS_RCX], 2794 (u32)kvm_register_read(vcpu, VCPU_REGS_RCX),
2719 (u32)vcpu->arch.regs[VCPU_REGS_RDX], handler); 2795 (u32)kvm_register_read(vcpu, VCPU_REGS_RDX), handler);
2720} 2796}
2721EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); 2797EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
2722 2798
@@ -2757,9 +2833,7 @@ static void vapic_enter(struct kvm_vcpu *vcpu)
2757 if (!apic || !apic->vapic_addr) 2833 if (!apic || !apic->vapic_addr)
2758 return; 2834 return;
2759 2835
2760 down_read(&current->mm->mmap_sem);
2761 page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); 2836 page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
2762 up_read(&current->mm->mmap_sem);
2763 2837
2764 vcpu->arch.apic->vapic_page = page; 2838 vcpu->arch.apic->vapic_page = page;
2765} 2839}
@@ -2771,32 +2845,16 @@ static void vapic_exit(struct kvm_vcpu *vcpu)
2771 if (!apic || !apic->vapic_addr) 2845 if (!apic || !apic->vapic_addr)
2772 return; 2846 return;
2773 2847
2848 down_read(&vcpu->kvm->slots_lock);
2774 kvm_release_page_dirty(apic->vapic_page); 2849 kvm_release_page_dirty(apic->vapic_page);
2775 mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); 2850 mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
2851 up_read(&vcpu->kvm->slots_lock);
2776} 2852}
2777 2853
2778static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 2854static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2779{ 2855{
2780 int r; 2856 int r;
2781 2857
2782 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
2783 pr_debug("vcpu %d received sipi with vector # %x\n",
2784 vcpu->vcpu_id, vcpu->arch.sipi_vector);
2785 kvm_lapic_reset(vcpu);
2786 r = kvm_x86_ops->vcpu_reset(vcpu);
2787 if (r)
2788 return r;
2789 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
2790 }
2791
2792 down_read(&vcpu->kvm->slots_lock);
2793 vapic_enter(vcpu);
2794
2795preempted:
2796 if (vcpu->guest_debug.enabled)
2797 kvm_x86_ops->guest_debug_pre(vcpu);
2798
2799again:
2800 if (vcpu->requests) 2858 if (vcpu->requests)
2801 if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) 2859 if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
2802 kvm_mmu_unload(vcpu); 2860 kvm_mmu_unload(vcpu);
@@ -2808,6 +2866,8 @@ again:
2808 if (vcpu->requests) { 2866 if (vcpu->requests) {
2809 if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests)) 2867 if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
2810 __kvm_migrate_timers(vcpu); 2868 __kvm_migrate_timers(vcpu);
2869 if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests))
2870 kvm_mmu_sync_roots(vcpu);
2811 if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) 2871 if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
2812 kvm_x86_ops->tlb_flush(vcpu); 2872 kvm_x86_ops->tlb_flush(vcpu);
2813 if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS, 2873 if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS,
@@ -2833,21 +2893,15 @@ again:
2833 2893
2834 local_irq_disable(); 2894 local_irq_disable();
2835 2895
2836 if (vcpu->requests || need_resched()) { 2896 if (vcpu->requests || need_resched() || signal_pending(current)) {
2837 local_irq_enable(); 2897 local_irq_enable();
2838 preempt_enable(); 2898 preempt_enable();
2839 r = 1; 2899 r = 1;
2840 goto out; 2900 goto out;
2841 } 2901 }
2842 2902
2843 if (signal_pending(current)) { 2903 if (vcpu->guest_debug.enabled)
2844 local_irq_enable(); 2904 kvm_x86_ops->guest_debug_pre(vcpu);
2845 preempt_enable();
2846 r = -EINTR;
2847 kvm_run->exit_reason = KVM_EXIT_INTR;
2848 ++vcpu->stat.signal_exits;
2849 goto out;
2850 }
2851 2905
2852 vcpu->guest_mode = 1; 2906 vcpu->guest_mode = 1;
2853 /* 2907 /*
@@ -2896,8 +2950,8 @@ again:
2896 * Profile KVM exit RIPs: 2950 * Profile KVM exit RIPs:
2897 */ 2951 */
2898 if (unlikely(prof_on == KVM_PROFILING)) { 2952 if (unlikely(prof_on == KVM_PROFILING)) {
2899 kvm_x86_ops->cache_regs(vcpu); 2953 unsigned long rip = kvm_rip_read(vcpu);
2900 profile_hit(KVM_PROFILING, (void *)vcpu->arch.rip); 2954 profile_hit(KVM_PROFILING, (void *)rip);
2901 } 2955 }
2902 2956
2903 if (vcpu->arch.exception.pending && kvm_x86_ops->exception_injected(vcpu)) 2957 if (vcpu->arch.exception.pending && kvm_x86_ops->exception_injected(vcpu))
@@ -2906,31 +2960,66 @@ again:
2906 kvm_lapic_sync_from_vapic(vcpu); 2960 kvm_lapic_sync_from_vapic(vcpu);
2907 2961
2908 r = kvm_x86_ops->handle_exit(kvm_run, vcpu); 2962 r = kvm_x86_ops->handle_exit(kvm_run, vcpu);
2963out:
2964 return r;
2965}
2909 2966
2910 if (r > 0) { 2967static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2911 if (dm_request_for_irq_injection(vcpu, kvm_run)) { 2968{
2912 r = -EINTR; 2969 int r;
2913 kvm_run->exit_reason = KVM_EXIT_INTR; 2970
2914 ++vcpu->stat.request_irq_exits; 2971 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
2915 goto out; 2972 pr_debug("vcpu %d received sipi with vector # %x\n",
2916 } 2973 vcpu->vcpu_id, vcpu->arch.sipi_vector);
2917 if (!need_resched()) 2974 kvm_lapic_reset(vcpu);
2918 goto again; 2975 r = kvm_x86_ops->vcpu_reset(vcpu);
2976 if (r)
2977 return r;
2978 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
2919 } 2979 }
2920 2980
2921out: 2981 down_read(&vcpu->kvm->slots_lock);
2922 up_read(&vcpu->kvm->slots_lock); 2982 vapic_enter(vcpu);
2923 if (r > 0) { 2983
2924 kvm_resched(vcpu); 2984 r = 1;
2925 down_read(&vcpu->kvm->slots_lock); 2985 while (r > 0) {
2926 goto preempted; 2986 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
2987 r = vcpu_enter_guest(vcpu, kvm_run);
2988 else {
2989 up_read(&vcpu->kvm->slots_lock);
2990 kvm_vcpu_block(vcpu);
2991 down_read(&vcpu->kvm->slots_lock);
2992 if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests))
2993 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
2994 vcpu->arch.mp_state =
2995 KVM_MP_STATE_RUNNABLE;
2996 if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
2997 r = -EINTR;
2998 }
2999
3000 if (r > 0) {
3001 if (dm_request_for_irq_injection(vcpu, kvm_run)) {
3002 r = -EINTR;
3003 kvm_run->exit_reason = KVM_EXIT_INTR;
3004 ++vcpu->stat.request_irq_exits;
3005 }
3006 if (signal_pending(current)) {
3007 r = -EINTR;
3008 kvm_run->exit_reason = KVM_EXIT_INTR;
3009 ++vcpu->stat.signal_exits;
3010 }
3011 if (need_resched()) {
3012 up_read(&vcpu->kvm->slots_lock);
3013 kvm_resched(vcpu);
3014 down_read(&vcpu->kvm->slots_lock);
3015 }
3016 }
2927 } 3017 }
2928 3018
3019 up_read(&vcpu->kvm->slots_lock);
2929 post_kvm_run_save(vcpu, kvm_run); 3020 post_kvm_run_save(vcpu, kvm_run);
2930 3021
2931 down_read(&vcpu->kvm->slots_lock);
2932 vapic_exit(vcpu); 3022 vapic_exit(vcpu);
2933 up_read(&vcpu->kvm->slots_lock);
2934 3023
2935 return r; 3024 return r;
2936} 3025}
@@ -2942,15 +3031,16 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2942 3031
2943 vcpu_load(vcpu); 3032 vcpu_load(vcpu);
2944 3033
3034 if (vcpu->sigset_active)
3035 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
3036
2945 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { 3037 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
2946 kvm_vcpu_block(vcpu); 3038 kvm_vcpu_block(vcpu);
2947 vcpu_put(vcpu); 3039 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2948 return -EAGAIN; 3040 r = -EAGAIN;
3041 goto out;
2949 } 3042 }
2950 3043
2951 if (vcpu->sigset_active)
2952 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2953
2954 /* re-sync apic's tpr */ 3044 /* re-sync apic's tpr */
2955 if (!irqchip_in_kernel(vcpu->kvm)) 3045 if (!irqchip_in_kernel(vcpu->kvm))
2956 kvm_set_cr8(vcpu, kvm_run->cr8); 3046 kvm_set_cr8(vcpu, kvm_run->cr8);
@@ -2980,11 +3070,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2980 } 3070 }
2981 } 3071 }
2982#endif 3072#endif
2983 if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) { 3073 if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL)
2984 kvm_x86_ops->cache_regs(vcpu); 3074 kvm_register_write(vcpu, VCPU_REGS_RAX,
2985 vcpu->arch.regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret; 3075 kvm_run->hypercall.ret);
2986 kvm_x86_ops->decache_regs(vcpu);
2987 }
2988 3076
2989 r = __vcpu_run(vcpu, kvm_run); 3077 r = __vcpu_run(vcpu, kvm_run);
2990 3078
@@ -3000,28 +3088,26 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3000{ 3088{
3001 vcpu_load(vcpu); 3089 vcpu_load(vcpu);
3002 3090
3003 kvm_x86_ops->cache_regs(vcpu); 3091 regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
3004 3092 regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);
3005 regs->rax = vcpu->arch.regs[VCPU_REGS_RAX]; 3093 regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX);
3006 regs->rbx = vcpu->arch.regs[VCPU_REGS_RBX]; 3094 regs->rdx = kvm_register_read(vcpu, VCPU_REGS_RDX);
3007 regs->rcx = vcpu->arch.regs[VCPU_REGS_RCX]; 3095 regs->rsi = kvm_register_read(vcpu, VCPU_REGS_RSI);
3008 regs->rdx = vcpu->arch.regs[VCPU_REGS_RDX]; 3096 regs->rdi = kvm_register_read(vcpu, VCPU_REGS_RDI);
3009 regs->rsi = vcpu->arch.regs[VCPU_REGS_RSI]; 3097 regs->rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
3010 regs->rdi = vcpu->arch.regs[VCPU_REGS_RDI]; 3098 regs->rbp = kvm_register_read(vcpu, VCPU_REGS_RBP);
3011 regs->rsp = vcpu->arch.regs[VCPU_REGS_RSP];
3012 regs->rbp = vcpu->arch.regs[VCPU_REGS_RBP];
3013#ifdef CONFIG_X86_64 3099#ifdef CONFIG_X86_64
3014 regs->r8 = vcpu->arch.regs[VCPU_REGS_R8]; 3100 regs->r8 = kvm_register_read(vcpu, VCPU_REGS_R8);
3015 regs->r9 = vcpu->arch.regs[VCPU_REGS_R9]; 3101 regs->r9 = kvm_register_read(vcpu, VCPU_REGS_R9);
3016 regs->r10 = vcpu->arch.regs[VCPU_REGS_R10]; 3102 regs->r10 = kvm_register_read(vcpu, VCPU_REGS_R10);
3017 regs->r11 = vcpu->arch.regs[VCPU_REGS_R11]; 3103 regs->r11 = kvm_register_read(vcpu, VCPU_REGS_R11);
3018 regs->r12 = vcpu->arch.regs[VCPU_REGS_R12]; 3104 regs->r12 = kvm_register_read(vcpu, VCPU_REGS_R12);
3019 regs->r13 = vcpu->arch.regs[VCPU_REGS_R13]; 3105 regs->r13 = kvm_register_read(vcpu, VCPU_REGS_R13);
3020 regs->r14 = vcpu->arch.regs[VCPU_REGS_R14]; 3106 regs->r14 = kvm_register_read(vcpu, VCPU_REGS_R14);
3021 regs->r15 = vcpu->arch.regs[VCPU_REGS_R15]; 3107 regs->r15 = kvm_register_read(vcpu, VCPU_REGS_R15);
3022#endif 3108#endif
3023 3109
3024 regs->rip = vcpu->arch.rip; 3110 regs->rip = kvm_rip_read(vcpu);
3025 regs->rflags = kvm_x86_ops->get_rflags(vcpu); 3111 regs->rflags = kvm_x86_ops->get_rflags(vcpu);
3026 3112
3027 /* 3113 /*
@@ -3039,29 +3125,29 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3039{ 3125{
3040 vcpu_load(vcpu); 3126 vcpu_load(vcpu);
3041 3127
3042 vcpu->arch.regs[VCPU_REGS_RAX] = regs->rax; 3128 kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax);
3043 vcpu->arch.regs[VCPU_REGS_RBX] = regs->rbx; 3129 kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx);
3044 vcpu->arch.regs[VCPU_REGS_RCX] = regs->rcx; 3130 kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx);
3045 vcpu->arch.regs[VCPU_REGS_RDX] = regs->rdx; 3131 kvm_register_write(vcpu, VCPU_REGS_RDX, regs->rdx);
3046 vcpu->arch.regs[VCPU_REGS_RSI] = regs->rsi; 3132 kvm_register_write(vcpu, VCPU_REGS_RSI, regs->rsi);
3047 vcpu->arch.regs[VCPU_REGS_RDI] = regs->rdi; 3133 kvm_register_write(vcpu, VCPU_REGS_RDI, regs->rdi);
3048 vcpu->arch.regs[VCPU_REGS_RSP] = regs->rsp; 3134 kvm_register_write(vcpu, VCPU_REGS_RSP, regs->rsp);
3049 vcpu->arch.regs[VCPU_REGS_RBP] = regs->rbp; 3135 kvm_register_write(vcpu, VCPU_REGS_RBP, regs->rbp);
3050#ifdef CONFIG_X86_64 3136#ifdef CONFIG_X86_64
3051 vcpu->arch.regs[VCPU_REGS_R8] = regs->r8; 3137 kvm_register_write(vcpu, VCPU_REGS_R8, regs->r8);
3052 vcpu->arch.regs[VCPU_REGS_R9] = regs->r9; 3138 kvm_register_write(vcpu, VCPU_REGS_R9, regs->r9);
3053 vcpu->arch.regs[VCPU_REGS_R10] = regs->r10; 3139 kvm_register_write(vcpu, VCPU_REGS_R10, regs->r10);
3054 vcpu->arch.regs[VCPU_REGS_R11] = regs->r11; 3140 kvm_register_write(vcpu, VCPU_REGS_R11, regs->r11);
3055 vcpu->arch.regs[VCPU_REGS_R12] = regs->r12; 3141 kvm_register_write(vcpu, VCPU_REGS_R12, regs->r12);
3056 vcpu->arch.regs[VCPU_REGS_R13] = regs->r13; 3142 kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13);
3057 vcpu->arch.regs[VCPU_REGS_R14] = regs->r14; 3143 kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14);
3058 vcpu->arch.regs[VCPU_REGS_R15] = regs->r15; 3144 kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15);
3145
3059#endif 3146#endif
3060 3147
3061 vcpu->arch.rip = regs->rip; 3148 kvm_rip_write(vcpu, regs->rip);
3062 kvm_x86_ops->set_rflags(vcpu, regs->rflags); 3149 kvm_x86_ops->set_rflags(vcpu, regs->rflags);
3063 3150
3064 kvm_x86_ops->decache_regs(vcpu);
3065 3151
3066 vcpu->arch.exception.pending = false; 3152 vcpu->arch.exception.pending = false;
3067 3153
@@ -3070,8 +3156,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3070 return 0; 3156 return 0;
3071} 3157}
3072 3158
3073static void get_segment(struct kvm_vcpu *vcpu, 3159void kvm_get_segment(struct kvm_vcpu *vcpu,
3074 struct kvm_segment *var, int seg) 3160 struct kvm_segment *var, int seg)
3075{ 3161{
3076 kvm_x86_ops->get_segment(vcpu, var, seg); 3162 kvm_x86_ops->get_segment(vcpu, var, seg);
3077} 3163}
@@ -3080,7 +3166,7 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
3080{ 3166{
3081 struct kvm_segment cs; 3167 struct kvm_segment cs;
3082 3168
3083 get_segment(vcpu, &cs, VCPU_SREG_CS); 3169 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
3084 *db = cs.db; 3170 *db = cs.db;
3085 *l = cs.l; 3171 *l = cs.l;
3086} 3172}
@@ -3094,15 +3180,15 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3094 3180
3095 vcpu_load(vcpu); 3181 vcpu_load(vcpu);
3096 3182
3097 get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); 3183 kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
3098 get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); 3184 kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
3099 get_segment(vcpu, &sregs->es, VCPU_SREG_ES); 3185 kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
3100 get_segment(vcpu, &sregs->fs, VCPU_SREG_FS); 3186 kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
3101 get_segment(vcpu, &sregs->gs, VCPU_SREG_GS); 3187 kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
3102 get_segment(vcpu, &sregs->ss, VCPU_SREG_SS); 3188 kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
3103 3189
3104 get_segment(vcpu, &sregs->tr, VCPU_SREG_TR); 3190 kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
3105 get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); 3191 kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
3106 3192
3107 kvm_x86_ops->get_idt(vcpu, &dt); 3193 kvm_x86_ops->get_idt(vcpu, &dt);
3108 sregs->idt.limit = dt.limit; 3194 sregs->idt.limit = dt.limit;
@@ -3154,7 +3240,7 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3154 return 0; 3240 return 0;
3155} 3241}
3156 3242
3157static void set_segment(struct kvm_vcpu *vcpu, 3243static void kvm_set_segment(struct kvm_vcpu *vcpu,
3158 struct kvm_segment *var, int seg) 3244 struct kvm_segment *var, int seg)
3159{ 3245{
3160 kvm_x86_ops->set_segment(vcpu, var, seg); 3246 kvm_x86_ops->set_segment(vcpu, var, seg);
@@ -3168,6 +3254,10 @@ static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector,
3168 kvm_desct->base |= seg_desc->base2 << 24; 3254 kvm_desct->base |= seg_desc->base2 << 24;
3169 kvm_desct->limit = seg_desc->limit0; 3255 kvm_desct->limit = seg_desc->limit0;
3170 kvm_desct->limit |= seg_desc->limit << 16; 3256 kvm_desct->limit |= seg_desc->limit << 16;
3257 if (seg_desc->g) {
3258 kvm_desct->limit <<= 12;
3259 kvm_desct->limit |= 0xfff;
3260 }
3171 kvm_desct->selector = selector; 3261 kvm_desct->selector = selector;
3172 kvm_desct->type = seg_desc->type; 3262 kvm_desct->type = seg_desc->type;
3173 kvm_desct->present = seg_desc->p; 3263 kvm_desct->present = seg_desc->p;
@@ -3191,7 +3281,7 @@ static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu,
3191 if (selector & 1 << 2) { 3281 if (selector & 1 << 2) {
3192 struct kvm_segment kvm_seg; 3282 struct kvm_segment kvm_seg;
3193 3283
3194 get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR); 3284 kvm_get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR);
3195 3285
3196 if (kvm_seg.unusable) 3286 if (kvm_seg.unusable)
3197 dtable->limit = 0; 3287 dtable->limit = 0;
@@ -3207,6 +3297,7 @@ static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu,
3207static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, 3297static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
3208 struct desc_struct *seg_desc) 3298 struct desc_struct *seg_desc)
3209{ 3299{
3300 gpa_t gpa;
3210 struct descriptor_table dtable; 3301 struct descriptor_table dtable;
3211 u16 index = selector >> 3; 3302 u16 index = selector >> 3;
3212 3303
@@ -3216,13 +3307,16 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
3216 kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); 3307 kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
3217 return 1; 3308 return 1;
3218 } 3309 }
3219 return kvm_read_guest(vcpu->kvm, dtable.base + index * 8, seg_desc, 8); 3310 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, dtable.base);
3311 gpa += index * 8;
3312 return kvm_read_guest(vcpu->kvm, gpa, seg_desc, 8);
3220} 3313}
3221 3314
3222/* allowed just for 8 bytes segments */ 3315/* allowed just for 8 bytes segments */
3223static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, 3316static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
3224 struct desc_struct *seg_desc) 3317 struct desc_struct *seg_desc)
3225{ 3318{
3319 gpa_t gpa;
3226 struct descriptor_table dtable; 3320 struct descriptor_table dtable;
3227 u16 index = selector >> 3; 3321 u16 index = selector >> 3;
3228 3322
@@ -3230,7 +3324,9 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
3230 3324
3231 if (dtable.limit < index * 8 + 7) 3325 if (dtable.limit < index * 8 + 7)
3232 return 1; 3326 return 1;
3233 return kvm_write_guest(vcpu->kvm, dtable.base + index * 8, seg_desc, 8); 3327 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, dtable.base);
3328 gpa += index * 8;
3329 return kvm_write_guest(vcpu->kvm, gpa, seg_desc, 8);
3234} 3330}
3235 3331
3236static u32 get_tss_base_addr(struct kvm_vcpu *vcpu, 3332static u32 get_tss_base_addr(struct kvm_vcpu *vcpu,
@@ -3242,62 +3338,14 @@ static u32 get_tss_base_addr(struct kvm_vcpu *vcpu,
3242 base_addr |= (seg_desc->base1 << 16); 3338 base_addr |= (seg_desc->base1 << 16);
3243 base_addr |= (seg_desc->base2 << 24); 3339 base_addr |= (seg_desc->base2 << 24);
3244 3340
3245 return base_addr; 3341 return vcpu->arch.mmu.gva_to_gpa(vcpu, base_addr);
3246}
3247
3248static int load_tss_segment32(struct kvm_vcpu *vcpu,
3249 struct desc_struct *seg_desc,
3250 struct tss_segment_32 *tss)
3251{
3252 u32 base_addr;
3253
3254 base_addr = get_tss_base_addr(vcpu, seg_desc);
3255
3256 return kvm_read_guest(vcpu->kvm, base_addr, tss,
3257 sizeof(struct tss_segment_32));
3258}
3259
3260static int save_tss_segment32(struct kvm_vcpu *vcpu,
3261 struct desc_struct *seg_desc,
3262 struct tss_segment_32 *tss)
3263{
3264 u32 base_addr;
3265
3266 base_addr = get_tss_base_addr(vcpu, seg_desc);
3267
3268 return kvm_write_guest(vcpu->kvm, base_addr, tss,
3269 sizeof(struct tss_segment_32));
3270}
3271
3272static int load_tss_segment16(struct kvm_vcpu *vcpu,
3273 struct desc_struct *seg_desc,
3274 struct tss_segment_16 *tss)
3275{
3276 u32 base_addr;
3277
3278 base_addr = get_tss_base_addr(vcpu, seg_desc);
3279
3280 return kvm_read_guest(vcpu->kvm, base_addr, tss,
3281 sizeof(struct tss_segment_16));
3282}
3283
3284static int save_tss_segment16(struct kvm_vcpu *vcpu,
3285 struct desc_struct *seg_desc,
3286 struct tss_segment_16 *tss)
3287{
3288 u32 base_addr;
3289
3290 base_addr = get_tss_base_addr(vcpu, seg_desc);
3291
3292 return kvm_write_guest(vcpu->kvm, base_addr, tss,
3293 sizeof(struct tss_segment_16));
3294} 3342}
3295 3343
3296static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) 3344static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg)
3297{ 3345{
3298 struct kvm_segment kvm_seg; 3346 struct kvm_segment kvm_seg;
3299 3347
3300 get_segment(vcpu, &kvm_seg, seg); 3348 kvm_get_segment(vcpu, &kvm_seg, seg);
3301 return kvm_seg.selector; 3349 return kvm_seg.selector;
3302} 3350}
3303 3351
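The common thread in the hunks above: kvm_read_guest() and kvm_write_guest() take guest-physical addresses, while the descriptor-table base (and the TSS base assembled from a descriptor) is a guest-virtual address, so the code now routes it through vcpu->arch.mmu.gva_to_gpa() before adding the selector-derived offset. A minimal sketch of the address computation, with translate() standing in for the per-vcpu MMU callback (both helper names below are illustrative, not KVM API):

#include <stdint.h>
#include <stdio.h>

typedef uint64_t gva_t;
typedef uint64_t gpa_t;

/* Identity mapping, standing in for the guest page-table walk. */
static gpa_t translate(gva_t gva)
{
	return (gpa_t)gva;
}

/* Guest-physical address of the 8-byte descriptor a selector refers to. */
static gpa_t descriptor_gpa(gva_t table_base, uint16_t selector)
{
	uint16_t index = selector >> 3;		/* bits 15:3 index the table */

	return translate(table_base) + (gpa_t)index * 8;
}

int main(void)
{
	/* Selector 0x10 is entry 2, i.e. 16 bytes into the table. */
	printf("%#llx\n", (unsigned long long)descriptor_gpa(0x1000, 0x10));
	return 0;
}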
@@ -3313,11 +3361,33 @@ static int load_segment_descriptor_to_kvm_desct(struct kvm_vcpu *vcpu,
3313 return 0; 3361 return 0;
3314} 3362}
3315 3363
3316static int load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, 3364static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg)
3317 int type_bits, int seg) 3365{
3366 struct kvm_segment segvar = {
3367 .base = selector << 4,
3368 .limit = 0xffff,
3369 .selector = selector,
3370 .type = 3,
3371 .present = 1,
3372 .dpl = 3,
3373 .db = 0,
3374 .s = 1,
3375 .l = 0,
3376 .g = 0,
3377 .avl = 0,
3378 .unusable = 0,
3379 };
3380 kvm_x86_ops->set_segment(vcpu, &segvar, seg);
3381 return 0;
3382}
3383
3384int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
3385 int type_bits, int seg)
3318{ 3386{
3319 struct kvm_segment kvm_seg; 3387 struct kvm_segment kvm_seg;
3320 3388
3389 if (!(vcpu->arch.cr0 & X86_CR0_PE))
3390 return kvm_load_realmode_segment(vcpu, selector, seg);
3321 if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg)) 3391 if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg))
3322 return 1; 3392 return 1;
3323 kvm_seg.type |= type_bits; 3393 kvm_seg.type |= type_bits;
@@ -3327,7 +3397,7 @@ static int load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
3327 if (!kvm_seg.s) 3397 if (!kvm_seg.s)
3328 kvm_seg.unusable = 1; 3398 kvm_seg.unusable = 1;
3329 3399
3330 set_segment(vcpu, &kvm_seg, seg); 3400 kvm_set_segment(vcpu, &kvm_seg, seg);
3331 return 0; 3401 return 0;
3332} 3402}
3333 3403
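kvm_load_realmode_segment() reflects how segmentation behaves with CR0.PE clear: there is no descriptor lookup, the base is simply selector * 16 and the limit is 64 KiB, which is why kvm_load_segment_descriptor() can short-circuit in real mode. A tiny illustrative sketch of real-mode address formation (real_mode_linear() is not part of the patch):

#include <stdint.h>
#include <stdio.h>

/* Real mode: linear = segment * 16 + offset, within a 64 KiB window. */
static uint32_t real_mode_linear(uint16_t seg, uint16_t off)
{
	return ((uint32_t)seg << 4) + off;
}

int main(void)
{
	/* The reset vector F000:FFF0 lands at linear 0xFFFF0. */
	printf("%#x\n", (unsigned)real_mode_linear(0xf000, 0xfff0));
	return 0;
}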
@@ -3335,17 +3405,16 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu,
3335 struct tss_segment_32 *tss) 3405 struct tss_segment_32 *tss)
3336{ 3406{
3337 tss->cr3 = vcpu->arch.cr3; 3407 tss->cr3 = vcpu->arch.cr3;
3338 tss->eip = vcpu->arch.rip; 3408 tss->eip = kvm_rip_read(vcpu);
3339 tss->eflags = kvm_x86_ops->get_rflags(vcpu); 3409 tss->eflags = kvm_x86_ops->get_rflags(vcpu);
3340 tss->eax = vcpu->arch.regs[VCPU_REGS_RAX]; 3410 tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
3341 tss->ecx = vcpu->arch.regs[VCPU_REGS_RCX]; 3411 tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
3342 tss->edx = vcpu->arch.regs[VCPU_REGS_RDX]; 3412 tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX);
3343 tss->ebx = vcpu->arch.regs[VCPU_REGS_RBX]; 3413 tss->ebx = kvm_register_read(vcpu, VCPU_REGS_RBX);
3344 tss->esp = vcpu->arch.regs[VCPU_REGS_RSP]; 3414 tss->esp = kvm_register_read(vcpu, VCPU_REGS_RSP);
3345 tss->ebp = vcpu->arch.regs[VCPU_REGS_RBP]; 3415 tss->ebp = kvm_register_read(vcpu, VCPU_REGS_RBP);
3346 tss->esi = vcpu->arch.regs[VCPU_REGS_RSI]; 3416 tss->esi = kvm_register_read(vcpu, VCPU_REGS_RSI);
3347 tss->edi = vcpu->arch.regs[VCPU_REGS_RDI]; 3417 tss->edi = kvm_register_read(vcpu, VCPU_REGS_RDI);
3348
3349 tss->es = get_segment_selector(vcpu, VCPU_SREG_ES); 3418 tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
3350 tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS); 3419 tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
3351 tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); 3420 tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
@@ -3361,37 +3430,37 @@ static int load_state_from_tss32(struct kvm_vcpu *vcpu,
3361{ 3430{
3362 kvm_set_cr3(vcpu, tss->cr3); 3431 kvm_set_cr3(vcpu, tss->cr3);
3363 3432
3364 vcpu->arch.rip = tss->eip; 3433 kvm_rip_write(vcpu, tss->eip);
3365 kvm_x86_ops->set_rflags(vcpu, tss->eflags | 2); 3434 kvm_x86_ops->set_rflags(vcpu, tss->eflags | 2);
3366 3435
3367 vcpu->arch.regs[VCPU_REGS_RAX] = tss->eax; 3436 kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax);
3368 vcpu->arch.regs[VCPU_REGS_RCX] = tss->ecx; 3437 kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx);
3369 vcpu->arch.regs[VCPU_REGS_RDX] = tss->edx; 3438 kvm_register_write(vcpu, VCPU_REGS_RDX, tss->edx);
3370 vcpu->arch.regs[VCPU_REGS_RBX] = tss->ebx; 3439 kvm_register_write(vcpu, VCPU_REGS_RBX, tss->ebx);
3371 vcpu->arch.regs[VCPU_REGS_RSP] = tss->esp; 3440 kvm_register_write(vcpu, VCPU_REGS_RSP, tss->esp);
3372 vcpu->arch.regs[VCPU_REGS_RBP] = tss->ebp; 3441 kvm_register_write(vcpu, VCPU_REGS_RBP, tss->ebp);
3373 vcpu->arch.regs[VCPU_REGS_RSI] = tss->esi; 3442 kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi);
3374 vcpu->arch.regs[VCPU_REGS_RDI] = tss->edi; 3443 kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi);
3375 3444
3376 if (load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR)) 3445 if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR))
3377 return 1; 3446 return 1;
3378 3447
3379 if (load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) 3448 if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES))
3380 return 1; 3449 return 1;
3381 3450
3382 if (load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) 3451 if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS))
3383 return 1; 3452 return 1;
3384 3453
3385 if (load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) 3454 if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS))
3386 return 1; 3455 return 1;
3387 3456
3388 if (load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) 3457 if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS))
3389 return 1; 3458 return 1;
3390 3459
3391 if (load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS)) 3460 if (kvm_load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS))
3392 return 1; 3461 return 1;
3393 3462
3394 if (load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS)) 3463 if (kvm_load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS))
3395 return 1; 3464 return 1;
3396 return 0; 3465 return 0;
3397} 3466}
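The type_bits argument ORed into the loaded descriptor differs by register: 9 for CS, 1 for the data segments, 0 for LDTR. For S=1 descriptors, bit 3 of the type field marks a code segment and bit 0 is the accessed bit, so 9 is code+accessed and 1 is just accessed. A trivial decode of those constants (illustrative only):

#include <stdio.h>

int main(void)
{
	unsigned cs_bits = 9, data_bits = 1;	/* the type_bits values used above */

	/* bit 3: code segment; bit 0: accessed */
	printf("CS:   code=%u accessed=%u\n", (cs_bits >> 3) & 1, cs_bits & 1);
	printf("data: code=%u accessed=%u\n", (data_bits >> 3) & 1, data_bits & 1);
	return 0;
}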
@@ -3399,16 +3468,16 @@ static int load_state_from_tss32(struct kvm_vcpu *vcpu,
3399static void save_state_to_tss16(struct kvm_vcpu *vcpu, 3468static void save_state_to_tss16(struct kvm_vcpu *vcpu,
3400 struct tss_segment_16 *tss) 3469 struct tss_segment_16 *tss)
3401{ 3470{
3402 tss->ip = vcpu->arch.rip; 3471 tss->ip = kvm_rip_read(vcpu);
3403 tss->flag = kvm_x86_ops->get_rflags(vcpu); 3472 tss->flag = kvm_x86_ops->get_rflags(vcpu);
3404 tss->ax = vcpu->arch.regs[VCPU_REGS_RAX]; 3473 tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX);
3405 tss->cx = vcpu->arch.regs[VCPU_REGS_RCX]; 3474 tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX);
3406 tss->dx = vcpu->arch.regs[VCPU_REGS_RDX]; 3475 tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX);
3407 tss->bx = vcpu->arch.regs[VCPU_REGS_RBX]; 3476 tss->bx = kvm_register_read(vcpu, VCPU_REGS_RBX);
3408 tss->sp = vcpu->arch.regs[VCPU_REGS_RSP]; 3477 tss->sp = kvm_register_read(vcpu, VCPU_REGS_RSP);
3409 tss->bp = vcpu->arch.regs[VCPU_REGS_RBP]; 3478 tss->bp = kvm_register_read(vcpu, VCPU_REGS_RBP);
3410 tss->si = vcpu->arch.regs[VCPU_REGS_RSI]; 3479 tss->si = kvm_register_read(vcpu, VCPU_REGS_RSI);
3411 tss->di = vcpu->arch.regs[VCPU_REGS_RDI]; 3480 tss->di = kvm_register_read(vcpu, VCPU_REGS_RDI);
3412 3481
3413 tss->es = get_segment_selector(vcpu, VCPU_SREG_ES); 3482 tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
3414 tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS); 3483 tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
@@ -3421,49 +3490,55 @@ static void save_state_to_tss16(struct kvm_vcpu *vcpu,
3421static int load_state_from_tss16(struct kvm_vcpu *vcpu, 3490static int load_state_from_tss16(struct kvm_vcpu *vcpu,
3422 struct tss_segment_16 *tss) 3491 struct tss_segment_16 *tss)
3423{ 3492{
3424 vcpu->arch.rip = tss->ip; 3493 kvm_rip_write(vcpu, tss->ip);
3425 kvm_x86_ops->set_rflags(vcpu, tss->flag | 2); 3494 kvm_x86_ops->set_rflags(vcpu, tss->flag | 2);
3426 vcpu->arch.regs[VCPU_REGS_RAX] = tss->ax; 3495 kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax);
3427 vcpu->arch.regs[VCPU_REGS_RCX] = tss->cx; 3496 kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx);
3428 vcpu->arch.regs[VCPU_REGS_RDX] = tss->dx; 3497 kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx);
3429 vcpu->arch.regs[VCPU_REGS_RBX] = tss->bx; 3498 kvm_register_write(vcpu, VCPU_REGS_RBX, tss->bx);
3430 vcpu->arch.regs[VCPU_REGS_RSP] = tss->sp; 3499 kvm_register_write(vcpu, VCPU_REGS_RSP, tss->sp);
3431 vcpu->arch.regs[VCPU_REGS_RBP] = tss->bp; 3500 kvm_register_write(vcpu, VCPU_REGS_RBP, tss->bp);
3432 vcpu->arch.regs[VCPU_REGS_RSI] = tss->si; 3501 kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si);
3433 vcpu->arch.regs[VCPU_REGS_RDI] = tss->di; 3502 kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di);
3434 3503
3435 if (load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR)) 3504 if (kvm_load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR))
3436 return 1; 3505 return 1;
3437 3506
3438 if (load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) 3507 if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES))
3439 return 1; 3508 return 1;
3440 3509
3441 if (load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) 3510 if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS))
3442 return 1; 3511 return 1;
3443 3512
3444 if (load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) 3513 if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS))
3445 return 1; 3514 return 1;
3446 3515
3447 if (load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) 3516 if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS))
3448 return 1; 3517 return 1;
3449 return 0; 3518 return 0;
3450} 3519}
3451 3520
3452int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, 3521static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
3453 struct desc_struct *cseg_desc, 3522 u32 old_tss_base,
3454 struct desc_struct *nseg_desc) 3523 struct desc_struct *nseg_desc)
3455{ 3524{
3456 struct tss_segment_16 tss_segment_16; 3525 struct tss_segment_16 tss_segment_16;
3457 int ret = 0; 3526 int ret = 0;
3458 3527
3459 if (load_tss_segment16(vcpu, cseg_desc, &tss_segment_16)) 3528 if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_16,
3529 sizeof tss_segment_16))
3460 goto out; 3530 goto out;
3461 3531
3462 save_state_to_tss16(vcpu, &tss_segment_16); 3532 save_state_to_tss16(vcpu, &tss_segment_16);
3463 save_tss_segment16(vcpu, cseg_desc, &tss_segment_16);
3464 3533
3465 if (load_tss_segment16(vcpu, nseg_desc, &tss_segment_16)) 3534 if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_16,
3535 sizeof tss_segment_16))
3466 goto out; 3536 goto out;
3537
3538 if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc),
3539 &tss_segment_16, sizeof tss_segment_16))
3540 goto out;
3541
3467 if (load_state_from_tss16(vcpu, &tss_segment_16)) 3542 if (load_state_from_tss16(vcpu, &tss_segment_16))
3468 goto out; 3543 goto out;
3469 3544
@@ -3472,21 +3547,27 @@ out:
3472 return ret; 3547 return ret;
3473} 3548}
3474 3549
3475int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, 3550static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
3476 struct desc_struct *cseg_desc, 3551 u32 old_tss_base,
3477 struct desc_struct *nseg_desc) 3552 struct desc_struct *nseg_desc)
3478{ 3553{
3479 struct tss_segment_32 tss_segment_32; 3554 struct tss_segment_32 tss_segment_32;
3480 int ret = 0; 3555 int ret = 0;
3481 3556
3482 if (load_tss_segment32(vcpu, cseg_desc, &tss_segment_32)) 3557 if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_32,
3558 sizeof tss_segment_32))
3483 goto out; 3559 goto out;
3484 3560
3485 save_state_to_tss32(vcpu, &tss_segment_32); 3561 save_state_to_tss32(vcpu, &tss_segment_32);
3486 save_tss_segment32(vcpu, cseg_desc, &tss_segment_32);
3487 3562
3488 if (load_tss_segment32(vcpu, nseg_desc, &tss_segment_32)) 3563 if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_32,
3564 sizeof tss_segment_32))
3489 goto out; 3565 goto out;
3566
3567 if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc),
3568 &tss_segment_32, sizeof tss_segment_32))
3569 goto out;
3570
3490 if (load_state_from_tss32(vcpu, &tss_segment_32)) 3571 if (load_state_from_tss32(vcpu, &tss_segment_32))
3491 goto out; 3572 goto out;
3492 3573
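Both task-switch helpers now issue the same sequence against guest memory: read the outgoing TSS at its pre-translated physical base, refresh it with the current register state, write it back, and only then read and load the incoming TSS. A toy model of that ordering, assuming a flat in-memory "guest" and a TSS trimmed to one field (nothing below is KVM API; read_guest()/write_guest() merely mimic kvm_read_guest()/kvm_write_guest()):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct tss32 { uint32_t eip; };		/* trimmed to one field for the demo */

static uint8_t guest_mem[0x1000];	/* stands in for guest physical memory */

static int read_guest(uint64_t gpa, void *dst, size_t len)
{
	if (gpa + len > sizeof(guest_mem))
		return 1;
	memcpy(dst, guest_mem + gpa, len);
	return 0;
}

static int write_guest(uint64_t gpa, const void *src, size_t len)
{
	if (gpa + len > sizeof(guest_mem))
		return 1;
	memcpy(guest_mem + gpa, src, len);
	return 0;
}

int main(void)
{
	uint64_t old_tss = 0x100, new_tss = 0x200;
	struct tss32 tss = { 0x5678 };
	uint32_t eip = 0x1234;				/* "current" instruction pointer */

	write_guest(new_tss, &tss, sizeof(tss));	/* incoming task stopped at 0x5678 */

	if (read_guest(old_tss, &tss, sizeof(tss)))	/* fetch outgoing TSS */
		return 1;
	tss.eip = eip;					/* save_state_to_tss32() analogue */
	if (write_guest(old_tss, &tss, sizeof(tss)))	/* persist it before switching */
		return 1;
	if (read_guest(new_tss, &tss, sizeof(tss)))	/* fetch incoming TSS */
		return 1;
	eip = tss.eip;					/* load_state_from_tss32() analogue */

	printf("resumed at eip %#x\n", (unsigned)eip);	/* prints 0x5678 */
	return 0;
}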
@@ -3501,16 +3582,20 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
3501 struct desc_struct cseg_desc; 3582 struct desc_struct cseg_desc;
3502 struct desc_struct nseg_desc; 3583 struct desc_struct nseg_desc;
3503 int ret = 0; 3584 int ret = 0;
3585 u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR);
3586 u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR);
3504 3587
3505 get_segment(vcpu, &tr_seg, VCPU_SREG_TR); 3588 old_tss_base = vcpu->arch.mmu.gva_to_gpa(vcpu, old_tss_base);
3506 3589
3590 /* FIXME: Handle errors. Failure to read either TSS or their
3591 * descriptors should generate a pagefault.
3592 */
3507 if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc)) 3593 if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc))
3508 goto out; 3594 goto out;
3509 3595
3510 if (load_guest_segment_descriptor(vcpu, tr_seg.selector, &cseg_desc)) 3596 if (load_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc))
3511 goto out; 3597 goto out;
3512 3598
3513
3514 if (reason != TASK_SWITCH_IRET) { 3599 if (reason != TASK_SWITCH_IRET) {
3515 int cpl; 3600 int cpl;
3516 3601
@@ -3528,8 +3613,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
3528 3613
3529 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { 3614 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
3530 cseg_desc.type &= ~(1 << 1); //clear the B flag 3615 cseg_desc.type &= ~(1 << 1); //clear the B flag
3531 save_guest_segment_descriptor(vcpu, tr_seg.selector, 3616 save_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc);
3532 &cseg_desc);
3533 } 3617 }
3534 3618
3535 if (reason == TASK_SWITCH_IRET) { 3619 if (reason == TASK_SWITCH_IRET) {
@@ -3538,13 +3622,12 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
3538 } 3622 }
3539 3623
3540 kvm_x86_ops->skip_emulated_instruction(vcpu); 3624 kvm_x86_ops->skip_emulated_instruction(vcpu);
3541 kvm_x86_ops->cache_regs(vcpu);
3542 3625
3543 if (nseg_desc.type & 8) 3626 if (nseg_desc.type & 8)
3544 ret = kvm_task_switch_32(vcpu, tss_selector, &cseg_desc, 3627 ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_base,
3545 &nseg_desc); 3628 &nseg_desc);
3546 else 3629 else
3547 ret = kvm_task_switch_16(vcpu, tss_selector, &cseg_desc, 3630 ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_base,
3548 &nseg_desc); 3631 &nseg_desc);
3549 3632
3550 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { 3633 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) {
@@ -3561,9 +3644,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
3561 kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 | X86_CR0_TS); 3644 kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 | X86_CR0_TS);
3562 seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg); 3645 seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg);
3563 tr_seg.type = 11; 3646 tr_seg.type = 11;
3564 set_segment(vcpu, &tr_seg, VCPU_SREG_TR); 3647 kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR);
3565out: 3648out:
3566 kvm_x86_ops->decache_regs(vcpu);
3567 return ret; 3649 return ret;
3568} 3650}
3569EXPORT_SYMBOL_GPL(kvm_task_switch); 3651EXPORT_SYMBOL_GPL(kvm_task_switch);
@@ -3626,17 +3708,24 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3626 pr_debug("Set back pending irq %d\n", 3708 pr_debug("Set back pending irq %d\n",
3627 pending_vec); 3709 pending_vec);
3628 } 3710 }
3711 kvm_pic_clear_isr_ack(vcpu->kvm);
3629 } 3712 }
3630 3713
3631 set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); 3714 kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
3632 set_segment(vcpu, &sregs->ds, VCPU_SREG_DS); 3715 kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
3633 set_segment(vcpu, &sregs->es, VCPU_SREG_ES); 3716 kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
3634 set_segment(vcpu, &sregs->fs, VCPU_SREG_FS); 3717 kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
3635 set_segment(vcpu, &sregs->gs, VCPU_SREG_GS); 3718 kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
3636 set_segment(vcpu, &sregs->ss, VCPU_SREG_SS); 3719 kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
3720
3721 kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
3722 kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
3637 3723
3638 set_segment(vcpu, &sregs->tr, VCPU_SREG_TR); 3724 /* Older userspace won't unhalt the vcpu on reset. */
3639 set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); 3725 if (vcpu->vcpu_id == 0 && kvm_rip_read(vcpu) == 0xfff0 &&
3726 sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
3727 !(vcpu->arch.cr0 & X86_CR0_PE))
3728 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
3640 3729
3641 vcpu_put(vcpu); 3730 vcpu_put(vcpu);
3642 3731
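The added unhalt check keys off the architectural x86 reset state: CS selector 0xf000 with base 0xffff0000, RIP 0xfff0, and CR0.PE clear, so the first instruction fetch happens at 0xfffffff0. If older userspace programs exactly that state into vcpu 0, it is taken as a reset and the vcpu is marked runnable. A small illustrative version of the signature test (looks_like_reset() is not a kernel function):

#include <stdint.h>
#include <stdio.h>

static int looks_like_reset(uint16_t cs_sel, uint64_t cs_base,
			    uint64_t rip, int protected_mode)
{
	return cs_sel == 0xf000 && cs_base == 0xffff0000ULL &&
	       rip == 0xfff0 && !protected_mode;
}

int main(void)
{
	uint64_t base = 0xffff0000ULL, rip = 0xfff0;

	printf("reset=%d, first fetch at %#llx\n",
	       looks_like_reset(0xf000, base, rip, 0),
	       (unsigned long long)(base + rip));	/* 0xfffffff0 */
	return 0;
}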
@@ -3751,14 +3840,14 @@ void fx_init(struct kvm_vcpu *vcpu)
3751 * allocate ram with GFP_KERNEL. 3840 * allocate ram with GFP_KERNEL.
3752 */ 3841 */
3753 if (!used_math()) 3842 if (!used_math())
3754 fx_save(&vcpu->arch.host_fx_image); 3843 kvm_fx_save(&vcpu->arch.host_fx_image);
3755 3844
3756 /* Initialize guest FPU by resetting ours and saving into guest's */ 3845 /* Initialize guest FPU by resetting ours and saving into guest's */
3757 preempt_disable(); 3846 preempt_disable();
3758 fx_save(&vcpu->arch.host_fx_image); 3847 kvm_fx_save(&vcpu->arch.host_fx_image);
3759 fx_finit(); 3848 kvm_fx_finit();
3760 fx_save(&vcpu->arch.guest_fx_image); 3849 kvm_fx_save(&vcpu->arch.guest_fx_image);
3761 fx_restore(&vcpu->arch.host_fx_image); 3850 kvm_fx_restore(&vcpu->arch.host_fx_image);
3762 preempt_enable(); 3851 preempt_enable();
3763 3852
3764 vcpu->arch.cr0 |= X86_CR0_ET; 3853 vcpu->arch.cr0 |= X86_CR0_ET;
@@ -3775,8 +3864,8 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
3775 return; 3864 return;
3776 3865
3777 vcpu->guest_fpu_loaded = 1; 3866 vcpu->guest_fpu_loaded = 1;
3778 fx_save(&vcpu->arch.host_fx_image); 3867 kvm_fx_save(&vcpu->arch.host_fx_image);
3779 fx_restore(&vcpu->arch.guest_fx_image); 3868 kvm_fx_restore(&vcpu->arch.guest_fx_image);
3780} 3869}
3781EXPORT_SYMBOL_GPL(kvm_load_guest_fpu); 3870EXPORT_SYMBOL_GPL(kvm_load_guest_fpu);
3782 3871
@@ -3786,8 +3875,8 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
3786 return; 3875 return;
3787 3876
3788 vcpu->guest_fpu_loaded = 0; 3877 vcpu->guest_fpu_loaded = 0;
3789 fx_save(&vcpu->arch.guest_fx_image); 3878 kvm_fx_save(&vcpu->arch.guest_fx_image);
3790 fx_restore(&vcpu->arch.host_fx_image); 3879 kvm_fx_restore(&vcpu->arch.host_fx_image);
3791 ++vcpu->stat.fpu_reload; 3880 ++vcpu->stat.fpu_reload;
3792} 3881}
3793EXPORT_SYMBOL_GPL(kvm_put_guest_fpu); 3882EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);
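kvm_load_guest_fpu() and kvm_put_guest_fpu() implement a lazy swap: the guest_fpu_loaded flag makes both directions idempotent, the host image is saved before the guest image is restored, and fpu_reload counts each guest-to-host round trip. A toy model of that pattern with plain integers standing in for the fx images (nothing here is KVM code; kvm_fx_save()/kvm_fx_restore() are only echoed in the comments):

#include <stdio.h>

struct toy_vcpu {
	int guest_fpu_loaded;
	int host_fx_image;
	int guest_fx_image;
	int hw_fpu;		/* what the "hardware" currently holds */
	int fpu_reload;
};

static void load_guest_fpu(struct toy_vcpu *v)
{
	if (v->guest_fpu_loaded)
		return;			/* already loaded: nothing to do */
	v->guest_fpu_loaded = 1;
	v->host_fx_image = v->hw_fpu;	/* kvm_fx_save(host) analogue */
	v->hw_fpu = v->guest_fx_image;	/* kvm_fx_restore(guest) analogue */
}

static void put_guest_fpu(struct toy_vcpu *v)
{
	if (!v->guest_fpu_loaded)
		return;
	v->guest_fpu_loaded = 0;
	v->guest_fx_image = v->hw_fpu;	/* kvm_fx_save(guest) analogue */
	v->hw_fpu = v->host_fx_image;	/* kvm_fx_restore(host) analogue */
	++v->fpu_reload;
}

int main(void)
{
	struct toy_vcpu v = { .hw_fpu = 1, .guest_fx_image = 2 };

	load_guest_fpu(&v);
	load_guest_fpu(&v);		/* second call is a no-op */
	put_guest_fpu(&v);
	printf("host=%d guest=%d hw=%d reloads=%d\n",
	       v.host_fx_image, v.guest_fx_image, v.hw_fpu, v.fpu_reload);
	return 0;
}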
@@ -3922,6 +4011,7 @@ struct kvm *kvm_arch_create_vm(void)
3922 return ERR_PTR(-ENOMEM); 4011 return ERR_PTR(-ENOMEM);
3923 4012
3924 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); 4013 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
4014 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
3925 4015
3926 return kvm; 4016 return kvm;
3927} 4017}
@@ -3954,6 +4044,8 @@ static void kvm_free_vcpus(struct kvm *kvm)
3954 4044
3955void kvm_arch_destroy_vm(struct kvm *kvm) 4045void kvm_arch_destroy_vm(struct kvm *kvm)
3956{ 4046{
4047 kvm_iommu_unmap_guest(kvm);
4048 kvm_free_all_assigned_devices(kvm);
3957 kvm_free_pit(kvm); 4049 kvm_free_pit(kvm);
3958 kfree(kvm->arch.vpic); 4050 kfree(kvm->arch.vpic);
3959 kfree(kvm->arch.vioapic); 4051 kfree(kvm->arch.vioapic);
@@ -3979,16 +4071,23 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
3979 */ 4071 */
3980 if (!user_alloc) { 4072 if (!user_alloc) {
3981 if (npages && !old.rmap) { 4073 if (npages && !old.rmap) {
4074 unsigned long userspace_addr;
4075
3982 down_write(&current->mm->mmap_sem); 4076 down_write(&current->mm->mmap_sem);
3983 memslot->userspace_addr = do_mmap(NULL, 0, 4077 userspace_addr = do_mmap(NULL, 0,
3984 npages * PAGE_SIZE, 4078 npages * PAGE_SIZE,
3985 PROT_READ | PROT_WRITE, 4079 PROT_READ | PROT_WRITE,
3986 MAP_SHARED | MAP_ANONYMOUS, 4080 MAP_PRIVATE | MAP_ANONYMOUS,
3987 0); 4081 0);
3988 up_write(&current->mm->mmap_sem); 4082 up_write(&current->mm->mmap_sem);
3989 4083
3990 if (IS_ERR((void *)memslot->userspace_addr)) 4084 if (IS_ERR((void *)userspace_addr))
3991 return PTR_ERR((void *)memslot->userspace_addr); 4085 return PTR_ERR((void *)userspace_addr);
4086
4087 /* set userspace_addr atomically for kvm_hva_to_rmapp */
4088 spin_lock(&kvm->mmu_lock);
4089 memslot->userspace_addr = userspace_addr;
4090 spin_unlock(&kvm->mmu_lock);
3992 } else { 4091 } else {
3993 if (!old.user_alloc && old.rmap) { 4092 if (!old.user_alloc && old.rmap) {
3994 int ret; 4093 int ret;
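Two things change in this hunk: the anonymous backing for kernel-allocated slots is requested as MAP_PRIVATE rather than MAP_SHARED, and the address returned by do_mmap() is published into memslot->userspace_addr only under mmu_lock, so a concurrent kvm_hva_to_rmapp() never sees a half-initialized slot. For orientation only, the userspace analogue of that mapping request looks like this (userspace passes fd = -1 where the in-kernel call passes a NULL file; this is not kernel code):

#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 16 * 4096;
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	printf("anonymous private mapping: %zu bytes at %p\n", len, p);
	munmap(p, len);
	return 0;
}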
@@ -4016,6 +4115,11 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
4016 return 0; 4115 return 0;
4017} 4116}
4018 4117
4118void kvm_arch_flush_shadow(struct kvm *kvm)
4119{
4120 kvm_mmu_zap_all(kvm);
4121}
4122
4019int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 4123int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
4020{ 4124{
4021 return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE 4125 return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE