author	Linus Torvalds <torvalds@linux-foundation.org>	2008-10-16 18:36:00 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-10-16 18:36:00 -0400
commit	08d19f51f05a68ce89a289320ce4ed96e757df72 (patch)
tree	31c5d718d0aeaff5083fe533cd6e1f9fbbe846bb	/arch/x86/kvm/x86.c
parent	1c95e1b69073cff5ff179e592fa1a1e182c78a17 (diff)
parent	2381ad241d0bea1253a37f314b270848067640bb (diff)
Merge branch 'kvm-updates/2.6.28' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm
* 'kvm-updates/2.6.28' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm: (134 commits)
  KVM: ia64: Add intel iommu support for guests.
  KVM: ia64: add directed mmio range support for kvm guests
  KVM: ia64: Make pmt table be able to hold physical mmio entries.
  KVM: Move irqchip_in_kernel() from ioapic.h to irq.h
  KVM: Separate irq ack notification out of arch/x86/kvm/irq.c
  KVM: Change is_mmio_pfn to kvm_is_mmio_pfn, and make it common for all archs
  KVM: Move device assignment logic to common code
  KVM: Device Assignment: Move vtd.c from arch/x86/kvm/ to virt/kvm/
  KVM: VMX: enable invlpg exiting if EPT is disabled
  KVM: x86: Silence various LAPIC-related host kernel messages
  KVM: Device Assignment: Map mmio pages into VT-d page table
  KVM: PIC: enhance IPI avoidance
  KVM: MMU: add "oos_shadow" parameter to disable oos
  KVM: MMU: speed up mmu_unsync_walk
  KVM: MMU: out of sync shadow core
  KVM: MMU: mmu_convert_notrap helper
  KVM: MMU: awareness of new kvm_mmu_zap_page behaviour
  KVM: MMU: mmu_parent_walk
  KVM: x86: trap invlpg
  KVM: MMU: sync roots on mmu reload
  ...
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r--	arch/x86/kvm/x86.c	552
1 file changed, 324 insertions, 228 deletions
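
Most of the x86.c changes below are a mechanical conversion from the old bulk kvm_x86_ops->cache_regs()/decache_regs() hooks and direct vcpu->arch.regs[] accesses to the per-register kvm_register_read()/kvm_register_write() and kvm_rip_read()/kvm_rip_write() accessors declared in the newly included kvm_cache_regs.h. The snippet below is a simplified, self-contained user-space model of that lazy read-through/write-back caching idea; the toy_vcpu struct, the fake vmcs_read()/vmcs_write() helpers and the register set are illustrative assumptions, not the kernel's actual implementation.

#include <stdio.h>

/* Toy model of the lazy register cache idea behind kvm_cache_regs.h.
 * All names here (toy_vcpu, vmcs_read, vmcs_write, NR_REGS) are illustrative. */
enum reg { REG_RAX, REG_RCX, REG_RSP, REG_RIP, NR_REGS };

struct toy_vcpu {
	unsigned long regs[NR_REGS];
	unsigned long regs_avail;   /* bit set: regs[i] mirrors "hardware"      */
	unsigned long regs_dirty;   /* bit set: regs[i] must be written back    */
	unsigned long hw[NR_REGS];  /* stands in for VMCS/VMCB register fields  */
};

/* Expensive "hardware" accessors (VMREAD/VMWRITE in the real code). */
static unsigned long vmcs_read(struct toy_vcpu *v, enum reg r) { return v->hw[r]; }
static void vmcs_write(struct toy_vcpu *v, enum reg r, unsigned long val) { v->hw[r] = val; }

static unsigned long register_read(struct toy_vcpu *v, enum reg r)
{
	if (!(v->regs_avail & (1UL << r))) {   /* first use: fill the cache */
		v->regs[r] = vmcs_read(v, r);
		v->regs_avail |= 1UL << r;
	}
	return v->regs[r];
}

static void register_write(struct toy_vcpu *v, enum reg r, unsigned long val)
{
	v->regs[r] = val;
	v->regs_avail |= 1UL << r;
	v->regs_dirty |= 1UL << r;             /* flushed before the next VM entry */
}

static void flush_dirty_regs(struct toy_vcpu *v)
{
	for (enum reg r = 0; r < NR_REGS; r++)
		if (v->regs_dirty & (1UL << r))
			vmcs_write(v, r, v->regs[r]);
	v->regs_dirty = 0;
}

int main(void)
{
	struct toy_vcpu v = { .hw = { 1, 2, 3, 0xfff0 } };

	/* Only RAX is actually fetched; RSP/RIP are never touched. */
	register_write(&v, REG_RAX, register_read(&v, REG_RAX) + 1);
	flush_dirty_regs(&v);
	printf("rax=%lu rip=%#lx\n", v.hw[REG_RAX], v.hw[REG_RIP]);
	return 0;
}
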
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 19afbb644c7f..4f0677d1eae8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4,10 +4,14 @@
  * derived from drivers/kvm/kvm_main.c
  *
  * Copyright (C) 2006 Qumranet, Inc.
+ * Copyright (C) 2008 Qumranet, Inc.
+ * Copyright IBM Corporation, 2008
  *
  * Authors:
  *   Avi Kivity   <avi@qumranet.com>
  *   Yaniv Kamay  <yaniv@qumranet.com>
+ *   Amit Shah    <amit.shah@qumranet.com>
+ *   Ben-Ami Yassour <benami@il.ibm.com>
  *
  * This work is licensed under the terms of the GNU GPL, version 2.  See
  * the COPYING file in the top-level directory.
@@ -19,14 +23,18 @@
 #include "mmu.h"
 #include "i8254.h"
 #include "tss.h"
+#include "kvm_cache_regs.h"
+#include "x86.h"
 
 #include <linux/clocksource.h>
+#include <linux/interrupt.h>
 #include <linux/kvm.h>
 #include <linux/fs.h>
 #include <linux/vmalloc.h>
 #include <linux/module.h>
 #include <linux/mman.h>
 #include <linux/highmem.h>
+#include <linux/intel-iommu.h>
 
 #include <asm/uaccess.h>
 #include <asm/msr.h>
@@ -61,6 +69,7 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 				      struct kvm_cpuid_entry2 __user *entries);
 
 struct kvm_x86_ops *kvm_x86_ops;
+EXPORT_SYMBOL_GPL(kvm_x86_ops);
 
 struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "pf_fixed", VCPU_STAT(pf_fixed) },
@@ -83,6 +92,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
83 { "fpu_reload", VCPU_STAT(fpu_reload) }, 92 { "fpu_reload", VCPU_STAT(fpu_reload) },
84 { "insn_emulation", VCPU_STAT(insn_emulation) }, 93 { "insn_emulation", VCPU_STAT(insn_emulation) },
85 { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) }, 94 { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
95 { "irq_injections", VCPU_STAT(irq_injections) },
86 { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) }, 96 { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
87 { "mmu_pte_write", VM_STAT(mmu_pte_write) }, 97 { "mmu_pte_write", VM_STAT(mmu_pte_write) },
88 { "mmu_pte_updated", VM_STAT(mmu_pte_updated) }, 98 { "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
@@ -90,12 +100,12 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
90 { "mmu_flooded", VM_STAT(mmu_flooded) }, 100 { "mmu_flooded", VM_STAT(mmu_flooded) },
91 { "mmu_recycled", VM_STAT(mmu_recycled) }, 101 { "mmu_recycled", VM_STAT(mmu_recycled) },
92 { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, 102 { "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
103 { "mmu_unsync", VM_STAT(mmu_unsync) },
93 { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, 104 { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
94 { "largepages", VM_STAT(lpages) }, 105 { "largepages", VM_STAT(lpages) },
95 { NULL } 106 { NULL }
96}; 107};
97 108
98
99unsigned long segment_base(u16 selector) 109unsigned long segment_base(u16 selector)
100{ 110{
101 struct descriptor_table gdt; 111 struct descriptor_table gdt;
@@ -352,6 +362,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cr4);
 void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 {
 	if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) {
+		kvm_mmu_sync_roots(vcpu);
 		kvm_mmu_flush_tlb(vcpu);
 		return;
 	}
@@ -662,6 +673,18 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		pr_unimpl(vcpu, "%s: MSR_IA32_MCG_CTL 0x%llx, nop\n",
 			__func__, data);
 		break;
+	case MSR_IA32_DEBUGCTLMSR:
+		if (!data) {
+			/* We support the non-activated case already */
+			break;
+		} else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
+			/* Values other than LBR and BTF are vendor-specific,
+			   thus reserved and should throw a #GP */
+			return 1;
+		}
+		pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
+			__func__, data);
+		break;
 	case MSR_IA32_UCODE_REV:
 	case MSR_IA32_UCODE_WRITE:
 		break;
@@ -692,10 +715,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		/* ...but clean it before doing the actual write */
 		vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
 
-		down_read(&current->mm->mmap_sem);
 		vcpu->arch.time_page =
 				gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
-		up_read(&current->mm->mmap_sem);
 
 		if (is_error_page(vcpu->arch.time_page)) {
 			kvm_release_page_clean(vcpu->arch.time_page);
@@ -752,8 +773,14 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case MSR_IA32_MC0_MISC+8:
 	case MSR_IA32_MC0_MISC+12:
 	case MSR_IA32_MC0_MISC+16:
+	case MSR_IA32_MC0_MISC+20:
 	case MSR_IA32_UCODE_REV:
 	case MSR_IA32_EBL_CR_POWERON:
+	case MSR_IA32_DEBUGCTLMSR:
+	case MSR_IA32_LASTBRANCHFROMIP:
+	case MSR_IA32_LASTBRANCHTOIP:
+	case MSR_IA32_LASTINTFROMIP:
+	case MSR_IA32_LASTINTTOIP:
 		data = 0;
 		break;
 	case MSR_MTRRcap:
@@ -901,6 +928,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PV_MMU:
 		r = !tdp_enabled;
 		break;
+	case KVM_CAP_IOMMU:
+		r = intel_iommu_found();
+		break;
 	default:
 		r = 0;
 		break;
@@ -1303,28 +1333,33 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 	struct kvm_vcpu *vcpu = filp->private_data;
 	void __user *argp = (void __user *)arg;
 	int r;
+	struct kvm_lapic_state *lapic = NULL;
 
 	switch (ioctl) {
 	case KVM_GET_LAPIC: {
-		struct kvm_lapic_state lapic;
+		lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
 
-		memset(&lapic, 0, sizeof lapic);
-		r = kvm_vcpu_ioctl_get_lapic(vcpu, &lapic);
+		r = -ENOMEM;
+		if (!lapic)
+			goto out;
+		r = kvm_vcpu_ioctl_get_lapic(vcpu, lapic);
 		if (r)
 			goto out;
 		r = -EFAULT;
-		if (copy_to_user(argp, &lapic, sizeof lapic))
+		if (copy_to_user(argp, lapic, sizeof(struct kvm_lapic_state)))
 			goto out;
 		r = 0;
 		break;
 	}
 	case KVM_SET_LAPIC: {
-		struct kvm_lapic_state lapic;
-
+		lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
+		r = -ENOMEM;
+		if (!lapic)
+			goto out;
 		r = -EFAULT;
-		if (copy_from_user(&lapic, argp, sizeof lapic))
+		if (copy_from_user(lapic, argp, sizeof(struct kvm_lapic_state)))
 			goto out;
-		r = kvm_vcpu_ioctl_set_lapic(vcpu, &lapic);;
+		r = kvm_vcpu_ioctl_set_lapic(vcpu, lapic);
 		if (r)
 			goto out;
 		r = 0;
@@ -1422,6 +1457,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		r = -EINVAL;
 	}
 out:
+	if (lapic)
+		kfree(lapic);
 	return r;
 }
 
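
The KVM_GET_LAPIC/KVM_SET_LAPIC hunks above replace a struct kvm_lapic_state local (about a kilobyte of APIC register state) with a kzalloc()/kmalloc() allocation that is released at the handler's common out: label, keeping the ioctl path's kernel stack footprint small. Below is a hedged sketch of that shape only: struct big_state and fill_state() are made-up placeholders, while kzalloc(), kfree() and copy_to_user() are the real kernel helpers the hunk relies on.

#include <linux/slab.h>
#include <linux/string.h>
#include <linux/uaccess.h>

struct big_state { char data[1024]; };		/* too large to live on the kernel stack */

static int fill_state(struct big_state *s)	/* stand-in for the real getter */
{
	memset(s->data, 0, sizeof(s->data));
	return 0;
}

static long example_get_state(void __user *argp)
{
	struct big_state *s = kzalloc(sizeof(*s), GFP_KERNEL);
	long r = -ENOMEM;

	if (!s)
		goto out;
	r = fill_state(s);
	if (r)
		goto out;
	r = -EFAULT;
	if (copy_to_user(argp, s, sizeof(*s)))
		goto out;
	r = 0;
out:
	kfree(s);				/* kfree(NULL) is a no-op */
	return r;
}
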
@@ -1630,6 +1667,15 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	struct kvm *kvm = filp->private_data;
 	void __user *argp = (void __user *)arg;
 	int r = -EINVAL;
+	/*
+	 * This union makes it completely explicit to gcc-3.x
+	 * that these two variables' stack usage should be
+	 * combined, not added together.
+	 */
+	union {
+		struct kvm_pit_state ps;
+		struct kvm_memory_alias alias;
+	} u;
 
 	switch (ioctl) {
 	case KVM_SET_TSS_ADDR:
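
The union added above exists purely to tell older compilers (gcc-3.x, per the comment) that the kvm_pit_state and kvm_memory_alias temporaries may share one stack slot instead of each getting its own; only one of them is live in any given ioctl case. The self-contained example below demonstrates the effect with two made-up payload types, so the sizes are illustrative rather than the real KVM structure sizes.

#include <stdio.h>

/* Two stand-in payloads; only one is ever live at a time. */
struct payload_a { char buf[64]; };
struct payload_b { char buf[96]; };

int main(void)
{
	/* Separate locals: worst-case stack cost is the sum of the sizes. */
	struct { struct payload_a a; struct payload_b b; } separate;

	/* One union: worst-case stack cost is the larger of the two. */
	union { struct payload_a a; struct payload_b b; } shared;

	printf("separate locals: %zu bytes, union: %zu bytes\n",
	       sizeof(separate), sizeof(shared));
	return 0;
}
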
@@ -1661,17 +1707,14 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	case KVM_GET_NR_MMU_PAGES:
 		r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
 		break;
-	case KVM_SET_MEMORY_ALIAS: {
-		struct kvm_memory_alias alias;
-
+	case KVM_SET_MEMORY_ALIAS:
 		r = -EFAULT;
-		if (copy_from_user(&alias, argp, sizeof alias))
+		if (copy_from_user(&u.alias, argp, sizeof(struct kvm_memory_alias)))
 			goto out;
-		r = kvm_vm_ioctl_set_memory_alias(kvm, &alias);
+		r = kvm_vm_ioctl_set_memory_alias(kvm, &u.alias);
 		if (r)
 			goto out;
 		break;
-	}
 	case KVM_CREATE_IRQCHIP:
 		r = -ENOMEM;
 		kvm->arch.vpic = kvm_create_pic(kvm);
@@ -1699,13 +1742,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			goto out;
 		if (irqchip_in_kernel(kvm)) {
 			mutex_lock(&kvm->lock);
-			if (irq_event.irq < 16)
-				kvm_pic_set_irq(pic_irqchip(kvm),
-					irq_event.irq,
-					irq_event.level);
-			kvm_ioapic_set_irq(kvm->arch.vioapic,
-					irq_event.irq,
-					irq_event.level);
+			kvm_set_irq(kvm, irq_event.irq, irq_event.level);
 			mutex_unlock(&kvm->lock);
 			r = 0;
 		}
@@ -1713,65 +1750,77 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	}
 	case KVM_GET_IRQCHIP: {
 		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
-		struct kvm_irqchip chip;
+		struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL);
 
-		r = -EFAULT;
-		if (copy_from_user(&chip, argp, sizeof chip))
+		r = -ENOMEM;
+		if (!chip)
 			goto out;
+		r = -EFAULT;
+		if (copy_from_user(chip, argp, sizeof *chip))
+			goto get_irqchip_out;
 		r = -ENXIO;
 		if (!irqchip_in_kernel(kvm))
-			goto out;
-		r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
+			goto get_irqchip_out;
+		r = kvm_vm_ioctl_get_irqchip(kvm, chip);
 		if (r)
-			goto out;
+			goto get_irqchip_out;
 		r = -EFAULT;
-		if (copy_to_user(argp, &chip, sizeof chip))
-			goto out;
+		if (copy_to_user(argp, chip, sizeof *chip))
+			goto get_irqchip_out;
 		r = 0;
+	get_irqchip_out:
+		kfree(chip);
+		if (r)
+			goto out;
 		break;
 	}
 	case KVM_SET_IRQCHIP: {
 		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
-		struct kvm_irqchip chip;
+		struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL);
 
-		r = -EFAULT;
-		if (copy_from_user(&chip, argp, sizeof chip))
+		r = -ENOMEM;
+		if (!chip)
 			goto out;
+		r = -EFAULT;
+		if (copy_from_user(chip, argp, sizeof *chip))
+			goto set_irqchip_out;
 		r = -ENXIO;
 		if (!irqchip_in_kernel(kvm))
-			goto out;
-		r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
+			goto set_irqchip_out;
+		r = kvm_vm_ioctl_set_irqchip(kvm, chip);
 		if (r)
-			goto out;
+			goto set_irqchip_out;
 		r = 0;
+	set_irqchip_out:
+		kfree(chip);
+		if (r)
+			goto out;
 		break;
 	}
 	case KVM_GET_PIT: {
-		struct kvm_pit_state ps;
 		r = -EFAULT;
-		if (copy_from_user(&ps, argp, sizeof ps))
+		if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state)))
 			goto out;
 		r = -ENXIO;
 		if (!kvm->arch.vpit)
 			goto out;
-		r = kvm_vm_ioctl_get_pit(kvm, &ps);
+		r = kvm_vm_ioctl_get_pit(kvm, &u.ps);
 		if (r)
 			goto out;
 		r = -EFAULT;
-		if (copy_to_user(argp, &ps, sizeof ps))
+		if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state)))
 			goto out;
 		r = 0;
 		break;
 	}
 	case KVM_SET_PIT: {
-		struct kvm_pit_state ps;
 		r = -EFAULT;
-		if (copy_from_user(&ps, argp, sizeof ps))
+		if (copy_from_user(&u.ps, argp, sizeof u.ps))
 			goto out;
 		r = -ENXIO;
 		if (!kvm->arch.vpit)
 			goto out;
-		r = kvm_vm_ioctl_set_pit(kvm, &ps);
+		r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
 		if (r)
 			goto out;
 		r = 0;
@@ -2018,9 +2067,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
 
 	val = *(u64 *)new;
 
-	down_read(&current->mm->mmap_sem);
 	page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
-	up_read(&current->mm->mmap_sem);
 
 	kaddr = kmap_atomic(page, KM_USER0);
 	set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val);
@@ -2040,6 +2087,7 @@ static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
 
 int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
 {
+	kvm_mmu_invlpg(vcpu, address);
 	return X86EMUL_CONTINUE;
 }
 
@@ -2080,7 +2128,7 @@ int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
 void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
 {
 	u8 opcodes[4];
-	unsigned long rip = vcpu->arch.rip;
+	unsigned long rip = kvm_rip_read(vcpu);
 	unsigned long rip_linear;
 
 	if (!printk_ratelimit())
@@ -2102,6 +2150,14 @@ static struct x86_emulate_ops emulate_ops = {
 	.cmpxchg_emulated    = emulator_cmpxchg_emulated,
 };
 
+static void cache_all_regs(struct kvm_vcpu *vcpu)
+{
+	kvm_register_read(vcpu, VCPU_REGS_RAX);
+	kvm_register_read(vcpu, VCPU_REGS_RSP);
+	kvm_register_read(vcpu, VCPU_REGS_RIP);
+	vcpu->arch.regs_dirty = ~0;
+}
+
 int emulate_instruction(struct kvm_vcpu *vcpu,
 			struct kvm_run *run,
 			unsigned long cr2,
@@ -2111,8 +2167,15 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
 	int r;
 	struct decode_cache *c;
 
+	kvm_clear_exception_queue(vcpu);
 	vcpu->arch.mmio_fault_cr2 = cr2;
-	kvm_x86_ops->cache_regs(vcpu);
+	/*
+	 * TODO: fix x86_emulate.c to use guest_read/write_register
+	 * instead of direct ->regs accesses, can save hundred cycles
+	 * on Intel for instructions that don't read/change RSP, for
+	 * for example.
+	 */
+	cache_all_regs(vcpu);
 
 	vcpu->mmio_is_write = 0;
 	vcpu->arch.pio.string = 0;
@@ -2172,7 +2235,6 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
 		return EMULATE_DO_MMIO;
 	}
 
-	kvm_x86_ops->decache_regs(vcpu);
 	kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
 
 	if (vcpu->mmio_is_write) {
@@ -2225,20 +2287,19 @@ int complete_pio(struct kvm_vcpu *vcpu)
 	struct kvm_pio_request *io = &vcpu->arch.pio;
 	long delta;
 	int r;
-
-	kvm_x86_ops->cache_regs(vcpu);
+	unsigned long val;
 
 	if (!io->string) {
-		if (io->in)
-			memcpy(&vcpu->arch.regs[VCPU_REGS_RAX], vcpu->arch.pio_data,
-			       io->size);
+		if (io->in) {
+			val = kvm_register_read(vcpu, VCPU_REGS_RAX);
+			memcpy(&val, vcpu->arch.pio_data, io->size);
+			kvm_register_write(vcpu, VCPU_REGS_RAX, val);
+		}
 	} else {
 		if (io->in) {
 			r = pio_copy_data(vcpu);
-			if (r) {
-				kvm_x86_ops->cache_regs(vcpu);
+			if (r)
 				return r;
-			}
 		}
 
 		delta = 1;
@@ -2248,19 +2309,24 @@ int complete_pio(struct kvm_vcpu *vcpu)
 			 * The size of the register should really depend on
 			 * current address size.
 			 */
-			vcpu->arch.regs[VCPU_REGS_RCX] -= delta;
+			val = kvm_register_read(vcpu, VCPU_REGS_RCX);
+			val -= delta;
+			kvm_register_write(vcpu, VCPU_REGS_RCX, val);
 		}
 		if (io->down)
 			delta = -delta;
 		delta *= io->size;
-		if (io->in)
-			vcpu->arch.regs[VCPU_REGS_RDI] += delta;
-		else
-			vcpu->arch.regs[VCPU_REGS_RSI] += delta;
+		if (io->in) {
+			val = kvm_register_read(vcpu, VCPU_REGS_RDI);
+			val += delta;
+			kvm_register_write(vcpu, VCPU_REGS_RDI, val);
+		} else {
+			val = kvm_register_read(vcpu, VCPU_REGS_RSI);
+			val += delta;
+			kvm_register_write(vcpu, VCPU_REGS_RSI, val);
+		}
 	}
 
-	kvm_x86_ops->decache_regs(vcpu);
-
 	io->count -= io->cur_count;
 	io->cur_count = 0;
 
@@ -2313,6 +2379,7 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 		  int size, unsigned port)
 {
 	struct kvm_io_device *pio_dev;
+	unsigned long val;
 
 	vcpu->run->exit_reason = KVM_EXIT_IO;
 	vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
@@ -2333,8 +2400,8 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 	KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
 		    handler);
 
-	kvm_x86_ops->cache_regs(vcpu);
-	memcpy(vcpu->arch.pio_data, &vcpu->arch.regs[VCPU_REGS_RAX], 4);
+	val = kvm_register_read(vcpu, VCPU_REGS_RAX);
+	memcpy(vcpu->arch.pio_data, &val, 4);
 
 	kvm_x86_ops->skip_emulated_instruction(vcpu);
 
@@ -2492,11 +2559,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 	KVMTRACE_0D(HLT, vcpu, handler);
 	if (irqchip_in_kernel(vcpu->kvm)) {
 		vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
-		up_read(&vcpu->kvm->slots_lock);
-		kvm_vcpu_block(vcpu);
-		down_read(&vcpu->kvm->slots_lock);
-		if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
-			return -EINTR;
 		return 1;
 	} else {
 		vcpu->run->exit_reason = KVM_EXIT_HLT;
@@ -2519,13 +2581,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 	unsigned long nr, a0, a1, a2, a3, ret;
 	int r = 1;
 
-	kvm_x86_ops->cache_regs(vcpu);
-
-	nr = vcpu->arch.regs[VCPU_REGS_RAX];
-	a0 = vcpu->arch.regs[VCPU_REGS_RBX];
-	a1 = vcpu->arch.regs[VCPU_REGS_RCX];
-	a2 = vcpu->arch.regs[VCPU_REGS_RDX];
-	a3 = vcpu->arch.regs[VCPU_REGS_RSI];
+	nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
+	a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
+	a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
+	a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
+	a3 = kvm_register_read(vcpu, VCPU_REGS_RSI);
 
 	KVMTRACE_1D(VMMCALL, vcpu, (u32)nr, handler);
 
@@ -2548,8 +2608,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 		ret = -KVM_ENOSYS;
 		break;
 	}
-	vcpu->arch.regs[VCPU_REGS_RAX] = ret;
-	kvm_x86_ops->decache_regs(vcpu);
+	kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
 	++vcpu->stat.hypercalls;
 	return r;
 }
@@ -2559,6 +2618,7 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
 {
 	char instruction[3];
 	int ret = 0;
+	unsigned long rip = kvm_rip_read(vcpu);
 
 
 	/*
@@ -2568,9 +2628,8 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
 	 */
 	kvm_mmu_zap_all(vcpu->kvm);
 
-	kvm_x86_ops->cache_regs(vcpu);
 	kvm_x86_ops->patch_hypercall(vcpu, instruction);
-	if (emulator_write_emulated(vcpu->arch.rip, instruction, 3, vcpu)
+	if (emulator_write_emulated(rip, instruction, 3, vcpu)
 	    != X86EMUL_CONTINUE)
 		ret = -EFAULT;
 
@@ -2700,13 +2759,12 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 	u32 function, index;
 	struct kvm_cpuid_entry2 *e, *best;
 
-	kvm_x86_ops->cache_regs(vcpu);
-	function = vcpu->arch.regs[VCPU_REGS_RAX];
-	index = vcpu->arch.regs[VCPU_REGS_RCX];
-	vcpu->arch.regs[VCPU_REGS_RAX] = 0;
-	vcpu->arch.regs[VCPU_REGS_RBX] = 0;
-	vcpu->arch.regs[VCPU_REGS_RCX] = 0;
-	vcpu->arch.regs[VCPU_REGS_RDX] = 0;
+	function = kvm_register_read(vcpu, VCPU_REGS_RAX);
+	index = kvm_register_read(vcpu, VCPU_REGS_RCX);
+	kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
+	kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
+	kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
+	kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
 	best = NULL;
 	for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
 		e = &vcpu->arch.cpuid_entries[i];
@@ -2724,18 +2782,17 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 			best = e;
 	}
 	if (best) {
-		vcpu->arch.regs[VCPU_REGS_RAX] = best->eax;
-		vcpu->arch.regs[VCPU_REGS_RBX] = best->ebx;
-		vcpu->arch.regs[VCPU_REGS_RCX] = best->ecx;
-		vcpu->arch.regs[VCPU_REGS_RDX] = best->edx;
+		kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax);
+		kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx);
+		kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx);
+		kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx);
 	}
-	kvm_x86_ops->decache_regs(vcpu);
 	kvm_x86_ops->skip_emulated_instruction(vcpu);
 	KVMTRACE_5D(CPUID, vcpu, function,
-		    (u32)vcpu->arch.regs[VCPU_REGS_RAX],
-		    (u32)vcpu->arch.regs[VCPU_REGS_RBX],
-		    (u32)vcpu->arch.regs[VCPU_REGS_RCX],
-		    (u32)vcpu->arch.regs[VCPU_REGS_RDX], handler);
+		    (u32)kvm_register_read(vcpu, VCPU_REGS_RAX),
+		    (u32)kvm_register_read(vcpu, VCPU_REGS_RBX),
+		    (u32)kvm_register_read(vcpu, VCPU_REGS_RCX),
+		    (u32)kvm_register_read(vcpu, VCPU_REGS_RDX), handler);
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
 
@@ -2776,9 +2833,7 @@ static void vapic_enter(struct kvm_vcpu *vcpu)
 	if (!apic || !apic->vapic_addr)
 		return;
 
-	down_read(&current->mm->mmap_sem);
 	page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
-	up_read(&current->mm->mmap_sem);
 
 	vcpu->arch.apic->vapic_page = page;
 }
@@ -2796,28 +2851,10 @@ static void vapic_exit(struct kvm_vcpu *vcpu)
 	up_read(&vcpu->kvm->slots_lock);
 }
 
-static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	int r;
 
-	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
-		pr_debug("vcpu %d received sipi with vector # %x\n",
-			 vcpu->vcpu_id, vcpu->arch.sipi_vector);
-		kvm_lapic_reset(vcpu);
-		r = kvm_x86_ops->vcpu_reset(vcpu);
-		if (r)
-			return r;
-		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-	}
-
-	down_read(&vcpu->kvm->slots_lock);
-	vapic_enter(vcpu);
-
-preempted:
-	if (vcpu->guest_debug.enabled)
-		kvm_x86_ops->guest_debug_pre(vcpu);
-
-again:
 	if (vcpu->requests)
 		if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
 			kvm_mmu_unload(vcpu);
@@ -2829,6 +2866,8 @@ again:
 	if (vcpu->requests) {
 		if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
 			__kvm_migrate_timers(vcpu);
+		if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests))
+			kvm_mmu_sync_roots(vcpu);
 		if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
 			kvm_x86_ops->tlb_flush(vcpu);
 		if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS,
@@ -2854,21 +2893,15 @@ again:
 
 	local_irq_disable();
 
-	if (vcpu->requests || need_resched()) {
+	if (vcpu->requests || need_resched() || signal_pending(current)) {
 		local_irq_enable();
 		preempt_enable();
 		r = 1;
 		goto out;
 	}
 
-	if (signal_pending(current)) {
-		local_irq_enable();
-		preempt_enable();
-		r = -EINTR;
-		kvm_run->exit_reason = KVM_EXIT_INTR;
-		++vcpu->stat.signal_exits;
-		goto out;
-	}
+	if (vcpu->guest_debug.enabled)
+		kvm_x86_ops->guest_debug_pre(vcpu);
 
 	vcpu->guest_mode = 1;
 	/*
@@ -2917,8 +2950,8 @@ again:
 	 * Profile KVM exit RIPs:
 	 */
 	if (unlikely(prof_on == KVM_PROFILING)) {
-		kvm_x86_ops->cache_regs(vcpu);
-		profile_hit(KVM_PROFILING, (void *)vcpu->arch.rip);
+		unsigned long rip = kvm_rip_read(vcpu);
+		profile_hit(KVM_PROFILING, (void *)rip);
 	}
 
 	if (vcpu->arch.exception.pending && kvm_x86_ops->exception_injected(vcpu))
@@ -2927,26 +2960,63 @@ again:
 	kvm_lapic_sync_from_vapic(vcpu);
 
 	r = kvm_x86_ops->handle_exit(kvm_run, vcpu);
+out:
+	return r;
+}
 
-	if (r > 0) {
-		if (dm_request_for_irq_injection(vcpu, kvm_run)) {
-			r = -EINTR;
-			kvm_run->exit_reason = KVM_EXIT_INTR;
-			++vcpu->stat.request_irq_exits;
-			goto out;
-		}
-		if (!need_resched())
-			goto again;
+static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	int r;
+
+	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
+		pr_debug("vcpu %d received sipi with vector # %x\n",
+			 vcpu->vcpu_id, vcpu->arch.sipi_vector);
+		kvm_lapic_reset(vcpu);
+		r = kvm_x86_ops->vcpu_reset(vcpu);
+		if (r)
+			return r;
+		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 	}
 
-out:
-	up_read(&vcpu->kvm->slots_lock);
-	if (r > 0) {
-		kvm_resched(vcpu);
-		down_read(&vcpu->kvm->slots_lock);
-		goto preempted;
+	down_read(&vcpu->kvm->slots_lock);
+	vapic_enter(vcpu);
+
+	r = 1;
+	while (r > 0) {
+		if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
+			r = vcpu_enter_guest(vcpu, kvm_run);
+		else {
+			up_read(&vcpu->kvm->slots_lock);
+			kvm_vcpu_block(vcpu);
+			down_read(&vcpu->kvm->slots_lock);
+			if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests))
+				if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
+					vcpu->arch.mp_state =
+							KVM_MP_STATE_RUNNABLE;
+			if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
+				r = -EINTR;
+		}
+
+		if (r > 0) {
+			if (dm_request_for_irq_injection(vcpu, kvm_run)) {
+				r = -EINTR;
+				kvm_run->exit_reason = KVM_EXIT_INTR;
+				++vcpu->stat.request_irq_exits;
+			}
+			if (signal_pending(current)) {
+				r = -EINTR;
+				kvm_run->exit_reason = KVM_EXIT_INTR;
+				++vcpu->stat.signal_exits;
+			}
+			if (need_resched()) {
+				up_read(&vcpu->kvm->slots_lock);
+				kvm_resched(vcpu);
+				down_read(&vcpu->kvm->slots_lock);
+			}
+		}
 	}
 
+	up_read(&vcpu->kvm->slots_lock);
 	post_kvm_run_save(vcpu, kvm_run);
 
 	vapic_exit(vcpu);
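
The hunks above split the old goto-based __vcpu_run() into a per-entry vcpu_enter_guest() helper and an outer while (r > 0) loop: a RUNNABLE vcpu enters the guest, a halted one blocks until KVM_REQ_UNHALT makes it runnable again, and pending signals or userspace interrupt-injection requests turn into -EINTR exits from the same loop. The toy user-space program below mirrors only that control flow; the state names and the fake enter_guest()/block_until_event() helpers are illustrative assumptions, not KVM APIs.

#include <stdio.h>

enum mp_state { MP_RUNNABLE, MP_HALTED };

/* Fake inputs driving the loop; in KVM these come from the guest and host. */
static int exits_left = 3;        /* guest exits before it executes HLT     */
static int signal_pending_flag;   /* stands in for signal_pending(current)  */

static int enter_guest(enum mp_state *state)
{
	if (--exits_left == 0) {
		*state = MP_HALTED;   /* guest executed HLT */
		printf("guest halted\n");
	}
	return 1;                     /* keep looping */
}

static int block_until_event(void)
{
	printf("vcpu blocked, waiting for an interrupt\n");
	return 0;                     /* 0: no unhalt request was raised */
}

int main(void)
{
	enum mp_state state = MP_RUNNABLE;
	int r = 1;

	while (r > 0) {
		if (state == MP_RUNNABLE)
			r = enter_guest(&state);
		else {
			if (block_until_event())
				state = MP_RUNNABLE;  /* KVM_REQ_UNHALT analogue */
			if (state != MP_RUNNABLE)
				r = -1;               /* -EINTR back to userspace */
		}
		if (r > 0 && signal_pending_flag)
			r = -1;                       /* a signal also means -EINTR */
	}
	printf("leaving run loop, r=%d\n", r);
	return 0;
}
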
@@ -2966,6 +3036,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
 		kvm_vcpu_block(vcpu);
+		clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
 		r = -EAGAIN;
 		goto out;
 	}
@@ -2999,11 +3070,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		}
 	}
 #endif
-	if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) {
-		kvm_x86_ops->cache_regs(vcpu);
-		vcpu->arch.regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret;
-		kvm_x86_ops->decache_regs(vcpu);
-	}
+	if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL)
+		kvm_register_write(vcpu, VCPU_REGS_RAX,
+				   kvm_run->hypercall.ret);
 
 	r = __vcpu_run(vcpu, kvm_run);
 
@@ -3019,28 +3088,26 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
 	vcpu_load(vcpu);
 
-	kvm_x86_ops->cache_regs(vcpu);
-
-	regs->rax = vcpu->arch.regs[VCPU_REGS_RAX];
-	regs->rbx = vcpu->arch.regs[VCPU_REGS_RBX];
-	regs->rcx = vcpu->arch.regs[VCPU_REGS_RCX];
-	regs->rdx = vcpu->arch.regs[VCPU_REGS_RDX];
-	regs->rsi = vcpu->arch.regs[VCPU_REGS_RSI];
-	regs->rdi = vcpu->arch.regs[VCPU_REGS_RDI];
-	regs->rsp = vcpu->arch.regs[VCPU_REGS_RSP];
-	regs->rbp = vcpu->arch.regs[VCPU_REGS_RBP];
+	regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
+	regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);
+	regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX);
+	regs->rdx = kvm_register_read(vcpu, VCPU_REGS_RDX);
+	regs->rsi = kvm_register_read(vcpu, VCPU_REGS_RSI);
+	regs->rdi = kvm_register_read(vcpu, VCPU_REGS_RDI);
+	regs->rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
+	regs->rbp = kvm_register_read(vcpu, VCPU_REGS_RBP);
 #ifdef CONFIG_X86_64
-	regs->r8 = vcpu->arch.regs[VCPU_REGS_R8];
-	regs->r9 = vcpu->arch.regs[VCPU_REGS_R9];
-	regs->r10 = vcpu->arch.regs[VCPU_REGS_R10];
-	regs->r11 = vcpu->arch.regs[VCPU_REGS_R11];
-	regs->r12 = vcpu->arch.regs[VCPU_REGS_R12];
-	regs->r13 = vcpu->arch.regs[VCPU_REGS_R13];
-	regs->r14 = vcpu->arch.regs[VCPU_REGS_R14];
-	regs->r15 = vcpu->arch.regs[VCPU_REGS_R15];
+	regs->r8 = kvm_register_read(vcpu, VCPU_REGS_R8);
+	regs->r9 = kvm_register_read(vcpu, VCPU_REGS_R9);
+	regs->r10 = kvm_register_read(vcpu, VCPU_REGS_R10);
+	regs->r11 = kvm_register_read(vcpu, VCPU_REGS_R11);
+	regs->r12 = kvm_register_read(vcpu, VCPU_REGS_R12);
+	regs->r13 = kvm_register_read(vcpu, VCPU_REGS_R13);
+	regs->r14 = kvm_register_read(vcpu, VCPU_REGS_R14);
+	regs->r15 = kvm_register_read(vcpu, VCPU_REGS_R15);
 #endif
 
-	regs->rip = vcpu->arch.rip;
+	regs->rip = kvm_rip_read(vcpu);
 	regs->rflags = kvm_x86_ops->get_rflags(vcpu);
 
 	/*
@@ -3058,29 +3125,29 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
 	vcpu_load(vcpu);
 
-	vcpu->arch.regs[VCPU_REGS_RAX] = regs->rax;
-	vcpu->arch.regs[VCPU_REGS_RBX] = regs->rbx;
-	vcpu->arch.regs[VCPU_REGS_RCX] = regs->rcx;
-	vcpu->arch.regs[VCPU_REGS_RDX] = regs->rdx;
-	vcpu->arch.regs[VCPU_REGS_RSI] = regs->rsi;
-	vcpu->arch.regs[VCPU_REGS_RDI] = regs->rdi;
-	vcpu->arch.regs[VCPU_REGS_RSP] = regs->rsp;
-	vcpu->arch.regs[VCPU_REGS_RBP] = regs->rbp;
+	kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax);
+	kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx);
+	kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx);
+	kvm_register_write(vcpu, VCPU_REGS_RDX, regs->rdx);
+	kvm_register_write(vcpu, VCPU_REGS_RSI, regs->rsi);
+	kvm_register_write(vcpu, VCPU_REGS_RDI, regs->rdi);
+	kvm_register_write(vcpu, VCPU_REGS_RSP, regs->rsp);
+	kvm_register_write(vcpu, VCPU_REGS_RBP, regs->rbp);
 #ifdef CONFIG_X86_64
-	vcpu->arch.regs[VCPU_REGS_R8] = regs->r8;
-	vcpu->arch.regs[VCPU_REGS_R9] = regs->r9;
-	vcpu->arch.regs[VCPU_REGS_R10] = regs->r10;
-	vcpu->arch.regs[VCPU_REGS_R11] = regs->r11;
-	vcpu->arch.regs[VCPU_REGS_R12] = regs->r12;
-	vcpu->arch.regs[VCPU_REGS_R13] = regs->r13;
-	vcpu->arch.regs[VCPU_REGS_R14] = regs->r14;
-	vcpu->arch.regs[VCPU_REGS_R15] = regs->r15;
+	kvm_register_write(vcpu, VCPU_REGS_R8, regs->r8);
+	kvm_register_write(vcpu, VCPU_REGS_R9, regs->r9);
+	kvm_register_write(vcpu, VCPU_REGS_R10, regs->r10);
+	kvm_register_write(vcpu, VCPU_REGS_R11, regs->r11);
+	kvm_register_write(vcpu, VCPU_REGS_R12, regs->r12);
+	kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13);
+	kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14);
+	kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15);
+
 #endif
 
-	vcpu->arch.rip = regs->rip;
+	kvm_rip_write(vcpu, regs->rip);
 	kvm_x86_ops->set_rflags(vcpu, regs->rflags);
 
-	kvm_x86_ops->decache_regs(vcpu);
 
 	vcpu->arch.exception.pending = false;
 
@@ -3294,11 +3361,33 @@ static int load_segment_descriptor_to_kvm_desct(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg)
+{
+	struct kvm_segment segvar = {
+		.base = selector << 4,
+		.limit = 0xffff,
+		.selector = selector,
+		.type = 3,
+		.present = 1,
+		.dpl = 3,
+		.db = 0,
+		.s = 1,
+		.l = 0,
+		.g = 0,
+		.avl = 0,
+		.unusable = 0,
+	};
+	kvm_x86_ops->set_segment(vcpu, &segvar, seg);
+	return 0;
+}
+
 int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
 				int type_bits, int seg)
 {
 	struct kvm_segment kvm_seg;
 
+	if (!(vcpu->arch.cr0 & X86_CR0_PE))
+		return kvm_load_realmode_segment(vcpu, selector, seg);
 	if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg))
 		return 1;
 	kvm_seg.type |= type_bits;
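
The kvm_load_realmode_segment() helper added above encodes the real-mode rule that a segment register's base is simply its selector shifted left by four, with a fixed 64 KiB limit, so segment loads outside protected mode (CR0.PE clear) no longer go through the descriptor-table path. A short self-contained illustration of that address arithmetic, using example values rather than anything from the patch:

#include <stdio.h>

int main(void)
{
	unsigned short selector = 0xf000;                 /* example selector */
	unsigned int base = (unsigned int)selector << 4;  /* 0xf0000          */
	unsigned int linear = base + 0xfff0;              /* selector:0xfff0  */

	printf("base=%#x linear=%#x\n", base, linear);    /* 0xf0000, 0xffff0 */
	return 0;
}
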
@@ -3316,17 +3405,16 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu,
 				  struct tss_segment_32 *tss)
 {
 	tss->cr3 = vcpu->arch.cr3;
-	tss->eip = vcpu->arch.rip;
+	tss->eip = kvm_rip_read(vcpu);
 	tss->eflags = kvm_x86_ops->get_rflags(vcpu);
-	tss->eax = vcpu->arch.regs[VCPU_REGS_RAX];
-	tss->ecx = vcpu->arch.regs[VCPU_REGS_RCX];
-	tss->edx = vcpu->arch.regs[VCPU_REGS_RDX];
-	tss->ebx = vcpu->arch.regs[VCPU_REGS_RBX];
-	tss->esp = vcpu->arch.regs[VCPU_REGS_RSP];
-	tss->ebp = vcpu->arch.regs[VCPU_REGS_RBP];
-	tss->esi = vcpu->arch.regs[VCPU_REGS_RSI];
-	tss->edi = vcpu->arch.regs[VCPU_REGS_RDI];
-
+	tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
+	tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
+	tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX);
+	tss->ebx = kvm_register_read(vcpu, VCPU_REGS_RBX);
+	tss->esp = kvm_register_read(vcpu, VCPU_REGS_RSP);
+	tss->ebp = kvm_register_read(vcpu, VCPU_REGS_RBP);
+	tss->esi = kvm_register_read(vcpu, VCPU_REGS_RSI);
+	tss->edi = kvm_register_read(vcpu, VCPU_REGS_RDI);
 	tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
 	tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
 	tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
@@ -3342,17 +3430,17 @@ static int load_state_from_tss32(struct kvm_vcpu *vcpu,
 {
 	kvm_set_cr3(vcpu, tss->cr3);
 
-	vcpu->arch.rip = tss->eip;
+	kvm_rip_write(vcpu, tss->eip);
 	kvm_x86_ops->set_rflags(vcpu, tss->eflags | 2);
 
-	vcpu->arch.regs[VCPU_REGS_RAX] = tss->eax;
-	vcpu->arch.regs[VCPU_REGS_RCX] = tss->ecx;
-	vcpu->arch.regs[VCPU_REGS_RDX] = tss->edx;
-	vcpu->arch.regs[VCPU_REGS_RBX] = tss->ebx;
-	vcpu->arch.regs[VCPU_REGS_RSP] = tss->esp;
-	vcpu->arch.regs[VCPU_REGS_RBP] = tss->ebp;
-	vcpu->arch.regs[VCPU_REGS_RSI] = tss->esi;
-	vcpu->arch.regs[VCPU_REGS_RDI] = tss->edi;
+	kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax);
+	kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx);
+	kvm_register_write(vcpu, VCPU_REGS_RDX, tss->edx);
+	kvm_register_write(vcpu, VCPU_REGS_RBX, tss->ebx);
+	kvm_register_write(vcpu, VCPU_REGS_RSP, tss->esp);
+	kvm_register_write(vcpu, VCPU_REGS_RBP, tss->ebp);
+	kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi);
+	kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi);
 
 	if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR))
 		return 1;
@@ -3380,16 +3468,16 @@ static int load_state_from_tss32(struct kvm_vcpu *vcpu,
 static void save_state_to_tss16(struct kvm_vcpu *vcpu,
 				struct tss_segment_16 *tss)
 {
-	tss->ip = vcpu->arch.rip;
+	tss->ip = kvm_rip_read(vcpu);
 	tss->flag = kvm_x86_ops->get_rflags(vcpu);
-	tss->ax = vcpu->arch.regs[VCPU_REGS_RAX];
-	tss->cx = vcpu->arch.regs[VCPU_REGS_RCX];
-	tss->dx = vcpu->arch.regs[VCPU_REGS_RDX];
-	tss->bx = vcpu->arch.regs[VCPU_REGS_RBX];
-	tss->sp = vcpu->arch.regs[VCPU_REGS_RSP];
-	tss->bp = vcpu->arch.regs[VCPU_REGS_RBP];
-	tss->si = vcpu->arch.regs[VCPU_REGS_RSI];
-	tss->di = vcpu->arch.regs[VCPU_REGS_RDI];
+	tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX);
+	tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX);
+	tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX);
+	tss->bx = kvm_register_read(vcpu, VCPU_REGS_RBX);
+	tss->sp = kvm_register_read(vcpu, VCPU_REGS_RSP);
+	tss->bp = kvm_register_read(vcpu, VCPU_REGS_RBP);
+	tss->si = kvm_register_read(vcpu, VCPU_REGS_RSI);
+	tss->di = kvm_register_read(vcpu, VCPU_REGS_RDI);
 
 	tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
 	tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
@@ -3402,16 +3490,16 @@ static void save_state_to_tss16(struct kvm_vcpu *vcpu,
 static int load_state_from_tss16(struct kvm_vcpu *vcpu,
 				 struct tss_segment_16 *tss)
 {
-	vcpu->arch.rip = tss->ip;
+	kvm_rip_write(vcpu, tss->ip);
 	kvm_x86_ops->set_rflags(vcpu, tss->flag | 2);
-	vcpu->arch.regs[VCPU_REGS_RAX] = tss->ax;
-	vcpu->arch.regs[VCPU_REGS_RCX] = tss->cx;
-	vcpu->arch.regs[VCPU_REGS_RDX] = tss->dx;
-	vcpu->arch.regs[VCPU_REGS_RBX] = tss->bx;
-	vcpu->arch.regs[VCPU_REGS_RSP] = tss->sp;
-	vcpu->arch.regs[VCPU_REGS_RBP] = tss->bp;
-	vcpu->arch.regs[VCPU_REGS_RSI] = tss->si;
-	vcpu->arch.regs[VCPU_REGS_RDI] = tss->di;
+	kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax);
+	kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx);
+	kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx);
+	kvm_register_write(vcpu, VCPU_REGS_RBX, tss->bx);
+	kvm_register_write(vcpu, VCPU_REGS_RSP, tss->sp);
+	kvm_register_write(vcpu, VCPU_REGS_RBP, tss->bp);
+	kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si);
+	kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di);
 
 	if (kvm_load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR))
 		return 1;
@@ -3534,7 +3622,6 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
 	}
 
 	kvm_x86_ops->skip_emulated_instruction(vcpu);
-	kvm_x86_ops->cache_regs(vcpu);
 
 	if (nseg_desc.type & 8)
 		ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_base,
@@ -3559,7 +3646,6 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
 	tr_seg.type = 11;
 	kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR);
 out:
-	kvm_x86_ops->decache_regs(vcpu);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(kvm_task_switch);
@@ -3622,6 +3708,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 			pr_debug("Set back pending irq %d\n",
 				 pending_vec);
 		}
+		kvm_pic_clear_isr_ack(vcpu->kvm);
 	}
 
 	kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
@@ -3634,6 +3721,12 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
 	kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
 
+	/* Older userspace won't unhalt the vcpu on reset. */
+	if (vcpu->vcpu_id == 0 && kvm_rip_read(vcpu) == 0xfff0 &&
+	    sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
+	    !(vcpu->arch.cr0 & X86_CR0_PE))
+		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+
 	vcpu_put(vcpu);
 
 	return 0;
@@ -3918,6 +4011,7 @@ struct kvm *kvm_arch_create_vm(void)
 		return ERR_PTR(-ENOMEM);
 
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
+	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
 
 	return kvm;
 }
@@ -3950,6 +4044,8 @@ static void kvm_free_vcpus(struct kvm *kvm)
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
+	kvm_iommu_unmap_guest(kvm);
+	kvm_free_all_assigned_devices(kvm);
 	kvm_free_pit(kvm);
 	kfree(kvm->arch.vpic);
 	kfree(kvm->arch.vioapic);
@@ -3981,7 +4077,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 		userspace_addr = do_mmap(NULL, 0,
 					 npages * PAGE_SIZE,
 					 PROT_READ | PROT_WRITE,
-					 MAP_SHARED | MAP_ANONYMOUS,
+					 MAP_PRIVATE | MAP_ANONYMOUS,
 					 0);
 		up_write(&current->mm->mmap_sem);
 