Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Paolo Bonzini:
 "ARM:
   - support for SVE and Pointer Authentication in guests
   - PMU improvements

  POWER:
   - support for direct access to the POWER9 XIVE interrupt controller
   - memory and performance optimizations

  x86:
   - support for accessing memory not backed by struct page
   - fixes and refactoring

  Generic:
   - dirty page tracking improvements"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (155 commits)
  kvm: fix compilation on aarch64
  Revert "KVM: nVMX: Expose RDPMC-exiting only when guest supports PMU"
  kvm: x86: Fix L1TF mitigation for shadow MMU
  KVM: nVMX: Disable intercept for FS/GS base MSRs in vmcs02 when possible
  KVM: PPC: Book3S: Remove useless checks in 'release' method of KVM device
  KVM: PPC: Book3S HV: XIVE: Fix spelling mistake "acessing" -> "accessing"
  KVM: PPC: Book3S HV: Make sure to load LPID for radix VCPUs
  kvm: nVMX: Set nested_run_pending in vmx_set_nested_state after checks complete
  tests: kvm: Add tests for KVM_SET_NESTED_STATE
  KVM: nVMX: KVM_SET_NESTED_STATE - Tear down old EVMCS state before setting new state
  tests: kvm: Add tests for KVM_CAP_MAX_VCPUS and KVM_CAP_MAX_CPU_ID
  tests: kvm: Add tests to .gitignore
  KVM: Introduce KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2
  KVM: Fix kvm_clear_dirty_log_protect off-by-(minus-)one
  KVM: Fix the bitmap range to copy during clear dirty
  KVM: arm64: Fix ptrauth ID register masking logic
  KVM: x86: use direct accessors for RIP and RSP
  KVM: VMX: Use accessors for GPRs outside of dedicated caching logic
  KVM: x86: Omit caching logic for always-available GPRs
  kvm, x86: Properly check whether a pfn is an MMIO or not
  ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2019-05-17 13:33:30 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2019-05-17 13:33:30 -0400
commit: 0ef0fd351550130129bbdb77362488befd7b69d2 (patch)
tree: 23186172f5f85c06e18e3ee1a9619879df03c5df /virt
parent: 4489da7183099f569a7d3dd819c975073c04bc72 (diff)
parent: c011d23ba046826ccf8c4a4a6c1d01c9ccaa1403 (diff)
3 files changed, 131 insertions, 18 deletions
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index ea434ddc8499..aad9284c043a 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -57,3 +57,6 @@ config HAVE_KVM_VCPU_ASYNC_IOCTL
 config HAVE_KVM_VCPU_RUN_PID_CHANGE
       bool
+config HAVE_KVM_NO_POLL
+       bool
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index f412ebc90610..90cedebaeb94 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -56,7 +56,7 @@
 __asm__(".arch_extension        virt");
 #endif
-DEFINE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state);
+DEFINE_PER_CPU(kvm_host_data_t, kvm_host_data);
 static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
 /* Per-CPU variable containing the currently running vcpu. */
@@ -224,9 +224,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_MAX_VCPUS:
                r = KVM_MAX_VCPUS;
                break;
-        case KVM_CAP_NR_MEMSLOTS:
-                r = KVM_USER_MEM_SLOTS;
-                break;
        case KVM_CAP_MSI_DEVID:
                if (!kvm)
                        r = -EINVAL;
@@ -360,8 +357,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
        int *last_ran;
+        kvm_host_data_t *cpu_data;
        last_ran = this_cpu_ptr(vcpu->kvm->arch.last_vcpu_ran);
+        cpu_data = this_cpu_ptr(&kvm_host_data);
        /*
         * We might get preempted before the vCPU actually runs, but
@@ -373,18 +372,21 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
        }
        vcpu->cpu = cpu;
-        vcpu->arch.host_cpu_context = this_cpu_ptr(&kvm_host_cpu_state);
+        vcpu->arch.host_cpu_context = &cpu_data->host_ctxt;
        kvm_arm_set_running_vcpu(vcpu);
        kvm_vgic_load(vcpu);
        kvm_timer_vcpu_load(vcpu);
        kvm_vcpu_load_sysregs(vcpu);
        kvm_arch_vcpu_load_fp(vcpu);
+        kvm_vcpu_pmu_restore_guest(vcpu);
        if (single_task_running())
                vcpu_clear_wfe_traps(vcpu);
        else
                vcpu_set_wfe_traps(vcpu);
+        vcpu_ptrauth_setup_lazy(vcpu);
 }
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -393,6 +395,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
        kvm_vcpu_put_sysregs(vcpu);
        kvm_timer_vcpu_put(vcpu);
        kvm_vgic_put(vcpu);
+        kvm_vcpu_pmu_restore_host(vcpu);
        vcpu->cpu = -1;
@@ -545,6 +548,9 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
        if (likely(vcpu->arch.has_run_once))
                return 0;
+        if (!kvm_arm_vcpu_is_finalized(vcpu))
+                return -EPERM;
        vcpu->arch.has_run_once = true;
        if (likely(irqchip_in_kernel(kvm))) {
@@ -1121,6 +1127,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                if (unlikely(!kvm_vcpu_initialized(vcpu)))
                        break;
+                r = -EPERM;
+                if (!kvm_arm_vcpu_is_finalized(vcpu))
+                        break;
                r = -EFAULT;
                if (copy_from_user(&reg_list, user_list, sizeof(reg_list)))
                        break;
@@ -1174,6 +1184,17 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                return kvm_arm_vcpu_set_events(vcpu, &events);
        }
+        case KVM_ARM_VCPU_FINALIZE: {
+                int what;
+                if (!kvm_vcpu_initialized(vcpu))
+                        return -ENOEXEC;
+                if (get_user(what, (const int __user *)argp))
+                        return -EFAULT;
+                return kvm_arm_vcpu_finalize(vcpu, what);
+        }
        default:
                r = -EINVAL;
        }
@@ -1554,11 +1575,11 @@ static int init_hyp_mode(void)
        }
        for_each_possible_cpu(cpu) {
-                kvm_cpu_context_t *cpu_ctxt;
+                kvm_host_data_t *cpu_data;
-                cpu_ctxt = per_cpu_ptr(&kvm_host_cpu_state, cpu);
+                cpu_data = per_cpu_ptr(&kvm_host_data, cpu);
-                kvm_init_host_cpu_context(cpu_ctxt, cpu);
+                kvm_init_host_cpu_context(&cpu_data->host_ctxt, cpu);
-                err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1, PAGE_HYP);
+                err = create_hyp_mappings(cpu_data, cpu_data + 1, PAGE_HYP);
                if (err) {
                        kvm_err("Cannot map host CPU state: %d\n", err);
@@ -1669,6 +1690,10 @@ int kvm_arch_init(void *opaque)
        if (err)
                return err;
+        err = kvm_arm_init_sve();
+        if (err)
+                return err;
        if (!in_hyp_mode) {
                err = init_hyp_mode();
                if (err)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 5fb0f1656a96..f0d13d9d125d 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -51,9 +51,9 @@
 #include <linux/slab.h>
 #include <linux/sort.h>
 #include <linux/bsearch.h>
+#include <linux/io.h>
 #include <asm/processor.h>
-#include <asm/io.h>
 #include <asm/ioctl.h>
 #include <linux/uaccess.h>
 #include <asm/pgtable.h>
@@ -1135,11 +1135,11 @@ EXPORT_SYMBOL_GPL(kvm_get_dirty_log);
 #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
 /**
- * kvm_get_dirty_log_protect - get a snapshot of dirty pages, and if any pages
+ * kvm_get_dirty_log_protect - get a snapshot of dirty pages
 *      and reenable dirty page tracking for the corresponding pages.
 * @kvm:        pointer to kvm instance
 * @log:        slot id and address to which we copy the log
- * @is_dirty:   flag set if any page is dirty
+ * @flush:      true if TLB flush is needed by caller
 *
 * We need to keep it in mind that VCPU threads can write to the bitmap
 * concurrently. So, to avoid losing track of dirty pages we keep the
@@ -1224,6 +1224,7 @@ EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect);
 *      and reenable dirty page tracking for the corresponding pages.
 * @kvm:        pointer to kvm instance
 * @log:        slot id and address from which to fetch the bitmap of dirty pages
+ * @flush:      true if TLB flush is needed by caller
 */
 int kvm_clear_dirty_log_protect(struct kvm *kvm,
                                struct kvm_clear_dirty_log *log, bool *flush)
@@ -1251,7 +1252,7 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm,
        if (!dirty_bitmap)
                return -ENOENT;
-        n = kvm_dirty_bitmap_bytes(memslot);
+        n = ALIGN(log->num_pages, BITS_PER_LONG) / 8;
        if (log->first_page > memslot->npages ||
            log->num_pages > memslot->npages - log->first_page ||
@@ -1264,8 +1265,8 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm,
                return -EFAULT;
        spin_lock(&kvm->mmu_lock);
-        for (offset = log->first_page,
+        for (offset = log->first_page, i = offset / BITS_PER_LONG,
-             i = offset / BITS_PER_LONG, n = log->num_pages / BITS_PER_LONG; n--;
+                 n = DIV_ROUND_UP(log->num_pages, BITS_PER_LONG); n--;
             i++, offset += BITS_PER_LONG) {
                unsigned long mask = *dirty_bitmap_buffer++;
                atomic_long_t *p = (atomic_long_t *) &dirty_bitmap[i];
@@ -1742,6 +1743,70 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(gfn_to_page);
+/*
+ * Translate @gfn within @slot to a host pfn and obtain a kernel virtual
+ * address for that page, recording the result in @map.
+ *
+ * A pfn accepted by pfn_valid() is mapped through kmap() on its struct
+ * page; any other pfn is mapped with memremap() and @map->page is left at
+ * KVM_UNMAPPED_PAGE.
+ *
+ * Returns 0 on success, -EINVAL when @map is NULL or the gfn resolves to
+ * an error/no-slot pfn, and -EFAULT when no mapping could be established.
+ * @map is only written on success.
+ */
+static int __kvm_map_gfn(struct kvm_memory_slot *slot, gfn_t gfn,
+                         struct kvm_host_map *map)
+{
+        struct page *backing = KVM_UNMAPPED_PAGE;
+        void *vaddr;
+        kvm_pfn_t host_pfn;
+
+        if (!map)
+                return -EINVAL;
+
+        host_pfn = gfn_to_pfn_memslot(slot, gfn);
+        if (is_error_noslot_pfn(host_pfn))
+                return -EINVAL;
+
+        if (!pfn_valid(host_pfn)) {
+                /* No struct page to kmap(): remap the physical range. */
+                vaddr = memremap(pfn_to_hpa(host_pfn), PAGE_SIZE, MEMREMAP_WB);
+        } else {
+                backing = pfn_to_page(host_pfn);
+                vaddr = kmap(backing);
+        }
+
+        if (!vaddr)
+                return -EFAULT;
+
+        map->gfn = gfn;
+        map->pfn = host_pfn;
+        map->hva = vaddr;
+        map->page = backing;
+
+        return 0;
+}
+/*
+ * Map guest frame @gfn, as seen by @vcpu, into the kernel and describe the
+ * mapping in @map.  The memslot is looked up with kvm_vcpu_gfn_to_memslot()
+ * and the work is done by __kvm_map_gfn(), whose return value is passed
+ * through unchanged.
+ */
+int kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map)
+{
+        struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+
+        return __kvm_map_gfn(slot, gfn, map);
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_map);
+/*
+ * Tear down a mapping previously set up in @map by kvm_vcpu_map().
+ *
+ * @vcpu:  vCPU used to mark the guest page dirty
+ * @map:   mapping descriptor; a NULL @map or a NULL @map->hva is a no-op
+ * @dirty: when true, the gfn is marked dirty and the pfn is released as
+ *         dirty; otherwise the pfn is released clean
+ *
+ * On return @map->hva and @map->page are reset to NULL so that a repeated
+ * call on the same descriptor does nothing.
+ */
+void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map,
+                    bool dirty)
+{
+        if (!map)
+                return;
+
+        if (!map->hva)
+                return;
+
+        /*
+         * __kvm_map_gfn() leaves map->page at KVM_UNMAPPED_PAGE when the pfn
+         * has no struct page and was mapped with memremap().  Compare against
+         * that sentinel rather than testing for non-NULL, otherwise a
+         * memremap()ed mapping could be handed to kunmap() and never be
+         * memunmap()ed.
+         */
+        if (map->page != KVM_UNMAPPED_PAGE)
+                kunmap(map->page);
+        else
+                memunmap(map->hva);
+
+        if (dirty) {
+                kvm_vcpu_mark_page_dirty(vcpu, map->gfn);
+                kvm_release_pfn_dirty(map->pfn);
+        } else {
+                kvm_release_pfn_clean(map->pfn);
+        }
+
+        map->hva = NULL;
+        map->page = NULL;
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_unmap);
 struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn)
 {
        kvm_pfn_t pfn;
@@ -2255,7 +2320,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
        u64 block_ns;
        start = cur = ktime_get();
-        if (vcpu->halt_poll_ns) {
+        if (vcpu->halt_poll_ns && !kvm_arch_no_poll(vcpu)) {
                ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns);
                ++vcpu->stat.halt_attempted_poll;
@@ -2886,6 +2951,16 @@ out:
 }
 #endif
+/*
+ * mmap handler for KVM device file descriptors: forwards the request to the
+ * device's ->mmap op, or returns -ENODEV when the device does not provide
+ * one.
+ */
+static int kvm_device_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+        struct kvm_device *kdev = filp->private_data;
+
+        if (!kdev->ops->mmap)
+                return -ENODEV;
+
+        return kdev->ops->mmap(kdev, vma);
+}
 static int kvm_device_ioctl_attr(struct kvm_device *dev,
                                 int (*accessor)(struct kvm_device *dev,
                                                 struct kvm_device_attr *attr),
@@ -2930,6 +3005,13 @@ static int kvm_device_release(struct inode *inode, struct file *filp)
        struct kvm_device *dev = filp->private_data;
        struct kvm *kvm = dev->kvm;
+        if (dev->ops->release) {
+                mutex_lock(&kvm->lock);
+                list_del(&dev->vm_node);
+                dev->ops->release(dev);
+                mutex_unlock(&kvm->lock);
+        }
        kvm_put_kvm(kvm);
        return 0;
 }
@@ -2938,6 +3020,7 @@ static const struct file_operations kvm_device_fops = {
        .unlocked_ioctl = kvm_device_ioctl,
        .release = kvm_device_release,
        KVM_COMPAT(kvm_device_ioctl),
+        .mmap = kvm_device_mmap,
 };
 struct kvm_device *kvm_device_from_filp(struct file *filp)
@@ -3046,7 +3129,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
        case KVM_CAP_CHECK_EXTENSION_VM:
        case KVM_CAP_ENABLE_CAP_VM:
 #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
-        case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT:
+        case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2:
 #endif
                return 1;
 #ifdef CONFIG_KVM_MMIO
@@ -3065,6 +3148,8 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 #endif
        case KVM_CAP_MAX_VCPU_ID:
                return KVM_MAX_VCPU_ID;
+        case KVM_CAP_NR_MEMSLOTS:
+                return KVM_USER_MEM_SLOTS;
        default:
                break;
        }
@@ -3082,7 +3167,7 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
 {
        switch (cap->cap) {
 #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
-        case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT:
+        case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2:
                if (cap->flags || (cap->args[0] & ~1))
                        return -EINVAL;
                kvm->manual_dirty_log_protect = cap->args[0];
author	Linus Torvalds <torvalds@linux-foundation.org>	2019-05-17 13:33:30 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2019-05-17 13:33:30 -0400
commit	0ef0fd351550130129bbdb77362488befd7b69d2 (patch)
tree	23186172f5f85c06e18e3ee1a9619879df03c5df /virt
parent	4489da7183099f569a7d3dd819c975073c04bc72 (diff)
parent	c011d23ba046826ccf8c4a4a6c1d01c9ccaa1403 (diff)