diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-04 12:30:33 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-04 12:30:33 -0400 |
| commit | ecefbd94b834fa32559d854646d777c56749ef1c (patch) | |
| tree | ca8958900ad9e208a8e5fb7704f1b66dc76131b4 /virt | |
| parent | ce57e981f2b996aaca2031003b3f866368307766 (diff) | |
| parent | 3d11df7abbff013b811d5615320580cd5d9d7d31 (diff) | |
Merge tag 'kvm-3.7-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Avi Kivity:
"Highlights of the changes for this release include support for vfio
level triggered interrupts, improved big real mode support on older
Intels, a streamlined guest page table walker, guest APIC speedups,
PIO optimizations, better overcommit handling, and read-only memory."
* tag 'kvm-3.7-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (138 commits)
KVM: s390: Fix vcpu_load handling in interrupt code
KVM: x86: Fix guest debug across vcpu INIT reset
KVM: Add resampling irqfds for level triggered interrupts
KVM: optimize apic interrupt delivery
KVM: MMU: Eliminate pointless temporary 'ac'
KVM: MMU: Avoid access/dirty update loop if all is well
KVM: MMU: Eliminate eperm temporary
KVM: MMU: Optimize is_last_gpte()
KVM: MMU: Simplify walk_addr_generic() loop
KVM: MMU: Optimize pte permission checks
KVM: MMU: Update accessed and dirty bits after guest pagetable walk
KVM: MMU: Move gpte_access() out of paging_tmpl.h
KVM: MMU: Optimize gpte_access() slightly
KVM: MMU: Push clean gpte write protection out of gpte_access()
KVM: clarify kvmclock documentation
KVM: make processes waiting on vcpu mutex killable
KVM: SVM: Make use of asm.h
KVM: VMX: Make use of asm.h
KVM: VMX: Make lto-friendly
KVM: x86: lapic: Clean up find_highest_vector() and count_vectors()
...
Conflicts:
arch/s390/include/asm/processor.h
arch/x86/kvm/i8259.c
Diffstat (limited to 'virt')
| -rw-r--r-- | virt/kvm/Kconfig | 3 | ||||
| -rw-r--r-- | virt/kvm/async_pf.c | 11 | ||||
| -rw-r--r-- | virt/kvm/eventfd.c | 150 | ||||
| -rw-r--r-- | virt/kvm/ioapic.c | 37 | ||||
| -rw-r--r-- | virt/kvm/iommu.c | 16 | ||||
| -rw-r--r-- | virt/kvm/irq_comm.c | 17 | ||||
| -rw-r--r-- | virt/kvm/kvm_main.c | 541 |
7 files changed, 525 insertions, 250 deletions
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 28694f4a913..d01b24b72c6 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig | |||
| @@ -21,3 +21,6 @@ config KVM_ASYNC_PF | |||
| 21 | 21 | ||
| 22 | config HAVE_KVM_MSI | 22 | config HAVE_KVM_MSI |
| 23 | bool | 23 | bool |
| 24 | |||
| 25 | config HAVE_KVM_CPU_RELAX_INTERCEPT | ||
| 26 | bool | ||
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 74268b4c2ee..ea475cd0351 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c | |||
| @@ -111,8 +111,8 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) | |||
| 111 | list_entry(vcpu->async_pf.done.next, | 111 | list_entry(vcpu->async_pf.done.next, |
| 112 | typeof(*work), link); | 112 | typeof(*work), link); |
| 113 | list_del(&work->link); | 113 | list_del(&work->link); |
| 114 | if (work->page) | 114 | if (!is_error_page(work->page)) |
| 115 | put_page(work->page); | 115 | kvm_release_page_clean(work->page); |
| 116 | kmem_cache_free(async_pf_cache, work); | 116 | kmem_cache_free(async_pf_cache, work); |
| 117 | } | 117 | } |
| 118 | spin_unlock(&vcpu->async_pf.lock); | 118 | spin_unlock(&vcpu->async_pf.lock); |
| @@ -138,8 +138,8 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu) | |||
| 138 | 138 | ||
| 139 | list_del(&work->queue); | 139 | list_del(&work->queue); |
| 140 | vcpu->async_pf.queued--; | 140 | vcpu->async_pf.queued--; |
| 141 | if (work->page) | 141 | if (!is_error_page(work->page)) |
| 142 | put_page(work->page); | 142 | kvm_release_page_clean(work->page); |
| 143 | kmem_cache_free(async_pf_cache, work); | 143 | kmem_cache_free(async_pf_cache, work); |
| 144 | } | 144 | } |
| 145 | } | 145 | } |
| @@ -203,8 +203,7 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu) | |||
| 203 | if (!work) | 203 | if (!work) |
| 204 | return -ENOMEM; | 204 | return -ENOMEM; |
| 205 | 205 | ||
| 206 | work->page = bad_page; | 206 | work->page = KVM_ERR_PTR_BAD_PAGE; |
| 207 | get_page(bad_page); | ||
| 208 | INIT_LIST_HEAD(&work->queue); /* for list_del to work */ | 207 | INIT_LIST_HEAD(&work->queue); /* for list_del to work */ |
| 209 | 208 | ||
| 210 | spin_lock(&vcpu->async_pf.lock); | 209 | spin_lock(&vcpu->async_pf.lock); |
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 67a35e90384..9718e98d6d2 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c | |||
| @@ -43,6 +43,31 @@ | |||
| 43 | * -------------------------------------------------------------------- | 43 | * -------------------------------------------------------------------- |
| 44 | */ | 44 | */ |
| 45 | 45 | ||
| 46 | /* | ||
| 47 | * Resampling irqfds are a special variety of irqfds used to emulate | ||
| 48 | * level triggered interrupts. The interrupt is asserted on eventfd | ||
| 49 | * trigger. On acknowledgement through the irq ack notifier, the | ||
| 50 | * interrupt is de-asserted and userspace is notified through the | ||
| 51 | * resamplefd. All resamplers on the same gsi are de-asserted | ||
| 52 | * together, so we don't need to track the state of each individual | ||
| 53 | * user. We can also therefore share the same irq source ID. | ||
| 54 | */ | ||
| 55 | struct _irqfd_resampler { | ||
| 56 | struct kvm *kvm; | ||
| 57 | /* | ||
| 58 | * List of resampling struct _irqfd objects sharing this gsi. | ||
| 59 | * RCU list modified under kvm->irqfds.resampler_lock | ||
| 60 | */ | ||
| 61 | struct list_head list; | ||
| 62 | struct kvm_irq_ack_notifier notifier; | ||
| 63 | /* | ||
| 64 | * Entry in list of kvm->irqfd.resampler_list. Use for sharing | ||
| 65 | * resamplers among irqfds on the same gsi. | ||
| 66 | * Accessed and modified under kvm->irqfds.resampler_lock | ||
| 67 | */ | ||
| 68 | struct list_head link; | ||
| 69 | }; | ||
| 70 | |||
| 46 | struct _irqfd { | 71 | struct _irqfd { |
| 47 | /* Used for MSI fast-path */ | 72 | /* Used for MSI fast-path */ |
| 48 | struct kvm *kvm; | 73 | struct kvm *kvm; |
| @@ -52,6 +77,12 @@ struct _irqfd { | |||
| 52 | /* Used for level IRQ fast-path */ | 77 | /* Used for level IRQ fast-path */ |
| 53 | int gsi; | 78 | int gsi; |
| 54 | struct work_struct inject; | 79 | struct work_struct inject; |
| 80 | /* The resampler used by this irqfd (resampler-only) */ | ||
| 81 | struct _irqfd_resampler *resampler; | ||
| 82 | /* Eventfd notified on resample (resampler-only) */ | ||
| 83 | struct eventfd_ctx *resamplefd; | ||
| 84 | /* Entry in list of irqfds for a resampler (resampler-only) */ | ||
| 85 | struct list_head resampler_link; | ||
| 55 | /* Used for setup/shutdown */ | 86 | /* Used for setup/shutdown */ |
| 56 | struct eventfd_ctx *eventfd; | 87 | struct eventfd_ctx *eventfd; |
| 57 | struct list_head list; | 88 | struct list_head list; |
| @@ -67,8 +98,58 @@ irqfd_inject(struct work_struct *work) | |||
| 67 | struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); | 98 | struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); |
| 68 | struct kvm *kvm = irqfd->kvm; | 99 | struct kvm *kvm = irqfd->kvm; |
| 69 | 100 | ||
| 70 | kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); | 101 | if (!irqfd->resampler) { |
| 71 | kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); | 102 | kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); |
| 103 | kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); | ||
| 104 | } else | ||
| 105 | kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, | ||
| 106 | irqfd->gsi, 1); | ||
| 107 | } | ||
| 108 | |||
| 109 | /* | ||
| 110 | * Since resampler irqfds share an IRQ source ID, we de-assert once | ||
| 111 | * then notify all of the resampler irqfds using this GSI. We can't | ||
| 112 | * do multiple de-asserts or we risk racing with incoming re-asserts. | ||
| 113 | */ | ||
| 114 | static void | ||
| 115 | irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) | ||
| 116 | { | ||
| 117 | struct _irqfd_resampler *resampler; | ||
| 118 | struct _irqfd *irqfd; | ||
| 119 | |||
| 120 | resampler = container_of(kian, struct _irqfd_resampler, notifier); | ||
| 121 | |||
| 122 | kvm_set_irq(resampler->kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, | ||
| 123 | resampler->notifier.gsi, 0); | ||
| 124 | |||
| 125 | rcu_read_lock(); | ||
| 126 | |||
| 127 | list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link) | ||
| 128 | eventfd_signal(irqfd->resamplefd, 1); | ||
| 129 | |||
| 130 | rcu_read_unlock(); | ||
| 131 | } | ||
| 132 | |||
| 133 | static void | ||
| 134 | irqfd_resampler_shutdown(struct _irqfd *irqfd) | ||
| 135 | { | ||
| 136 | struct _irqfd_resampler *resampler = irqfd->resampler; | ||
| 137 | struct kvm *kvm = resampler->kvm; | ||
| 138 | |||
| 139 | mutex_lock(&kvm->irqfds.resampler_lock); | ||
| 140 | |||
| 141 | list_del_rcu(&irqfd->resampler_link); | ||
| 142 | synchronize_rcu(); | ||
| 143 | |||
| 144 | if (list_empty(&resampler->list)) { | ||
| 145 | list_del(&resampler->link); | ||
| 146 | kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier); | ||
| 147 | kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, | ||
| 148 | resampler->notifier.gsi, 0); | ||
| 149 | kfree(resampler); | ||
| 150 | } | ||
| 151 | |||
| 152 | mutex_unlock(&kvm->irqfds.resampler_lock); | ||
| 72 | } | 153 | } |
| 73 | 154 | ||
| 74 | /* | 155 | /* |
| @@ -92,6 +173,11 @@ irqfd_shutdown(struct work_struct *work) | |||
| 92 | */ | 173 | */ |
| 93 | flush_work(&irqfd->inject); | 174 | flush_work(&irqfd->inject); |
| 94 | 175 | ||
| 176 | if (irqfd->resampler) { | ||
| 177 | irqfd_resampler_shutdown(irqfd); | ||
| 178 | eventfd_ctx_put(irqfd->resamplefd); | ||
| 179 | } | ||
| 180 | |||
| 95 | /* | 181 | /* |
| 96 | * It is now safe to release the object's resources | 182 | * It is now safe to release the object's resources |
| 97 | */ | 183 | */ |
| @@ -203,7 +289,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) | |||
| 203 | struct kvm_irq_routing_table *irq_rt; | 289 | struct kvm_irq_routing_table *irq_rt; |
| 204 | struct _irqfd *irqfd, *tmp; | 290 | struct _irqfd *irqfd, *tmp; |
| 205 | struct file *file = NULL; | 291 | struct file *file = NULL; |
| 206 | struct eventfd_ctx *eventfd = NULL; | 292 | struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL; |
| 207 | int ret; | 293 | int ret; |
| 208 | unsigned int events; | 294 | unsigned int events; |
| 209 | 295 | ||
| @@ -231,6 +317,54 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) | |||
| 231 | 317 | ||
| 232 | irqfd->eventfd = eventfd; | 318 | irqfd->eventfd = eventfd; |
| 233 | 319 | ||
| 320 | if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) { | ||
| 321 | struct _irqfd_resampler *resampler; | ||
| 322 | |||
| 323 | resamplefd = eventfd_ctx_fdget(args->resamplefd); | ||
| 324 | if (IS_ERR(resamplefd)) { | ||
| 325 | ret = PTR_ERR(resamplefd); | ||
| 326 | goto fail; | ||
| 327 | } | ||
| 328 | |||
| 329 | irqfd->resamplefd = resamplefd; | ||
| 330 | INIT_LIST_HEAD(&irqfd->resampler_link); | ||
| 331 | |||
| 332 | mutex_lock(&kvm->irqfds.resampler_lock); | ||
| 333 | |||
| 334 | list_for_each_entry(resampler, | ||
| 335 | &kvm->irqfds.resampler_list, list) { | ||
| 336 | if (resampler->notifier.gsi == irqfd->gsi) { | ||
| 337 | irqfd->resampler = resampler; | ||
| 338 | break; | ||
| 339 | } | ||
| 340 | } | ||
| 341 | |||
| 342 | if (!irqfd->resampler) { | ||
| 343 | resampler = kzalloc(sizeof(*resampler), GFP_KERNEL); | ||
| 344 | if (!resampler) { | ||
| 345 | ret = -ENOMEM; | ||
| 346 | mutex_unlock(&kvm->irqfds.resampler_lock); | ||
| 347 | goto fail; | ||
| 348 | } | ||
| 349 | |||
| 350 | resampler->kvm = kvm; | ||
| 351 | INIT_LIST_HEAD(&resampler->list); | ||
| 352 | resampler->notifier.gsi = irqfd->gsi; | ||
| 353 | resampler->notifier.irq_acked = irqfd_resampler_ack; | ||
| 354 | INIT_LIST_HEAD(&resampler->link); | ||
| 355 | |||
| 356 | list_add(&resampler->link, &kvm->irqfds.resampler_list); | ||
| 357 | kvm_register_irq_ack_notifier(kvm, | ||
| 358 | &resampler->notifier); | ||
| 359 | irqfd->resampler = resampler; | ||
| 360 | } | ||
| 361 | |||
| 362 | list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list); | ||
| 363 | synchronize_rcu(); | ||
| 364 | |||
| 365 | mutex_unlock(&kvm->irqfds.resampler_lock); | ||
| 366 | } | ||
| 367 | |||
| 234 | /* | 368 | /* |
| 235 | * Install our own custom wake-up handling so we are notified via | 369 | * Install our own custom wake-up handling so we are notified via |
| 236 | * a callback whenever someone signals the underlying eventfd | 370 | * a callback whenever someone signals the underlying eventfd |
| @@ -276,6 +410,12 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) | |||
| 276 | return 0; | 410 | return 0; |
| 277 | 411 | ||
| 278 | fail: | 412 | fail: |
| 413 | if (irqfd->resampler) | ||
| 414 | irqfd_resampler_shutdown(irqfd); | ||
| 415 | |||
| 416 | if (resamplefd && !IS_ERR(resamplefd)) | ||
| 417 | eventfd_ctx_put(resamplefd); | ||
| 418 | |||
| 279 | if (eventfd && !IS_ERR(eventfd)) | 419 | if (eventfd && !IS_ERR(eventfd)) |
| 280 | eventfd_ctx_put(eventfd); | 420 | eventfd_ctx_put(eventfd); |
| 281 | 421 | ||
| @@ -291,6 +431,8 @@ kvm_eventfd_init(struct kvm *kvm) | |||
| 291 | { | 431 | { |
| 292 | spin_lock_init(&kvm->irqfds.lock); | 432 | spin_lock_init(&kvm->irqfds.lock); |
| 293 | INIT_LIST_HEAD(&kvm->irqfds.items); | 433 | INIT_LIST_HEAD(&kvm->irqfds.items); |
| 434 | INIT_LIST_HEAD(&kvm->irqfds.resampler_list); | ||
| 435 | mutex_init(&kvm->irqfds.resampler_lock); | ||
| 294 | INIT_LIST_HEAD(&kvm->ioeventfds); | 436 | INIT_LIST_HEAD(&kvm->ioeventfds); |
| 295 | } | 437 | } |
| 296 | 438 | ||
| @@ -340,7 +482,7 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args) | |||
| 340 | int | 482 | int |
| 341 | kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) | 483 | kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) |
| 342 | { | 484 | { |
| 343 | if (args->flags & ~KVM_IRQFD_FLAG_DEASSIGN) | 485 | if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE)) |
| 344 | return -EINVAL; | 486 | return -EINVAL; |
| 345 | 487 | ||
| 346 | if (args->flags & KVM_IRQFD_FLAG_DEASSIGN) | 488 | if (args->flags & KVM_IRQFD_FLAG_DEASSIGN) |
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index ef61d529a6c..cfb7e4d52dc 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c | |||
| @@ -197,28 +197,29 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, | |||
| 197 | u32 old_irr; | 197 | u32 old_irr; |
| 198 | u32 mask = 1 << irq; | 198 | u32 mask = 1 << irq; |
| 199 | union kvm_ioapic_redirect_entry entry; | 199 | union kvm_ioapic_redirect_entry entry; |
| 200 | int ret = 1; | 200 | int ret, irq_level; |
| 201 | |||
| 202 | BUG_ON(irq < 0 || irq >= IOAPIC_NUM_PINS); | ||
| 201 | 203 | ||
| 202 | spin_lock(&ioapic->lock); | 204 | spin_lock(&ioapic->lock); |
| 203 | old_irr = ioapic->irr; | 205 | old_irr = ioapic->irr; |
| 204 | if (irq >= 0 && irq < IOAPIC_NUM_PINS) { | 206 | irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq], |
| 205 | int irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq], | 207 | irq_source_id, level); |
| 206 | irq_source_id, level); | 208 | entry = ioapic->redirtbl[irq]; |
| 207 | entry = ioapic->redirtbl[irq]; | 209 | irq_level ^= entry.fields.polarity; |
| 208 | irq_level ^= entry.fields.polarity; | 210 | if (!irq_level) { |
| 209 | if (!irq_level) | 211 | ioapic->irr &= ~mask; |
| 210 | ioapic->irr &= ~mask; | 212 | ret = 1; |
| 211 | else { | 213 | } else { |
| 212 | int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG); | 214 | int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG); |
| 213 | ioapic->irr |= mask; | 215 | ioapic->irr |= mask; |
| 214 | if ((edge && old_irr != ioapic->irr) || | 216 | if ((edge && old_irr != ioapic->irr) || |
| 215 | (!edge && !entry.fields.remote_irr)) | 217 | (!edge && !entry.fields.remote_irr)) |
| 216 | ret = ioapic_service(ioapic, irq); | 218 | ret = ioapic_service(ioapic, irq); |
| 217 | else | 219 | else |
| 218 | ret = 0; /* report coalesced interrupt */ | 220 | ret = 0; /* report coalesced interrupt */ |
| 219 | } | ||
| 220 | trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); | ||
| 221 | } | 221 | } |
| 222 | trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); | ||
| 222 | spin_unlock(&ioapic->lock); | 223 | spin_unlock(&ioapic->lock); |
| 223 | 224 | ||
| 224 | return ret; | 225 | return ret; |
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index e9fff9830bf..037cb6730e6 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c | |||
| @@ -42,13 +42,13 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm); | |||
| 42 | static void kvm_iommu_put_pages(struct kvm *kvm, | 42 | static void kvm_iommu_put_pages(struct kvm *kvm, |
| 43 | gfn_t base_gfn, unsigned long npages); | 43 | gfn_t base_gfn, unsigned long npages); |
| 44 | 44 | ||
| 45 | static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot, | 45 | static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn, |
| 46 | gfn_t gfn, unsigned long size) | 46 | unsigned long size) |
| 47 | { | 47 | { |
| 48 | gfn_t end_gfn; | 48 | gfn_t end_gfn; |
| 49 | pfn_t pfn; | 49 | pfn_t pfn; |
| 50 | 50 | ||
| 51 | pfn = gfn_to_pfn_memslot(kvm, slot, gfn); | 51 | pfn = gfn_to_pfn_memslot(slot, gfn); |
| 52 | end_gfn = gfn + (size >> PAGE_SHIFT); | 52 | end_gfn = gfn + (size >> PAGE_SHIFT); |
| 53 | gfn += 1; | 53 | gfn += 1; |
| 54 | 54 | ||
| @@ -56,7 +56,7 @@ static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot, | |||
| 56 | return pfn; | 56 | return pfn; |
| 57 | 57 | ||
| 58 | while (gfn < end_gfn) | 58 | while (gfn < end_gfn) |
| 59 | gfn_to_pfn_memslot(kvm, slot, gfn++); | 59 | gfn_to_pfn_memslot(slot, gfn++); |
| 60 | 60 | ||
| 61 | return pfn; | 61 | return pfn; |
| 62 | } | 62 | } |
| @@ -105,7 +105,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) | |||
| 105 | * Pin all pages we are about to map in memory. This is | 105 | * Pin all pages we are about to map in memory. This is |
| 106 | * important because we unmap and unpin in 4kb steps later. | 106 | * important because we unmap and unpin in 4kb steps later. |
| 107 | */ | 107 | */ |
| 108 | pfn = kvm_pin_pages(kvm, slot, gfn, page_size); | 108 | pfn = kvm_pin_pages(slot, gfn, page_size); |
| 109 | if (is_error_pfn(pfn)) { | 109 | if (is_error_pfn(pfn)) { |
| 110 | gfn += 1; | 110 | gfn += 1; |
| 111 | continue; | 111 | continue; |
| @@ -300,6 +300,12 @@ static void kvm_iommu_put_pages(struct kvm *kvm, | |||
| 300 | 300 | ||
| 301 | /* Get physical address */ | 301 | /* Get physical address */ |
| 302 | phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn)); | 302 | phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn)); |
| 303 | |||
| 304 | if (!phys) { | ||
| 305 | gfn++; | ||
| 306 | continue; | ||
| 307 | } | ||
| 308 | |||
| 303 | pfn = phys >> PAGE_SHIFT; | 309 | pfn = phys >> PAGE_SHIFT; |
| 304 | 310 | ||
| 305 | /* Unmap address from IO address space */ | 311 | /* Unmap address from IO address space */ |
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 83402d74a76..2eb58af7ee9 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c | |||
| @@ -68,8 +68,13 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | |||
| 68 | struct kvm_vcpu *vcpu, *lowest = NULL; | 68 | struct kvm_vcpu *vcpu, *lowest = NULL; |
| 69 | 69 | ||
| 70 | if (irq->dest_mode == 0 && irq->dest_id == 0xff && | 70 | if (irq->dest_mode == 0 && irq->dest_id == 0xff && |
| 71 | kvm_is_dm_lowest_prio(irq)) | 71 | kvm_is_dm_lowest_prio(irq)) { |
| 72 | printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n"); | 72 | printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n"); |
| 73 | irq->delivery_mode = APIC_DM_FIXED; | ||
| 74 | } | ||
| 75 | |||
| 76 | if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r)) | ||
| 77 | return r; | ||
| 73 | 78 | ||
| 74 | kvm_for_each_vcpu(i, vcpu, kvm) { | 79 | kvm_for_each_vcpu(i, vcpu, kvm) { |
| 75 | if (!kvm_apic_present(vcpu)) | 80 | if (!kvm_apic_present(vcpu)) |
| @@ -223,6 +228,9 @@ int kvm_request_irq_source_id(struct kvm *kvm) | |||
| 223 | } | 228 | } |
| 224 | 229 | ||
| 225 | ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); | 230 | ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); |
| 231 | #ifdef CONFIG_X86 | ||
| 232 | ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID); | ||
| 233 | #endif | ||
| 226 | set_bit(irq_source_id, bitmap); | 234 | set_bit(irq_source_id, bitmap); |
| 227 | unlock: | 235 | unlock: |
| 228 | mutex_unlock(&kvm->irq_lock); | 236 | mutex_unlock(&kvm->irq_lock); |
| @@ -233,6 +241,9 @@ unlock: | |||
| 233 | void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) | 241 | void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) |
| 234 | { | 242 | { |
| 235 | ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); | 243 | ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); |
| 244 | #ifdef CONFIG_X86 | ||
| 245 | ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID); | ||
| 246 | #endif | ||
| 236 | 247 | ||
| 237 | mutex_lock(&kvm->irq_lock); | 248 | mutex_lock(&kvm->irq_lock); |
| 238 | if (irq_source_id < 0 || | 249 | if (irq_source_id < 0 || |
| @@ -321,11 +332,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, | |||
| 321 | switch (ue->u.irqchip.irqchip) { | 332 | switch (ue->u.irqchip.irqchip) { |
| 322 | case KVM_IRQCHIP_PIC_MASTER: | 333 | case KVM_IRQCHIP_PIC_MASTER: |
| 323 | e->set = kvm_set_pic_irq; | 334 | e->set = kvm_set_pic_irq; |
| 324 | max_pin = 16; | 335 | max_pin = PIC_NUM_PINS; |
| 325 | break; | 336 | break; |
| 326 | case KVM_IRQCHIP_PIC_SLAVE: | 337 | case KVM_IRQCHIP_PIC_SLAVE: |
| 327 | e->set = kvm_set_pic_irq; | 338 | e->set = kvm_set_pic_irq; |
| 328 | max_pin = 16; | 339 | max_pin = PIC_NUM_PINS; |
| 329 | delta = 8; | 340 | delta = 8; |
| 330 | break; | 341 | break; |
| 331 | case KVM_IRQCHIP_IOAPIC: | 342 | case KVM_IRQCHIP_IOAPIC: |
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d617f69131d..c353b4599ce 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
| @@ -100,13 +100,7 @@ EXPORT_SYMBOL_GPL(kvm_rebooting); | |||
| 100 | 100 | ||
| 101 | static bool largepages_enabled = true; | 101 | static bool largepages_enabled = true; |
| 102 | 102 | ||
| 103 | static struct page *hwpoison_page; | 103 | bool kvm_is_mmio_pfn(pfn_t pfn) |
| 104 | static pfn_t hwpoison_pfn; | ||
| 105 | |||
| 106 | struct page *fault_page; | ||
| 107 | pfn_t fault_pfn; | ||
| 108 | |||
| 109 | inline int kvm_is_mmio_pfn(pfn_t pfn) | ||
| 110 | { | 104 | { |
| 111 | if (pfn_valid(pfn)) { | 105 | if (pfn_valid(pfn)) { |
| 112 | int reserved; | 106 | int reserved; |
| @@ -137,11 +131,12 @@ inline int kvm_is_mmio_pfn(pfn_t pfn) | |||
| 137 | /* | 131 | /* |
| 138 | * Switches to specified vcpu, until a matching vcpu_put() | 132 | * Switches to specified vcpu, until a matching vcpu_put() |
| 139 | */ | 133 | */ |
| 140 | void vcpu_load(struct kvm_vcpu *vcpu) | 134 | int vcpu_load(struct kvm_vcpu *vcpu) |
| 141 | { | 135 | { |
| 142 | int cpu; | 136 | int cpu; |
| 143 | 137 | ||
| 144 | mutex_lock(&vcpu->mutex); | 138 | if (mutex_lock_killable(&vcpu->mutex)) |
| 139 | return -EINTR; | ||
| 145 | if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) { | 140 | if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) { |
| 146 | /* The thread running this VCPU changed. */ | 141 | /* The thread running this VCPU changed. */ |
| 147 | struct pid *oldpid = vcpu->pid; | 142 | struct pid *oldpid = vcpu->pid; |
| @@ -154,6 +149,7 @@ void vcpu_load(struct kvm_vcpu *vcpu) | |||
| 154 | preempt_notifier_register(&vcpu->preempt_notifier); | 149 | preempt_notifier_register(&vcpu->preempt_notifier); |
| 155 | kvm_arch_vcpu_load(vcpu, cpu); | 150 | kvm_arch_vcpu_load(vcpu, cpu); |
| 156 | put_cpu(); | 151 | put_cpu(); |
| 152 | return 0; | ||
| 157 | } | 153 | } |
| 158 | 154 | ||
| 159 | void vcpu_put(struct kvm_vcpu *vcpu) | 155 | void vcpu_put(struct kvm_vcpu *vcpu) |
| @@ -236,6 +232,9 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) | |||
| 236 | } | 232 | } |
| 237 | vcpu->run = page_address(page); | 233 | vcpu->run = page_address(page); |
| 238 | 234 | ||
| 235 | kvm_vcpu_set_in_spin_loop(vcpu, false); | ||
| 236 | kvm_vcpu_set_dy_eligible(vcpu, false); | ||
| 237 | |||
| 239 | r = kvm_arch_vcpu_init(vcpu); | 238 | r = kvm_arch_vcpu_init(vcpu); |
| 240 | if (r < 0) | 239 | if (r < 0) |
| 241 | goto fail_free_run; | 240 | goto fail_free_run; |
| @@ -332,8 +331,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, | |||
| 332 | * count is also read inside the mmu_lock critical section. | 331 | * count is also read inside the mmu_lock critical section. |
| 333 | */ | 332 | */ |
| 334 | kvm->mmu_notifier_count++; | 333 | kvm->mmu_notifier_count++; |
| 335 | for (; start < end; start += PAGE_SIZE) | 334 | need_tlb_flush = kvm_unmap_hva_range(kvm, start, end); |
| 336 | need_tlb_flush |= kvm_unmap_hva(kvm, start); | ||
| 337 | need_tlb_flush |= kvm->tlbs_dirty; | 335 | need_tlb_flush |= kvm->tlbs_dirty; |
| 338 | /* we've to flush the tlb before the pages can be freed */ | 336 | /* we've to flush the tlb before the pages can be freed */ |
| 339 | if (need_tlb_flush) | 337 | if (need_tlb_flush) |
| @@ -412,7 +410,7 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn, | |||
| 412 | int idx; | 410 | int idx; |
| 413 | 411 | ||
| 414 | idx = srcu_read_lock(&kvm->srcu); | 412 | idx = srcu_read_lock(&kvm->srcu); |
| 415 | kvm_arch_flush_shadow(kvm); | 413 | kvm_arch_flush_shadow_all(kvm); |
| 416 | srcu_read_unlock(&kvm->srcu, idx); | 414 | srcu_read_unlock(&kvm->srcu, idx); |
| 417 | } | 415 | } |
| 418 | 416 | ||
| @@ -551,16 +549,12 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) | |||
| 551 | static void kvm_free_physmem_slot(struct kvm_memory_slot *free, | 549 | static void kvm_free_physmem_slot(struct kvm_memory_slot *free, |
| 552 | struct kvm_memory_slot *dont) | 550 | struct kvm_memory_slot *dont) |
| 553 | { | 551 | { |
| 554 | if (!dont || free->rmap != dont->rmap) | ||
| 555 | vfree(free->rmap); | ||
| 556 | |||
| 557 | if (!dont || free->dirty_bitmap != dont->dirty_bitmap) | 552 | if (!dont || free->dirty_bitmap != dont->dirty_bitmap) |
| 558 | kvm_destroy_dirty_bitmap(free); | 553 | kvm_destroy_dirty_bitmap(free); |
| 559 | 554 | ||
| 560 | kvm_arch_free_memslot(free, dont); | 555 | kvm_arch_free_memslot(free, dont); |
| 561 | 556 | ||
| 562 | free->npages = 0; | 557 | free->npages = 0; |
| 563 | free->rmap = NULL; | ||
| 564 | } | 558 | } |
| 565 | 559 | ||
| 566 | void kvm_free_physmem(struct kvm *kvm) | 560 | void kvm_free_physmem(struct kvm *kvm) |
| @@ -590,7 +584,7 @@ static void kvm_destroy_vm(struct kvm *kvm) | |||
| 590 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) | 584 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) |
| 591 | mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); | 585 | mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); |
| 592 | #else | 586 | #else |
| 593 | kvm_arch_flush_shadow(kvm); | 587 | kvm_arch_flush_shadow_all(kvm); |
| 594 | #endif | 588 | #endif |
| 595 | kvm_arch_destroy_vm(kvm); | 589 | kvm_arch_destroy_vm(kvm); |
| 596 | kvm_free_physmem(kvm); | 590 | kvm_free_physmem(kvm); |
| @@ -686,6 +680,20 @@ void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new) | |||
| 686 | slots->generation++; | 680 | slots->generation++; |
| 687 | } | 681 | } |
| 688 | 682 | ||
| 683 | static int check_memory_region_flags(struct kvm_userspace_memory_region *mem) | ||
| 684 | { | ||
| 685 | u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES; | ||
| 686 | |||
| 687 | #ifdef KVM_CAP_READONLY_MEM | ||
| 688 | valid_flags |= KVM_MEM_READONLY; | ||
| 689 | #endif | ||
| 690 | |||
| 691 | if (mem->flags & ~valid_flags) | ||
| 692 | return -EINVAL; | ||
| 693 | |||
| 694 | return 0; | ||
| 695 | } | ||
| 696 | |||
| 689 | /* | 697 | /* |
| 690 | * Allocate some memory and give it an address in the guest physical address | 698 | * Allocate some memory and give it an address in the guest physical address |
| 691 | * space. | 699 | * space. |
| @@ -706,6 +714,10 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
| 706 | struct kvm_memory_slot old, new; | 714 | struct kvm_memory_slot old, new; |
| 707 | struct kvm_memslots *slots, *old_memslots; | 715 | struct kvm_memslots *slots, *old_memslots; |
| 708 | 716 | ||
| 717 | r = check_memory_region_flags(mem); | ||
| 718 | if (r) | ||
| 719 | goto out; | ||
| 720 | |||
| 709 | r = -EINVAL; | 721 | r = -EINVAL; |
| 710 | /* General sanity checks */ | 722 | /* General sanity checks */ |
| 711 | if (mem->memory_size & (PAGE_SIZE - 1)) | 723 | if (mem->memory_size & (PAGE_SIZE - 1)) |
| @@ -769,11 +781,7 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
| 769 | if (npages && !old.npages) { | 781 | if (npages && !old.npages) { |
| 770 | new.user_alloc = user_alloc; | 782 | new.user_alloc = user_alloc; |
| 771 | new.userspace_addr = mem->userspace_addr; | 783 | new.userspace_addr = mem->userspace_addr; |
| 772 | #ifndef CONFIG_S390 | 784 | |
| 773 | new.rmap = vzalloc(npages * sizeof(*new.rmap)); | ||
| 774 | if (!new.rmap) | ||
| 775 | goto out_free; | ||
| 776 | #endif /* not defined CONFIG_S390 */ | ||
| 777 | if (kvm_arch_create_memslot(&new, npages)) | 785 | if (kvm_arch_create_memslot(&new, npages)) |
| 778 | goto out_free; | 786 | goto out_free; |
| 779 | } | 787 | } |
| @@ -785,7 +793,7 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
| 785 | /* destroy any largepage mappings for dirty tracking */ | 793 | /* destroy any largepage mappings for dirty tracking */ |
| 786 | } | 794 | } |
| 787 | 795 | ||
| 788 | if (!npages) { | 796 | if (!npages || base_gfn != old.base_gfn) { |
| 789 | struct kvm_memory_slot *slot; | 797 | struct kvm_memory_slot *slot; |
| 790 | 798 | ||
| 791 | r = -ENOMEM; | 799 | r = -ENOMEM; |
| @@ -801,14 +809,14 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
| 801 | old_memslots = kvm->memslots; | 809 | old_memslots = kvm->memslots; |
| 802 | rcu_assign_pointer(kvm->memslots, slots); | 810 | rcu_assign_pointer(kvm->memslots, slots); |
| 803 | synchronize_srcu_expedited(&kvm->srcu); | 811 | synchronize_srcu_expedited(&kvm->srcu); |
| 804 | /* From this point no new shadow pages pointing to a deleted | 812 | /* From this point no new shadow pages pointing to a deleted, |
| 805 | * memslot will be created. | 813 | * or moved, memslot will be created. |
| 806 | * | 814 | * |
| 807 | * validation of sp->gfn happens in: | 815 | * validation of sp->gfn happens in: |
| 808 | * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) | 816 | * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) |
| 809 | * - kvm_is_visible_gfn (mmu_check_roots) | 817 | * - kvm_is_visible_gfn (mmu_check_roots) |
| 810 | */ | 818 | */ |
| 811 | kvm_arch_flush_shadow(kvm); | 819 | kvm_arch_flush_shadow_memslot(kvm, slot); |
| 812 | kfree(old_memslots); | 820 | kfree(old_memslots); |
| 813 | } | 821 | } |
| 814 | 822 | ||
| @@ -832,7 +840,6 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
| 832 | 840 | ||
| 833 | /* actual memory is freed via old in kvm_free_physmem_slot below */ | 841 | /* actual memory is freed via old in kvm_free_physmem_slot below */ |
| 834 | if (!npages) { | 842 | if (!npages) { |
| 835 | new.rmap = NULL; | ||
| 836 | new.dirty_bitmap = NULL; | 843 | new.dirty_bitmap = NULL; |
| 837 | memset(&new.arch, 0, sizeof(new.arch)); | 844 | memset(&new.arch, 0, sizeof(new.arch)); |
| 838 | } | 845 | } |
| @@ -844,13 +851,6 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
| 844 | 851 | ||
| 845 | kvm_arch_commit_memory_region(kvm, mem, old, user_alloc); | 852 | kvm_arch_commit_memory_region(kvm, mem, old, user_alloc); |
| 846 | 853 | ||
| 847 | /* | ||
| 848 | * If the new memory slot is created, we need to clear all | ||
| 849 | * mmio sptes. | ||
| 850 | */ | ||
| 851 | if (npages && old.base_gfn != mem->guest_phys_addr >> PAGE_SHIFT) | ||
| 852 | kvm_arch_flush_shadow(kvm); | ||
| 853 | |||
| 854 | kvm_free_physmem_slot(&old, &new); | 854 | kvm_free_physmem_slot(&old, &new); |
| 855 | kfree(old_memslots); | 855 | kfree(old_memslots); |
| 856 | 856 | ||
| @@ -932,53 +932,6 @@ void kvm_disable_largepages(void) | |||
| 932 | } | 932 | } |
| 933 | EXPORT_SYMBOL_GPL(kvm_disable_largepages); | 933 | EXPORT_SYMBOL_GPL(kvm_disable_largepages); |
| 934 | 934 | ||
| 935 | int is_error_page(struct page *page) | ||
| 936 | { | ||
| 937 | return page == bad_page || page == hwpoison_page || page == fault_page; | ||
| 938 | } | ||
| 939 | EXPORT_SYMBOL_GPL(is_error_page); | ||
| 940 | |||
| 941 | int is_error_pfn(pfn_t pfn) | ||
| 942 | { | ||
| 943 | return pfn == bad_pfn || pfn == hwpoison_pfn || pfn == fault_pfn; | ||
| 944 | } | ||
| 945 | EXPORT_SYMBOL_GPL(is_error_pfn); | ||
| 946 | |||
| 947 | int is_hwpoison_pfn(pfn_t pfn) | ||
| 948 | { | ||
| 949 | return pfn == hwpoison_pfn; | ||
| 950 | } | ||
| 951 | EXPORT_SYMBOL_GPL(is_hwpoison_pfn); | ||
| 952 | |||
| 953 | int is_fault_pfn(pfn_t pfn) | ||
| 954 | { | ||
| 955 | return pfn == fault_pfn; | ||
| 956 | } | ||
| 957 | EXPORT_SYMBOL_GPL(is_fault_pfn); | ||
| 958 | |||
| 959 | int is_noslot_pfn(pfn_t pfn) | ||
| 960 | { | ||
| 961 | return pfn == bad_pfn; | ||
| 962 | } | ||
| 963 | EXPORT_SYMBOL_GPL(is_noslot_pfn); | ||
| 964 | |||
| 965 | int is_invalid_pfn(pfn_t pfn) | ||
| 966 | { | ||
| 967 | return pfn == hwpoison_pfn || pfn == fault_pfn; | ||
| 968 | } | ||
| 969 | EXPORT_SYMBOL_GPL(is_invalid_pfn); | ||
| 970 | |||
| 971 | static inline unsigned long bad_hva(void) | ||
| 972 | { | ||
| 973 | return PAGE_OFFSET; | ||
| 974 | } | ||
| 975 | |||
| 976 | int kvm_is_error_hva(unsigned long addr) | ||
| 977 | { | ||
| 978 | return addr == bad_hva(); | ||
| 979 | } | ||
| 980 | EXPORT_SYMBOL_GPL(kvm_is_error_hva); | ||
| 981 | |||
| 982 | struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) | 935 | struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) |
| 983 | { | 936 | { |
| 984 | return __gfn_to_memslot(kvm_memslots(kvm), gfn); | 937 | return __gfn_to_memslot(kvm_memslots(kvm), gfn); |
| @@ -1021,28 +974,62 @@ out: | |||
| 1021 | return size; | 974 | return size; |
| 1022 | } | 975 | } |
| 1023 | 976 | ||
| 1024 | static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn, | 977 | static bool memslot_is_readonly(struct kvm_memory_slot *slot) |
| 1025 | gfn_t *nr_pages) | 978 | { |
| 979 | return slot->flags & KVM_MEM_READONLY; | ||
| 980 | } | ||
| 981 | |||
| 982 | static unsigned long __gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn, | ||
| 983 | gfn_t *nr_pages, bool write) | ||
| 1026 | { | 984 | { |
| 1027 | if (!slot || slot->flags & KVM_MEMSLOT_INVALID) | 985 | if (!slot || slot->flags & KVM_MEMSLOT_INVALID) |
| 1028 | return bad_hva(); | 986 | return KVM_HVA_ERR_BAD; |
| 987 | |||
| 988 | if (memslot_is_readonly(slot) && write) | ||
| 989 | return KVM_HVA_ERR_RO_BAD; | ||
| 1029 | 990 | ||
| 1030 | if (nr_pages) | 991 | if (nr_pages) |
| 1031 | *nr_pages = slot->npages - (gfn - slot->base_gfn); | 992 | *nr_pages = slot->npages - (gfn - slot->base_gfn); |
| 1032 | 993 | ||
| 1033 | return gfn_to_hva_memslot(slot, gfn); | 994 | return __gfn_to_hva_memslot(slot, gfn); |
| 1034 | } | 995 | } |
| 1035 | 996 | ||
| 997 | static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn, | ||
| 998 | gfn_t *nr_pages) | ||
| 999 | { | ||
| 1000 | return __gfn_to_hva_many(slot, gfn, nr_pages, true); | ||
| 1001 | } | ||
| 1002 | |||
| 1003 | unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, | ||
| 1004 | gfn_t gfn) | ||
| 1005 | { | ||
| 1006 | return gfn_to_hva_many(slot, gfn, NULL); | ||
| 1007 | } | ||
| 1008 | EXPORT_SYMBOL_GPL(gfn_to_hva_memslot); | ||
| 1009 | |||
| 1036 | unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) | 1010 | unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) |
| 1037 | { | 1011 | { |
| 1038 | return gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL); | 1012 | return gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL); |
| 1039 | } | 1013 | } |
| 1040 | EXPORT_SYMBOL_GPL(gfn_to_hva); | 1014 | EXPORT_SYMBOL_GPL(gfn_to_hva); |
| 1041 | 1015 | ||
| 1042 | static pfn_t get_fault_pfn(void) | 1016 | /* |
| 1017 | * The hva returned by this function is only allowed to be read. | ||
| 1018 | * It should pair with kvm_read_hva() or kvm_read_hva_atomic(). | ||
| 1019 | */ | ||
| 1020 | static unsigned long gfn_to_hva_read(struct kvm *kvm, gfn_t gfn) | ||
| 1021 | { | ||
| 1022 | return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false); | ||
| 1023 | } | ||
| 1024 | |||
| 1025 | static int kvm_read_hva(void *data, void __user *hva, int len) | ||
| 1043 | { | 1026 | { |
| 1044 | get_page(fault_page); | 1027 | return __copy_from_user(data, hva, len); |
| 1045 | return fault_pfn; | 1028 | } |
| 1029 | |||
| 1030 | static int kvm_read_hva_atomic(void *data, void __user *hva, int len) | ||
| 1031 | { | ||
| 1032 | return __copy_from_user_inatomic(data, hva, len); | ||
| 1046 | } | 1033 | } |
| 1047 | 1034 | ||
| 1048 | int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, | 1035 | int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, |
| @@ -1065,108 +1052,186 @@ static inline int check_user_page_hwpoison(unsigned long addr) | |||
| 1065 | return rc == -EHWPOISON; | 1052 | return rc == -EHWPOISON; |
| 1066 | } | 1053 | } |
| 1067 | 1054 | ||
| 1068 | static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic, | 1055 | /* |
| 1069 | bool *async, bool write_fault, bool *writable) | 1056 | * The atomic path to get the writable pfn which will be stored in @pfn, |
| 1057 | * true indicates success, otherwise false is returned. | ||
| 1058 | */ | ||
| 1059 | static bool hva_to_pfn_fast(unsigned long addr, bool atomic, bool *async, | ||
| 1060 | bool write_fault, bool *writable, pfn_t *pfn) | ||
| 1070 | { | 1061 | { |
| 1071 | struct page *page[1]; | 1062 | struct page *page[1]; |
| 1072 | int npages = 0; | 1063 | int npages; |
| 1073 | pfn_t pfn; | ||
| 1074 | 1064 | ||
| 1075 | /* we can do it either atomically or asynchronously, not both */ | 1065 | if (!(async || atomic)) |
| 1076 | BUG_ON(atomic && async); | 1066 | return false; |
| 1077 | 1067 | ||
| 1078 | BUG_ON(!write_fault && !writable); | 1068 | /* |
| 1069 | * Fast pin a writable pfn only if it is a write fault request | ||
| 1070 | * or the caller allows to map a writable pfn for a read fault | ||
| 1071 | * request. | ||
| 1072 | */ | ||
| 1073 | if (!(write_fault || writable)) | ||
| 1074 | return false; | ||
| 1079 | 1075 | ||
| 1080 | if (writable) | 1076 | npages = __get_user_pages_fast(addr, 1, 1, page); |
| 1081 | *writable = true; | 1077 | if (npages == 1) { |
| 1078 | *pfn = page_to_pfn(page[0]); | ||
| 1082 | 1079 | ||
| 1083 | if (atomic || async) | 1080 | if (writable) |
| 1084 | npages = __get_user_pages_fast(addr, 1, 1, page); | 1081 | *writable = true; |
| 1082 | return true; | ||
| 1083 | } | ||
| 1085 | 1084 | ||
| 1086 | if (unlikely(npages != 1) && !atomic) { | 1085 | return false; |
| 1087 | might_sleep(); | 1086 | } |
| 1088 | 1087 | ||
| 1089 | if (writable) | 1088 | /* |
| 1090 | *writable = write_fault; | 1089 | * The slow path to get the pfn of the specified host virtual address, |
| 1090 | * 1 indicates success, -errno is returned if error is detected. | ||
| 1091 | */ | ||
| 1092 | static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault, | ||
| 1093 | bool *writable, pfn_t *pfn) | ||
| 1094 | { | ||
| 1095 | struct page *page[1]; | ||
| 1096 | int npages = 0; | ||
| 1091 | 1097 | ||
| 1092 | if (async) { | 1098 | might_sleep(); |
| 1093 | down_read(¤t->mm->mmap_sem); | 1099 | |
| 1094 | npages = get_user_page_nowait(current, current->mm, | 1100 | if (writable) |
| 1095 | addr, write_fault, page); | 1101 | *writable = write_fault; |
| 1096 | up_read(¤t->mm->mmap_sem); | 1102 | |
| 1097 | } else | 1103 | if (async) { |
| 1098 | npages = get_user_pages_fast(addr, 1, write_fault, | 1104 | down_read(¤t->mm->mmap_sem); |
| 1099 | page); | 1105 | npages = get_user_page_nowait(current, current->mm, |
| 1100 | 1106 | addr, write_fault, page); | |
| 1101 | /* map read fault as writable if possible */ | 1107 | up_read(¤t->mm->mmap_sem); |
| 1102 | if (unlikely(!write_fault) && npages == 1) { | 1108 | } else |
| 1103 | struct page *wpage[1]; | 1109 | npages = get_user_pages_fast(addr, 1, write_fault, |
| 1104 | 1110 | page); | |
| 1105 | npages = __get_user_pages_fast(addr, 1, 1, wpage); | 1111 | if (npages != 1) |
| 1106 | if (npages == 1) { | 1112 | return npages; |
| 1107 | *writable = true; | 1113 | |
| 1108 | put_page(page[0]); | 1114 | /* map read fault as writable if possible */ |
| 1109 | page[0] = wpage[0]; | 1115 | if (unlikely(!write_fault) && writable) { |
| 1110 | } | 1116 | struct page *wpage[1]; |
| 1111 | npages = 1; | 1117 | |
| 1118 | npages = __get_user_pages_fast(addr, 1, 1, wpage); | ||
| 1119 | if (npages == 1) { | ||
| 1120 | *writable = true; | ||
| 1121 | put_page(page[0]); | ||
| 1122 | page[0] = wpage[0]; | ||
| 1112 | } | 1123 | } |
| 1124 | |||
| 1125 | npages = 1; | ||
| 1113 | } | 1126 | } |
| 1127 | *pfn = page_to_pfn(page[0]); | ||
| 1128 | return npages; | ||
| 1129 | } | ||
| 1114 | 1130 | ||
| 1115 | if (unlikely(npages != 1)) { | 1131 | static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault) |
| 1116 | struct vm_area_struct *vma; | 1132 | { |
| 1133 | if (unlikely(!(vma->vm_flags & VM_READ))) | ||
| 1134 | return false; | ||
| 1117 | 1135 | ||
| 1118 | if (atomic) | 1136 | if (write_fault && (unlikely(!(vma->vm_flags & VM_WRITE)))) |
| 1119 | return get_fault_pfn(); | 1137 | return false; |
| 1120 | 1138 | ||
| 1121 | down_read(¤t->mm->mmap_sem); | 1139 | return true; |
| 1122 | if (npages == -EHWPOISON || | 1140 | } |
| 1123 | (!async && check_user_page_hwpoison(addr))) { | ||
| 1124 | up_read(¤t->mm->mmap_sem); | ||
| 1125 | get_page(hwpoison_page); | ||
| 1126 | return page_to_pfn(hwpoison_page); | ||
| 1127 | } | ||
| 1128 | 1141 | ||
| 1129 | vma = find_vma_intersection(current->mm, addr, addr+1); | 1142 | /* |
| 1130 | 1143 | * Pin guest page in memory and return its pfn. | |
| 1131 | if (vma == NULL) | 1144 | * @addr: host virtual address which maps memory to the guest |
| 1132 | pfn = get_fault_pfn(); | 1145 | * @atomic: whether this function can sleep |
| 1133 | else if ((vma->vm_flags & VM_PFNMAP)) { | 1146 | * @async: whether this function need to wait IO complete if the |
| 1134 | pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + | 1147 | * host page is not in the memory |
| 1135 | vma->vm_pgoff; | 1148 | * @write_fault: whether we should get a writable host page |
| 1136 | BUG_ON(!kvm_is_mmio_pfn(pfn)); | 1149 | * @writable: whether it allows to map a writable host page for !@write_fault |
| 1137 | } else { | 1150 | * |
| 1138 | if (async && (vma->vm_flags & VM_WRITE)) | 1151 | * The function will map a writable host page for these two cases: |
| 1139 | *async = true; | 1152 | * 1): @write_fault = true |
| 1140 | pfn = get_fault_pfn(); | 1153 | * 2): @write_fault = false && @writable, @writable will tell the caller |
| 1141 | } | 1154 | * whether the mapping is writable. |
| 1142 | up_read(¤t->mm->mmap_sem); | 1155 | */ |
| 1143 | } else | 1156 | static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, |
| 1144 | pfn = page_to_pfn(page[0]); | 1157 | bool write_fault, bool *writable) |
| 1158 | { | ||
| 1159 | struct vm_area_struct *vma; | ||
| 1160 | pfn_t pfn = 0; | ||
| 1161 | int npages; | ||
| 1162 | |||
| 1163 | /* we can do it either atomically or asynchronously, not both */ | ||
| 1164 | BUG_ON(atomic && async); | ||
| 1145 | 1165 | ||
| 1166 | if (hva_to_pfn_fast(addr, atomic, async, write_fault, writable, &pfn)) | ||
| 1167 | return pfn; | ||
| 1168 | |||
| 1169 | if (atomic) | ||
| 1170 | return KVM_PFN_ERR_FAULT; | ||
| 1171 | |||
| 1172 | npages = hva_to_pfn_slow(addr, async, write_fault, writable, &pfn); | ||
| 1173 | if (npages == 1) | ||
| 1174 | return pfn; | ||
| 1175 | |||
| 1176 | down_read(¤t->mm->mmap_sem); | ||
| 1177 | if (npages == -EHWPOISON || | ||
| 1178 | (!async && check_user_page_hwpoison(addr))) { | ||
| 1179 | pfn = KVM_PFN_ERR_HWPOISON; | ||
| 1180 | goto exit; | ||
| 1181 | } | ||
| 1182 | |||
| 1183 | vma = find_vma_intersection(current->mm, addr, addr + 1); | ||
| 1184 | |||
| 1185 | if (vma == NULL) | ||
| 1186 | pfn = KVM_PFN_ERR_FAULT; | ||
| 1187 | else if ((vma->vm_flags & VM_PFNMAP)) { | ||
| 1188 | pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + | ||
| 1189 | vma->vm_pgoff; | ||
| 1190 | BUG_ON(!kvm_is_mmio_pfn(pfn)); | ||
| 1191 | } else { | ||
| 1192 | if (async && vma_is_valid(vma, write_fault)) | ||
| 1193 | *async = true; | ||
| 1194 | pfn = KVM_PFN_ERR_FAULT; | ||
| 1195 | } | ||
| 1196 | exit: | ||
| 1197 | up_read(¤t->mm->mmap_sem); | ||
| 1146 | return pfn; | 1198 | return pfn; |
| 1147 | } | 1199 | } |
| 1148 | 1200 | ||
| 1149 | pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr) | 1201 | static pfn_t |
| 1202 | __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool atomic, | ||
| 1203 | bool *async, bool write_fault, bool *writable) | ||
| 1150 | { | 1204 | { |
| 1151 | return hva_to_pfn(kvm, addr, true, NULL, true, NULL); | 1205 | unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault); |
| 1206 | |||
| 1207 | if (addr == KVM_HVA_ERR_RO_BAD) | ||
| 1208 | return KVM_PFN_ERR_RO_FAULT; | ||
| 1209 | |||
| 1210 | if (kvm_is_error_hva(addr)) | ||
| 1211 | return KVM_PFN_ERR_BAD; | ||
| 1212 | |||
| 1213 | /* Do not map writable pfn in the readonly memslot. */ | ||
| 1214 | if (writable && memslot_is_readonly(slot)) { | ||
| 1215 | *writable = false; | ||
| 1216 | writable = NULL; | ||
| 1217 | } | ||
| 1218 | |||
| 1219 | return hva_to_pfn(addr, atomic, async, write_fault, | ||
| 1220 | writable); | ||
| 1152 | } | 1221 | } |
| 1153 | EXPORT_SYMBOL_GPL(hva_to_pfn_atomic); | ||
| 1154 | 1222 | ||
| 1155 | static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async, | 1223 | static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async, |
| 1156 | bool write_fault, bool *writable) | 1224 | bool write_fault, bool *writable) |
| 1157 | { | 1225 | { |
| 1158 | unsigned long addr; | 1226 | struct kvm_memory_slot *slot; |
| 1159 | 1227 | ||
| 1160 | if (async) | 1228 | if (async) |
| 1161 | *async = false; | 1229 | *async = false; |
| 1162 | 1230 | ||
| 1163 | addr = gfn_to_hva(kvm, gfn); | 1231 | slot = gfn_to_memslot(kvm, gfn); |
| 1164 | if (kvm_is_error_hva(addr)) { | ||
| 1165 | get_page(bad_page); | ||
| 1166 | return page_to_pfn(bad_page); | ||
| 1167 | } | ||
| 1168 | 1232 | ||
| 1169 | return hva_to_pfn(kvm, addr, atomic, async, write_fault, writable); | 1233 | return __gfn_to_pfn_memslot(slot, gfn, atomic, async, write_fault, |
| 1234 | writable); | ||
| 1170 | } | 1235 | } |
| 1171 | 1236 | ||
| 1172 | pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn) | 1237 | pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn) |
| @@ -1195,12 +1260,16 @@ pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, | |||
| 1195 | } | 1260 | } |
| 1196 | EXPORT_SYMBOL_GPL(gfn_to_pfn_prot); | 1261 | EXPORT_SYMBOL_GPL(gfn_to_pfn_prot); |
| 1197 | 1262 | ||
| 1198 | pfn_t gfn_to_pfn_memslot(struct kvm *kvm, | 1263 | pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn) |
| 1199 | struct kvm_memory_slot *slot, gfn_t gfn) | 1264 | { |
| 1265 | return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL); | ||
| 1266 | } | ||
| 1267 | |||
| 1268 | pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn) | ||
| 1200 | { | 1269 | { |
| 1201 | unsigned long addr = gfn_to_hva_memslot(slot, gfn); | 1270 | return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL); |
| 1202 | return hva_to_pfn(kvm, addr, false, NULL, true, NULL); | ||
| 1203 | } | 1271 | } |
| 1272 | EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic); | ||
| 1204 | 1273 | ||
| 1205 | int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, | 1274 | int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, |
| 1206 | int nr_pages) | 1275 | int nr_pages) |
| @@ -1219,30 +1288,42 @@ int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, | |||
| 1219 | } | 1288 | } |
| 1220 | EXPORT_SYMBOL_GPL(gfn_to_page_many_atomic); | 1289 | EXPORT_SYMBOL_GPL(gfn_to_page_many_atomic); |
| 1221 | 1290 | ||
| 1291 | static struct page *kvm_pfn_to_page(pfn_t pfn) | ||
| 1292 | { | ||
| 1293 | if (is_error_pfn(pfn)) | ||
| 1294 | return KVM_ERR_PTR_BAD_PAGE; | ||
| 1295 | |||
| 1296 | if (kvm_is_mmio_pfn(pfn)) { | ||
| 1297 | WARN_ON(1); | ||
| 1298 | return KVM_ERR_PTR_BAD_PAGE; | ||
| 1299 | } | ||
| 1300 | |||
| 1301 | return pfn_to_page(pfn); | ||
| 1302 | } | ||
| 1303 | |||
| 1222 | struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) | 1304 | struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) |
| 1223 | { | 1305 | { |
| 1224 | pfn_t pfn; | 1306 | pfn_t pfn; |
| 1225 | 1307 | ||
| 1226 | pfn = gfn_to_pfn(kvm, gfn); | 1308 | pfn = gfn_to_pfn(kvm, gfn); |
| 1227 | if (!kvm_is_mmio_pfn(pfn)) | ||
| 1228 | return pfn_to_page(pfn); | ||
| 1229 | |||
| 1230 | WARN_ON(kvm_is_mmio_pfn(pfn)); | ||
| 1231 | 1309 | ||
| 1232 | get_page(bad_page); | 1310 | return kvm_pfn_to_page(pfn); |
| 1233 | return bad_page; | ||
| 1234 | } | 1311 | } |
| 1235 | 1312 | ||
| 1236 | EXPORT_SYMBOL_GPL(gfn_to_page); | 1313 | EXPORT_SYMBOL_GPL(gfn_to_page); |
| 1237 | 1314 | ||
| 1238 | void kvm_release_page_clean(struct page *page) | 1315 | void kvm_release_page_clean(struct page *page) |
| 1239 | { | 1316 | { |
| 1317 | WARN_ON(is_error_page(page)); | ||
| 1318 | |||
| 1240 | kvm_release_pfn_clean(page_to_pfn(page)); | 1319 | kvm_release_pfn_clean(page_to_pfn(page)); |
| 1241 | } | 1320 | } |
| 1242 | EXPORT_SYMBOL_GPL(kvm_release_page_clean); | 1321 | EXPORT_SYMBOL_GPL(kvm_release_page_clean); |
| 1243 | 1322 | ||
| 1244 | void kvm_release_pfn_clean(pfn_t pfn) | 1323 | void kvm_release_pfn_clean(pfn_t pfn) |
| 1245 | { | 1324 | { |
| 1325 | WARN_ON(is_error_pfn(pfn)); | ||
| 1326 | |||
| 1246 | if (!kvm_is_mmio_pfn(pfn)) | 1327 | if (!kvm_is_mmio_pfn(pfn)) |
| 1247 | put_page(pfn_to_page(pfn)); | 1328 | put_page(pfn_to_page(pfn)); |
| 1248 | } | 1329 | } |
| @@ -1250,6 +1331,8 @@ EXPORT_SYMBOL_GPL(kvm_release_pfn_clean); | |||
| 1250 | 1331 | ||
| 1251 | void kvm_release_page_dirty(struct page *page) | 1332 | void kvm_release_page_dirty(struct page *page) |
| 1252 | { | 1333 | { |
| 1334 | WARN_ON(is_error_page(page)); | ||
| 1335 | |||
| 1253 | kvm_release_pfn_dirty(page_to_pfn(page)); | 1336 | kvm_release_pfn_dirty(page_to_pfn(page)); |
| 1254 | } | 1337 | } |
| 1255 | EXPORT_SYMBOL_GPL(kvm_release_page_dirty); | 1338 | EXPORT_SYMBOL_GPL(kvm_release_page_dirty); |
| @@ -1305,10 +1388,10 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, | |||
| 1305 | int r; | 1388 | int r; |
| 1306 | unsigned long addr; | 1389 | unsigned long addr; |
| 1307 | 1390 | ||
| 1308 | addr = gfn_to_hva(kvm, gfn); | 1391 | addr = gfn_to_hva_read(kvm, gfn); |
| 1309 | if (kvm_is_error_hva(addr)) | 1392 | if (kvm_is_error_hva(addr)) |
| 1310 | return -EFAULT; | 1393 | return -EFAULT; |
| 1311 | r = __copy_from_user(data, (void __user *)addr + offset, len); | 1394 | r = kvm_read_hva(data, (void __user *)addr + offset, len); |
| 1312 | if (r) | 1395 | if (r) |
| 1313 | return -EFAULT; | 1396 | return -EFAULT; |
| 1314 | return 0; | 1397 | return 0; |
| @@ -1343,11 +1426,11 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, | |||
| 1343 | gfn_t gfn = gpa >> PAGE_SHIFT; | 1426 | gfn_t gfn = gpa >> PAGE_SHIFT; |
| 1344 | int offset = offset_in_page(gpa); | 1427 | int offset = offset_in_page(gpa); |
| 1345 | 1428 | ||
| 1346 | addr = gfn_to_hva(kvm, gfn); | 1429 | addr = gfn_to_hva_read(kvm, gfn); |
| 1347 | if (kvm_is_error_hva(addr)) | 1430 | if (kvm_is_error_hva(addr)) |
| 1348 | return -EFAULT; | 1431 | return -EFAULT; |
| 1349 | pagefault_disable(); | 1432 | pagefault_disable(); |
| 1350 | r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len); | 1433 | r = kvm_read_hva_atomic(data, (void __user *)addr + offset, len); |
| 1351 | pagefault_enable(); | 1434 | pagefault_enable(); |
| 1352 | if (r) | 1435 | if (r) |
| 1353 | return -EFAULT; | 1436 | return -EFAULT; |
| @@ -1580,6 +1663,43 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target) | |||
| 1580 | } | 1663 | } |
| 1581 | EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to); | 1664 | EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to); |
| 1582 | 1665 | ||
| 1666 | #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT | ||
| 1667 | /* | ||
| 1668 | * Helper that checks whether a VCPU is eligible for directed yield. | ||
| 1669 | * Most eligible candidate to yield is decided by following heuristics: | ||
| 1670 | * | ||
| 1671 | * (a) VCPU which has not done pl-exit or cpu relax intercepted recently | ||
| 1672 | * (preempted lock holder), indicated by @in_spin_loop. | ||
| 1673 | * Set at the beiginning and cleared at the end of interception/PLE handler. | ||
| 1674 | * | ||
| 1675 | * (b) VCPU which has done pl-exit/ cpu relax intercepted but did not get | ||
| 1676 | * chance last time (mostly it has become eligible now since we have probably | ||
| 1677 | * yielded to lockholder in last iteration. This is done by toggling | ||
| 1678 | * @dy_eligible each time a VCPU checked for eligibility.) | ||
| 1679 | * | ||
| 1680 | * Yielding to a recently pl-exited/cpu relax intercepted VCPU before yielding | ||
| 1681 | * to preempted lock-holder could result in wrong VCPU selection and CPU | ||
| 1682 | * burning. Giving priority for a potential lock-holder increases lock | ||
| 1683 | * progress. | ||
| 1684 | * | ||
| 1685 | * Since algorithm is based on heuristics, accessing another VCPU data without | ||
| 1686 | * locking does not harm. It may result in trying to yield to same VCPU, fail | ||
| 1687 | * and continue with next VCPU and so on. | ||
| 1688 | */ | ||
| 1689 | bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) | ||
| 1690 | { | ||
| 1691 | bool eligible; | ||
| 1692 | |||
| 1693 | eligible = !vcpu->spin_loop.in_spin_loop || | ||
| 1694 | (vcpu->spin_loop.in_spin_loop && | ||
| 1695 | vcpu->spin_loop.dy_eligible); | ||
| 1696 | |||
| 1697 | if (vcpu->spin_loop.in_spin_loop) | ||
| 1698 | kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible); | ||
| 1699 | |||
| 1700 | return eligible; | ||
| 1701 | } | ||
| 1702 | #endif | ||
| 1583 | void kvm_vcpu_on_spin(struct kvm_vcpu *me) | 1703 | void kvm_vcpu_on_spin(struct kvm_vcpu *me) |
| 1584 | { | 1704 | { |
| 1585 | struct kvm *kvm = me->kvm; | 1705 | struct kvm *kvm = me->kvm; |
| @@ -1589,6 +1709,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) | |||
| 1589 | int pass; | 1709 | int pass; |
| 1590 | int i; | 1710 | int i; |
| 1591 | 1711 | ||
| 1712 | kvm_vcpu_set_in_spin_loop(me, true); | ||
| 1592 | /* | 1713 | /* |
| 1593 | * We boost the priority of a VCPU that is runnable but not | 1714 | * We boost the priority of a VCPU that is runnable but not |
| 1594 | * currently running, because it got preempted by something | 1715 | * currently running, because it got preempted by something |
| @@ -1607,6 +1728,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) | |||
| 1607 | continue; | 1728 | continue; |
| 1608 | if (waitqueue_active(&vcpu->wq)) | 1729 | if (waitqueue_active(&vcpu->wq)) |
| 1609 | continue; | 1730 | continue; |
| 1731 | if (!kvm_vcpu_eligible_for_directed_yield(vcpu)) | ||
| 1732 | continue; | ||
| 1610 | if (kvm_vcpu_yield_to(vcpu)) { | 1733 | if (kvm_vcpu_yield_to(vcpu)) { |
| 1611 | kvm->last_boosted_vcpu = i; | 1734 | kvm->last_boosted_vcpu = i; |
| 1612 | yielded = 1; | 1735 | yielded = 1; |
| @@ -1614,6 +1737,10 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) | |||
| 1614 | } | 1737 | } |
| 1615 | } | 1738 | } |
| 1616 | } | 1739 | } |
| 1740 | kvm_vcpu_set_in_spin_loop(me, false); | ||
| 1741 | |||
| 1742 | /* Ensure vcpu is not eligible during next spinloop */ | ||
| 1743 | kvm_vcpu_set_dy_eligible(me, false); | ||
| 1617 | } | 1744 | } |
| 1618 | EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin); | 1745 | EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin); |
| 1619 | 1746 | ||
| @@ -1766,7 +1893,9 @@ static long kvm_vcpu_ioctl(struct file *filp, | |||
| 1766 | #endif | 1893 | #endif |
| 1767 | 1894 | ||
| 1768 | 1895 | ||
| 1769 | vcpu_load(vcpu); | 1896 | r = vcpu_load(vcpu); |
| 1897 | if (r) | ||
| 1898 | return r; | ||
| 1770 | switch (ioctl) { | 1899 | switch (ioctl) { |
| 1771 | case KVM_RUN: | 1900 | case KVM_RUN: |
| 1772 | r = -EINVAL; | 1901 | r = -EINVAL; |
| @@ -2094,6 +2223,29 @@ static long kvm_vm_ioctl(struct file *filp, | |||
| 2094 | break; | 2223 | break; |
| 2095 | } | 2224 | } |
| 2096 | #endif | 2225 | #endif |
| 2226 | #ifdef __KVM_HAVE_IRQ_LINE | ||
| 2227 | case KVM_IRQ_LINE_STATUS: | ||
| 2228 | case KVM_IRQ_LINE: { | ||
| 2229 | struct kvm_irq_level irq_event; | ||
| 2230 | |||
| 2231 | r = -EFAULT; | ||
| 2232 | if (copy_from_user(&irq_event, argp, sizeof irq_event)) | ||
| 2233 | goto out; | ||
| 2234 | |||
| 2235 | r = kvm_vm_ioctl_irq_line(kvm, &irq_event); | ||
| 2236 | if (r) | ||
| 2237 | goto out; | ||
| 2238 | |||
| 2239 | r = -EFAULT; | ||
| 2240 | if (ioctl == KVM_IRQ_LINE_STATUS) { | ||
| 2241 | if (copy_to_user(argp, &irq_event, sizeof irq_event)) | ||
| 2242 | goto out; | ||
| 2243 | } | ||
| 2244 | |||
| 2245 | r = 0; | ||
| 2246 | break; | ||
| 2247 | } | ||
| 2248 | #endif | ||
| 2097 | default: | 2249 | default: |
| 2098 | r = kvm_arch_vm_ioctl(filp, ioctl, arg); | 2250 | r = kvm_arch_vm_ioctl(filp, ioctl, arg); |
| 2099 | if (r == -ENOTTY) | 2251 | if (r == -ENOTTY) |
| @@ -2698,9 +2850,6 @@ static struct syscore_ops kvm_syscore_ops = { | |||
| 2698 | .resume = kvm_resume, | 2850 | .resume = kvm_resume, |
| 2699 | }; | 2851 | }; |
| 2700 | 2852 | ||
| 2701 | struct page *bad_page; | ||
| 2702 | pfn_t bad_pfn; | ||
| 2703 | |||
| 2704 | static inline | 2853 | static inline |
| 2705 | struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) | 2854 | struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) |
| 2706 | { | 2855 | { |
| @@ -2732,33 +2881,6 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, | |||
| 2732 | if (r) | 2881 | if (r) |
| 2733 | goto out_fail; | 2882 | goto out_fail; |
| 2734 | 2883 | ||
| 2735 | bad_page = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
| 2736 | |||
| 2737 | if (bad_page == NULL) { | ||
| 2738 | r = -ENOMEM; | ||
| 2739 | goto out; | ||
| 2740 | } | ||
| 2741 | |||
| 2742 | bad_pfn = page_to_pfn(bad_page); | ||
| 2743 | |||
| 2744 | hwpoison_page = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
| 2745 | |||
| 2746 | if (hwpoison_page == NULL) { | ||
| 2747 | r = -ENOMEM; | ||
| 2748 | goto out_free_0; | ||
| 2749 | } | ||
| 2750 | |||
| 2751 | hwpoison_pfn = page_to_pfn(hwpoison_page); | ||
| 2752 | |||
| 2753 | fault_page = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
| 2754 | |||
| 2755 | if (fault_page == NULL) { | ||
| 2756 | r = -ENOMEM; | ||
| 2757 | goto out_free_0; | ||
| 2758 | } | ||
| 2759 | |||
| 2760 | fault_pfn = page_to_pfn(fault_page); | ||
| 2761 | |||
| 2762 | if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) { | 2884 | if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) { |
| 2763 | r = -ENOMEM; | 2885 | r = -ENOMEM; |
| 2764 | goto out_free_0; | 2886 | goto out_free_0; |
| @@ -2833,12 +2955,6 @@ out_free_1: | |||
| 2833 | out_free_0a: | 2955 | out_free_0a: |
| 2834 | free_cpumask_var(cpus_hardware_enabled); | 2956 | free_cpumask_var(cpus_hardware_enabled); |
| 2835 | out_free_0: | 2957 | out_free_0: |
| 2836 | if (fault_page) | ||
| 2837 | __free_page(fault_page); | ||
| 2838 | if (hwpoison_page) | ||
| 2839 | __free_page(hwpoison_page); | ||
| 2840 | __free_page(bad_page); | ||
| 2841 | out: | ||
| 2842 | kvm_arch_exit(); | 2958 | kvm_arch_exit(); |
| 2843 | out_fail: | 2959 | out_fail: |
| 2844 | return r; | 2960 | return r; |
| @@ -2858,8 +2974,5 @@ void kvm_exit(void) | |||
| 2858 | kvm_arch_hardware_unsetup(); | 2974 | kvm_arch_hardware_unsetup(); |
| 2859 | kvm_arch_exit(); | 2975 | kvm_arch_exit(); |
| 2860 | free_cpumask_var(cpus_hardware_enabled); | 2976 | free_cpumask_var(cpus_hardware_enabled); |
| 2861 | __free_page(fault_page); | ||
| 2862 | __free_page(hwpoison_page); | ||
| 2863 | __free_page(bad_page); | ||
| 2864 | } | 2977 | } |
| 2865 | EXPORT_SYMBOL_GPL(kvm_exit); | 2978 | EXPORT_SYMBOL_GPL(kvm_exit); |
