author     Linus Torvalds <torvalds@linux-foundation.org>    2010-01-25 22:02:31 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>    2010-01-25 22:02:31 -0500
commit     486d35e2220acfe45d85131c557d94fe889184a2
tree       6be42a8a0d82e7e09bb4ac05edcbdb96adf650dc
parent     a8d0b6666ecfe14226f1e46d693d5e2cde072337
parent     443c39bc9ef7d8f648408d74c97e943f3bb3f48a
Merge branch 'kvm-updates/2.6.33' of git://git.kernel.org/pub/scm/virt/kvm/kvm
* 'kvm-updates/2.6.33' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: x86: Fix leak of free lapic date in kvm_arch_vcpu_init()
  KVM: x86: Fix probable memory leak of vcpu->arch.mce_banks
  KVM: S390: fix potential array overrun in intercept handling
  KVM: fix spurious interrupt with irqfd
  eventfd - allow atomic read and waitqueue remove
  KVM: MMU: bail out pagewalk on kvm_read_guest error
  KVM: properly check max PIC pin in irq route setup
  KVM: only allow one gsi per fd
  KVM: x86: Fix host_mapping_level()
  KVM: powerpc: Show timing option only on embedded
  KVM: Fix race between APIC TMR and IRR
-rw-r--r--  arch/powerpc/kvm/Kconfig      2
-rw-r--r--  arch/s390/kvm/intercept.c     4
-rw-r--r--  arch/x86/kvm/lapic.c         11
-rw-r--r--  arch/x86/kvm/mmu.c            6
-rw-r--r--  arch/x86/kvm/paging_tmpl.h    4
-rw-r--r--  arch/x86/kvm/x86.c            6
-rw-r--r--  fs/eventfd.c                 89
-rw-r--r--  include/linux/eventfd.h      16
-rw-r--r--  virt/kvm/eventfd.c           18
-rw-r--r--  virt/kvm/irq_comm.c           6
10 files changed, 128 insertions(+), 34 deletions(-)
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 07703f72330..6fb6e8aa389 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -53,7 +53,7 @@ config KVM_440
 
 config KVM_EXIT_TIMING
         bool "Detailed exit timing"
-        depends on KVM
+        depends on KVM_440 || KVM_E500
         ---help---
           Calculate elapsed time for every exit/enter cycle. A per-vcpu
           report is available in debugfs kvm/vm#_vcpu#_timing.
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index ba9d8a7bc1a..b40096494e4 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -213,7 +213,7 @@ static int handle_instruction_and_prog(struct kvm_vcpu *vcpu)
         return rc2;
 }
 
-static const intercept_handler_t intercept_funcs[0x48 >> 2] = {
+static const intercept_handler_t intercept_funcs[] = {
         [0x00 >> 2] = handle_noop,
         [0x04 >> 2] = handle_instruction,
         [0x08 >> 2] = handle_prog,
@@ -230,7 +230,7 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
         intercept_handler_t func;
         u8 code = vcpu->arch.sie_block->icptcode;
 
-        if (code & 3 || code > 0x48)
+        if (code & 3 || (code >> 2) >= ARRAY_SIZE(intercept_funcs))
                 return -ENOTSUPP;
         func = intercept_funcs[code >> 2];
         if (func)
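
The s390 fix above swaps a hard-coded bound (0x48) for ARRAY_SIZE() so the range check can never drift out of sync with the handler table. A minimal userspace sketch of the same pattern; the handler names and codes (handle_a, handle_b, dispatch) are illustrative, not the s390 ones:

/* Sparse handler table indexed by code >> 2, bounds-checked with ARRAY_SIZE(). */
#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

typedef int (*handler_t)(unsigned code);

static int handle_a(unsigned code) { printf("A %#x\n", code); return 0; }
static int handle_b(unsigned code) { printf("B %#x\n", code); return 0; }

static const handler_t handlers[] = {
        [0x00 >> 2] = handle_a,
        [0x08 >> 2] = handle_b,
};

static int dispatch(unsigned code)
{
        handler_t func;

        /* Reject misaligned codes and anything past the end of the table.
         * Comparing against ARRAY_SIZE() keeps the check in sync with the
         * table, unlike a hard-coded upper bound such as 0x48. */
        if (code & 3 || (code >> 2) >= ARRAY_SIZE(handlers))
                return -1;
        func = handlers[code >> 2];
        return func ? func(code) : -1;
}

int main(void)
{
        dispatch(0x08); /* handled */
        dispatch(0x4c); /* out of range: rejected instead of overrunning */
        return 0;
}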
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 3063a0c4858..ba8c045da78 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -373,6 +373,12 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
                 if (unlikely(!apic_enabled(apic)))
                         break;
 
+                if (trig_mode) {
+                        apic_debug("level trig mode for vector %d", vector);
+                        apic_set_vector(vector, apic->regs + APIC_TMR);
+                } else
+                        apic_clear_vector(vector, apic->regs + APIC_TMR);
+
                 result = !apic_test_and_set_irr(vector, apic);
                 trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
                                           trig_mode, vector, !result);
@@ -383,11 +389,6 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
                         break;
                 }
 
-                if (trig_mode) {
-                        apic_debug("level trig mode for vector %d", vector);
-                        apic_set_vector(vector, apic->regs + APIC_TMR);
-                } else
-                        apic_clear_vector(vector, apic->regs + APIC_TMR);
                 kvm_vcpu_kick(vcpu);
                 break;
 
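
The lapic.c hunks move the APIC_TMR update ahead of the IRR test-and-set, so a CPU that observes the pending bit also observes the matching trigger-mode state. A rough userspace analogue of that publish-after-init ordering, using C11 atomics and pthreads rather than the kernel's bitops; tmr and irr here are just labels, not the KVM registers:

/* Write the payload (tmr) before publishing the flag (irr); the consumer
 * that sees the flag with acquire ordering also sees the payload. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int tmr;  /* payload: level-triggered? */
static atomic_int irr;  /* publication flag: interrupt pending */

static void *producer(void *arg)
{
        (void)arg;
        atomic_store_explicit(&tmr, 1, memory_order_relaxed); /* set TMR first */
        atomic_store_explicit(&irr, 1, memory_order_release); /* then publish IRR */
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, producer, NULL);
        while (!atomic_load_explicit(&irr, memory_order_acquire))
                ;       /* spin until the interrupt is published */
        printf("trig_mode seen with the pending bit: %d\n",
               atomic_load_explicit(&tmr, memory_order_relaxed)); /* always 1 */
        pthread_join(t, NULL);
        return 0;
}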
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 4c3e5b2314c..89a49fb46a2 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -477,7 +477,7 @@ static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
 
         addr = gfn_to_hva(kvm, gfn);
         if (kvm_is_error_hva(addr))
-                return page_size;
+                return PT_PAGE_TABLE_LEVEL;
 
         down_read(&current->mm->mmap_sem);
         vma = find_vma(current->mm, addr);
@@ -515,11 +515,9 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
         if (host_level == PT_PAGE_TABLE_LEVEL)
                 return host_level;
 
-        for (level = PT_DIRECTORY_LEVEL; level <= host_level; ++level) {
-
+        for (level = PT_DIRECTORY_LEVEL; level <= host_level; ++level)
                 if (has_wrprotected_page(vcpu->kvm, large_gfn, level))
                         break;
-        }
 
         return level - 1;
 }
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 58a0f1e8859..ede2131a922 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -150,7 +150,9 @@ walk:
                 walker->table_gfn[walker->level - 1] = table_gfn;
                 walker->pte_gpa[walker->level - 1] = pte_gpa;
 
-                kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte));
+                if (kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte)))
+                        goto not_present;
+
                 trace_kvm_mmu_paging_element(pte, walker->level);
 
                 if (!is_present_gpte(pte))
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6651dbf5867..1ddcad452ad 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5072,12 +5072,13 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
                                        GFP_KERNEL);
         if (!vcpu->arch.mce_banks) {
                 r = -ENOMEM;
-                goto fail_mmu_destroy;
+                goto fail_free_lapic;
         }
         vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
 
         return 0;
-
+fail_free_lapic:
+        kvm_free_lapic(vcpu);
 fail_mmu_destroy:
         kvm_mmu_destroy(vcpu);
 fail_free_pio_data:
@@ -5088,6 +5089,7 @@ fail:
 
 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
+        kfree(vcpu->arch.mce_banks);
         kvm_free_lapic(vcpu);
         down_read(&vcpu->kvm->slots_lock);
         kvm_mmu_destroy(vcpu);
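
The x86.c change adds a fail_free_lapic label so the allocation made just before the failing step is unwound too, and frees mce_banks on the uninit path. A self-contained userspace sketch of that goto-unwind convention; the names (vcpu_like, vcpu_init_like) are invented stand-ins for the kernel structures:

/* Each setup step gets a cleanup label; failure jumps to the label that
 * undoes everything allocated so far, in reverse order of setup. */
#include <stdlib.h>

struct vcpu_like {
        void *pio_data;
        void *lapic;
        void *mce_banks;
};

static int vcpu_init_like(struct vcpu_like *v)
{
        v->pio_data = malloc(64);
        if (!v->pio_data)
                goto fail;

        v->lapic = malloc(64);
        if (!v->lapic)
                goto fail_free_pio_data;

        v->mce_banks = calloc(32, sizeof(long));
        if (!v->mce_banks)
                goto fail_free_lapic;   /* not fail_free_pio_data: lapic must be freed too */

        return 0;

fail_free_lapic:
        free(v->lapic);
fail_free_pio_data:
        free(v->pio_data);
fail:
        return -1;
}

int main(void)
{
        struct vcpu_like v = {0};

        if (vcpu_init_like(&v))
                return 1;
        free(v.mce_banks);
        free(v.lapic);
        free(v.pio_data);
        return 0;
}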
diff --git a/fs/eventfd.c b/fs/eventfd.c
index d26402ff06e..7758cc382ef 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -135,26 +135,71 @@ static unsigned int eventfd_poll(struct file *file, poll_table *wait)
         return events;
 }
 
-static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
-                            loff_t *ppos)
+static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
+{
+        *cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
+        ctx->count -= *cnt;
+}
+
+/**
+ * eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue.
+ * @ctx: [in] Pointer to eventfd context.
+ * @wait: [in] Wait queue to be removed.
+ * @cnt: [out] Pointer to the 64bit conter value.
+ *
+ * Returns zero if successful, or the following error codes:
+ *
+ * -EAGAIN      : The operation would have blocked.
+ *
+ * This is used to atomically remove a wait queue entry from the eventfd wait
+ * queue head, and read/reset the counter value.
+ */
+int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait,
+                                  __u64 *cnt)
+{
+        unsigned long flags;
+
+        spin_lock_irqsave(&ctx->wqh.lock, flags);
+        eventfd_ctx_do_read(ctx, cnt);
+        __remove_wait_queue(&ctx->wqh, wait);
+        if (*cnt != 0 && waitqueue_active(&ctx->wqh))
+                wake_up_locked_poll(&ctx->wqh, POLLOUT);
+        spin_unlock_irqrestore(&ctx->wqh.lock, flags);
+
+        return *cnt != 0 ? 0 : -EAGAIN;
+}
+EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue);
+
+/**
+ * eventfd_ctx_read - Reads the eventfd counter or wait if it is zero.
+ * @ctx: [in] Pointer to eventfd context.
+ * @no_wait: [in] Different from zero if the operation should not block.
+ * @cnt: [out] Pointer to the 64bit conter value.
+ *
+ * Returns zero if successful, or the following error codes:
+ *
+ * -EAGAIN      : The operation would have blocked but @no_wait was nonzero.
+ * -ERESTARTSYS : A signal interrupted the wait operation.
+ *
+ * If @no_wait is zero, the function might sleep until the eventfd internal
+ * counter becomes greater than zero.
+ */
+ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt)
 {
-        struct eventfd_ctx *ctx = file->private_data;
         ssize_t res;
-        __u64 ucnt = 0;
         DECLARE_WAITQUEUE(wait, current);
 
-        if (count < sizeof(ucnt))
-                return -EINVAL;
         spin_lock_irq(&ctx->wqh.lock);
+        *cnt = 0;
         res = -EAGAIN;
         if (ctx->count > 0)
-                res = sizeof(ucnt);
-        else if (!(file->f_flags & O_NONBLOCK)) {
+                res = 0;
+        else if (!no_wait) {
                 __add_wait_queue(&ctx->wqh, &wait);
-                for (res = 0;;) {
+                for (;;) {
                         set_current_state(TASK_INTERRUPTIBLE);
                         if (ctx->count > 0) {
-                                res = sizeof(ucnt);
+                                res = 0;
                                 break;
                         }
                         if (signal_pending(current)) {
@@ -168,18 +213,32 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
                 __remove_wait_queue(&ctx->wqh, &wait);
                 __set_current_state(TASK_RUNNING);
         }
-        if (likely(res > 0)) {
-                ucnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
-                ctx->count -= ucnt;
+        if (likely(res == 0)) {
+                eventfd_ctx_do_read(ctx, cnt);
                 if (waitqueue_active(&ctx->wqh))
                         wake_up_locked_poll(&ctx->wqh, POLLOUT);
         }
         spin_unlock_irq(&ctx->wqh.lock);
-        if (res > 0 && put_user(ucnt, (__u64 __user *) buf))
-                return -EFAULT;
 
         return res;
 }
+EXPORT_SYMBOL_GPL(eventfd_ctx_read);
+
+static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
+                            loff_t *ppos)
+{
+        struct eventfd_ctx *ctx = file->private_data;
+        ssize_t res;
+        __u64 cnt;
+
+        if (count < sizeof(cnt))
+                return -EINVAL;
+        res = eventfd_ctx_read(ctx, file->f_flags & O_NONBLOCK, &cnt);
+        if (res < 0)
+                return res;
+
+        return put_user(cnt, (__u64 __user *) buf) ? -EFAULT : sizeof(cnt);
+}
 
 static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count,
                              loff_t *ppos)
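
The new eventfd_ctx_read()/eventfd_ctx_remove_wait_queue() helpers keep the existing read semantics: a non-blocking read of a zero counter fails with -EAGAIN, and in EFD_SEMAPHORE mode each read consumes exactly one count. The same behaviour is visible from userspace through the eventfd(2) syscall; a small demo of those semantics only (it cannot, of course, call the in-kernel helpers):

/* Non-blocking semaphore-mode eventfd: EAGAIN on zero, then one count per read. */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/eventfd.h>
#include <unistd.h>

int main(void)
{
        uint64_t cnt;
        int fd = eventfd(0, EFD_NONBLOCK | EFD_SEMAPHORE);

        if (fd < 0)
                return 1;

        if (read(fd, &cnt, sizeof(cnt)) < 0 && errno == EAGAIN)
                printf("counter is zero: read would block\n");

        cnt = 3;
        if (write(fd, &cnt, sizeof(cnt)) != sizeof(cnt))        /* counter += 3 */
                return 1;

        while (read(fd, &cnt, sizeof(cnt)) == sizeof(cnt))
                printf("got %llu\n", (unsigned long long)cnt);  /* 1, three times */

        close(fd);
        return 0;
}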
diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
index 94dd10366a7..91bb4f27238 100644
--- a/include/linux/eventfd.h
+++ b/include/linux/eventfd.h
@@ -10,6 +10,7 @@
 
 #include <linux/fcntl.h>
 #include <linux/file.h>
+#include <linux/wait.h>
 
 /*
  * CAREFUL: Check include/asm-generic/fcntl.h when defining
@@ -34,6 +35,9 @@ struct file *eventfd_fget(int fd);
 struct eventfd_ctx *eventfd_ctx_fdget(int fd);
 struct eventfd_ctx *eventfd_ctx_fileget(struct file *file);
 int eventfd_signal(struct eventfd_ctx *ctx, int n);
+ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt);
+int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait,
+                                  __u64 *cnt);
 
 #else /* CONFIG_EVENTFD */
 
@@ -61,6 +65,18 @@ static inline void eventfd_ctx_put(struct eventfd_ctx *ctx)
 
 }
 
+static inline ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait,
+                                       __u64 *cnt)
+{
+        return -ENOSYS;
+}
+
+static inline int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx,
+                                                wait_queue_t *wait, __u64 *cnt)
+{
+        return -ENOSYS;
+}
+
 #endif
 
 #endif /* _LINUX_EVENTFD_H */
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 30f70fd511c..a9d3fc6c681 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -72,12 +72,13 @@ static void
 irqfd_shutdown(struct work_struct *work)
 {
         struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown);
+        u64 cnt;
 
         /*
          * Synchronize with the wait-queue and unhook ourselves to prevent
          * further events.
          */
-        remove_wait_queue(irqfd->wqh, &irqfd->wait);
+        eventfd_ctx_remove_wait_queue(irqfd->eventfd, &irqfd->wait, &cnt);
 
         /*
          * We know no new events will be scheduled at this point, so block
@@ -166,7 +167,7 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
 static int
 kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
 {
-        struct _irqfd *irqfd;
+        struct _irqfd *irqfd, *tmp;
         struct file *file = NULL;
         struct eventfd_ctx *eventfd = NULL;
         int ret;
@@ -203,9 +204,20 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
         init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
         init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);
 
+        spin_lock_irq(&kvm->irqfds.lock);
+
+        ret = 0;
+        list_for_each_entry(tmp, &kvm->irqfds.items, list) {
+                if (irqfd->eventfd != tmp->eventfd)
+                        continue;
+                /* This fd is used for another irq already. */
+                ret = -EBUSY;
+                spin_unlock_irq(&kvm->irqfds.lock);
+                goto fail;
+        }
+
         events = file->f_op->poll(file, &irqfd->pt);
 
-        spin_lock_irq(&kvm->irqfds.lock);
         list_add_tail(&irqfd->list, &kvm->irqfds.items);
         spin_unlock_irq(&kvm->irqfds.lock);
 
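
kvm_irqfd_assign() now walks kvm->irqfds.items under irqfds.lock before inserting, so the same eventfd cannot be bound to two GSIs. A compact userspace sketch of that check-then-insert-under-one-lock idea, using a pthread mutex and a plain array instead of the kernel list; register_fd and MAX_ITEMS are illustrative names:

/* Lookup and insert happen under the same lock, so two racing
 * registrations of the same fd cannot both succeed. */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>

#define MAX_ITEMS 16

static pthread_mutex_t items_lock = PTHREAD_MUTEX_INITIALIZER;
static int items[MAX_ITEMS];    /* registered "eventfd" identifiers */
static int nr_items;

static int register_fd(int fd)
{
        int i, ret = 0;

        pthread_mutex_lock(&items_lock);
        for (i = 0; i < nr_items; i++) {
                if (items[i] != fd)
                        continue;
                /* This fd is used for another irq already. */
                ret = -EBUSY;
                goto out;
        }
        if (nr_items < MAX_ITEMS)
                items[nr_items++] = fd;
        else
                ret = -ENOSPC;
out:
        pthread_mutex_unlock(&items_lock);
        return ret;
}

int main(void)
{
        printf("%d\n", register_fd(5)); /* 0 */
        printf("%d\n", register_fd(5)); /* -EBUSY */
        return 0;
}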
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 9b077342ab5..9fd5b3ebc51 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -302,6 +302,7 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
 {
         int r = -EINVAL;
         int delta;
+        unsigned max_pin;
         struct kvm_kernel_irq_routing_entry *ei;
         struct hlist_node *n;
 
@@ -322,12 +323,15 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
         switch (ue->u.irqchip.irqchip) {
         case KVM_IRQCHIP_PIC_MASTER:
                 e->set = kvm_set_pic_irq;
+                max_pin = 16;
                 break;
         case KVM_IRQCHIP_PIC_SLAVE:
                 e->set = kvm_set_pic_irq;
+                max_pin = 16;
                 delta = 8;
                 break;
         case KVM_IRQCHIP_IOAPIC:
+                max_pin = KVM_IOAPIC_NUM_PINS;
                 e->set = kvm_set_ioapic_irq;
                 break;
         default:
@@ -335,7 +339,7 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
         }
         e->irqchip.irqchip = ue->u.irqchip.irqchip;
         e->irqchip.pin = ue->u.irqchip.pin + delta;
-        if (e->irqchip.pin >= KVM_IOAPIC_NUM_PINS)
+        if (e->irqchip.pin >= max_pin)
                 goto out;
         rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
         break;
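
setup_routing_entry() now picks a per-chip pin limit (16 for either PIC, KVM_IOAPIC_NUM_PINS for the IOAPIC) instead of checking every chip against the IOAPIC limit. A small standalone sketch of that per-chip bound, with locally defined enum values and a 24-pin constant standing in for the KVM ones:

/* Validate a routed pin against the limit of the chip it targets. */
#include <stdio.h>

#define IOAPIC_NUM_PINS 24

enum irqchip { PIC_MASTER, PIC_SLAVE, IOAPIC };

static int pin_valid(enum irqchip chip, unsigned pin)
{
        unsigned max_pin, delta = 0;

        switch (chip) {
        case PIC_MASTER:
                max_pin = 16;
                break;
        case PIC_SLAVE:
                max_pin = 16;
                delta = 8;      /* slave pins live at 8..15 of the combined space */
                break;
        case IOAPIC:
                max_pin = IOAPIC_NUM_PINS;
                break;
        default:
                return 0;
        }
        return pin + delta < max_pin;
}

int main(void)
{
        printf("PIC pin 9:     %d\n", pin_valid(PIC_MASTER, 9));        /* ok */
        printf("PIC pin 20:    %d\n", pin_valid(PIC_MASTER, 20));       /* rejected with the per-chip limit */
        printf("IOAPIC pin 20: %d\n", pin_valid(IOAPIC, 20));           /* still ok */
        return 0;
}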