aboutsummaryrefslogblamecommitdiffstats
path: root/virt/kvm/eventfd.c
blob: bb4ebd89b9fff11ee5a0227bf74805650d9ab4a8 (plain) (tree)





















                                                                          
                      





                            

                         



























                                                                         
                                   
                                                                     
                                     



































































































































































                                                                               
                          





                           
                                 

                                           
                                         























































































                                                                              


















































































































































































































































                                                                                
/*
 * kvm eventfd support - use eventfd objects to signal various KVM events
 *
 * Copyright 2009 Novell.  All Rights Reserved.
 *
 * Author:
 *	Gregory Haskins <ghaskins@novell.com>
 *
 * This file is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/workqueue.h>
#include <linux/syscalls.h>
#include <linux/wait.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/list.h>
#include <linux/eventfd.h>
#include <linux/kernel.h>

#include "iodev.h"

/*
 * --------------------------------------------------------------------
 * irqfd: Allows an fd to be used to inject an interrupt to the guest
 *
 * Credit goes to Avi Kivity for the original idea.
 * --------------------------------------------------------------------
 */

/*
 * State binding one eventfd to one guest interrupt line.
 *
 *  kvm      - VM the interrupt is injected into
 *  eventfd  - eventfd context reference taken in kvm_irqfd_assign() and
 *             dropped by irqfd_shutdown()
 *  gsi      - interrupt number handed to kvm_set_irq()
 *  list     - link on kvm->irqfds.items; empty again once the irqfd has
 *             been deactivated
 *  pt       - poll table used to hook into the eventfd's wait-queue
 *  wqh      - wait-queue head recorded by irqfd_ptable_queue_proc()
 *  wait     - wait-queue entry whose callback is irqfd_wakeup()
 *  inject   - work item that runs irqfd_inject()
 *  shutdown - work item that runs irqfd_shutdown()
 */
struct _irqfd {
	struct kvm               *kvm;
	struct eventfd_ctx       *eventfd;
	int                       gsi;
	struct list_head          list;
	poll_table                pt;
	wait_queue_head_t        *wqh;
	wait_queue_t              wait;
	struct work_struct        inject;
	struct work_struct        shutdown;
};

/*
 * Workqueue on which irqfd_deactivate() queues shutdown work; it is flushed
 * by kvm_irqfd_deassign() and kvm_irqfd_release().
 */
static struct workqueue_struct *irqfd_cleanup_wq;

/*
 * Work handler for irqfd->inject: pulse the irqfd's gsi by raising it and
 * then lowering it again, all under kvm->irq_lock.
 */
static void
irqfd_inject(struct work_struct *work)
{
	struct _irqfd *source;
	struct kvm *vm;

	source = container_of(work, struct _irqfd, inject);
	vm = source->kvm;

	mutex_lock(&vm->irq_lock);
	/* assert ... */
	kvm_set_irq(vm, KVM_USERSPACE_IRQ_SOURCE_ID, source->gsi, 1);
	/* ... and immediately de-assert */
	kvm_set_irq(vm, KVM_USERSPACE_IRQ_SOURCE_ID, source->gsi, 0);
	mutex_unlock(&vm->irq_lock);
}

/*
 * Race-free decouple logic (ordering is critical)
 *
 * Work handler for irqfd->shutdown, queued by irqfd_deactivate().  It
 * detaches the irqfd from the eventfd's wait-queue, waits for any pending
 * injection work, and only then releases the eventfd reference and frees
 * the irqfd itself.
 */
static void
irqfd_shutdown(struct work_struct *work)
{
	struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown);

	/*
	 * Synchronize with the wait-queue and unhook ourselves to prevent
	 * further events.
	 */
	remove_wait_queue(irqfd->wqh, &irqfd->wait);

	/*
	 * We know no new events will be scheduled at this point, so block
	 * until all previously outstanding events have completed.
	 */
	flush_work(&irqfd->inject);

	/*
	 * It is now safe to release the object's resources: drop the eventfd
	 * context reference held by the irqfd, then free the irqfd.
	 */
	eventfd_ctx_put(irqfd->eventfd);
	kfree(irqfd);
}


/*
 * An irqfd counts as active for as long as its list node is linked, i.e.
 * until irqfd_deactivate() unlinks it.
 *
 * assumes kvm->irqfds.lock is held
 */
static bool
irqfd_is_active(struct _irqfd *irqfd)
{
	return !list_empty(&irqfd->list);
}

/*
 * Mark the irqfd as inactive and schedule it for removal
 *
 * assumes kvm->irqfds.lock is held
 */
static void
irqfd_deactivate(struct _irqfd *irqfd)
{
	BUG_ON(!irqfd_is_active(irqfd));

	list_del_init(&irqfd->list);

	queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
}

/*
 * Wait-queue callback installed on the eventfd by kvm_irqfd_assign().
 *
 * @wait: wait-queue entry embedded in the irqfd
 * @mode: not used by this callback
 * @sync: not used by this callback
 * @key:  poll event mask, carried as a pointer-sized integer
 *
 * Always returns 0.
 *
 * Called with wqh->lock held and interrupts disabled
 */
static int
irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
	struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
	unsigned long events = (unsigned long)key;

	if (events & POLLIN)
		/* An event has been signaled, inject an interrupt */
		schedule_work(&irqfd->inject);

	if (events & POLLHUP) {
		/* The eventfd is closing, detach from KVM */
		struct kvm *kvm = irqfd->kvm;
		/*
		 * Saved IRQ state for the spinlock; deliberately named
		 * differently from the poll mask so it cannot shadow it.
		 */
		unsigned long irq_flags;

		spin_lock_irqsave(&kvm->irqfds.lock, irq_flags);

		/*
		 * We must check if someone deactivated the irqfd before
		 * we could acquire the irqfds.lock since the item is
		 * deactivated from the KVM side before it is unhooked from
		 * the wait-queue.  If it is already deactivated, we can
		 * simply return knowing the other side will cleanup for us.
		 * We cannot race against the irqfd going away since the
		 * other side is required to acquire wqh->lock, which we hold
		 */
		if (irqfd_is_active(irqfd))
			irqfd_deactivate(irqfd);

		spin_unlock_irqrestore(&kvm->irqfds.lock, irq_flags);
	}

	return 0;
}

/*
 * poll_table queue callback: remember which wait-queue head the polled
 * file handed us and hook the irqfd's wait entry onto it.
 */
static void
irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
			poll_table *pt)
{
	struct _irqfd *owner;

	owner = container_of(pt, struct _irqfd, pt);

	/* irqfd_shutdown() needs the head again to unhook the entry */
	owner->wqh = wqh;
	add_wait_queue(wqh, &owner->wait);
}

/*
 * Bind the eventfd referred to by @fd to interrupt @gsi of @kvm.
 *
 * Returns 0 on success, -ENOMEM if the irqfd cannot be allocated, or the
 * error reported by eventfd_fget() / eventfd_ctx_fileget().
 */
static int
kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
{
	struct _irqfd *irqfd;
	struct eventfd_ctx *ctx;
	struct file *filp;
	unsigned int pending;
	int err;

	irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
	if (!irqfd)
		return -ENOMEM;

	irqfd->kvm = kvm;
	irqfd->gsi = gsi;
	INIT_LIST_HEAD(&irqfd->list);
	INIT_WORK(&irqfd->inject, irqfd_inject);
	INIT_WORK(&irqfd->shutdown, irqfd_shutdown);

	filp = eventfd_fget(fd);
	if (IS_ERR(filp)) {
		err = PTR_ERR(filp);
		goto out_free;
	}

	ctx = eventfd_ctx_fileget(filp);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out_fput;
	}

	irqfd->eventfd = ctx;

	/*
	 * Route wake-ups on the eventfd to irqfd_wakeup() instead of waking
	 * a task, so every signal on the eventfd reaches us as a callback.
	 */
	init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
	init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);

	pending = filp->f_op->poll(filp, &irqfd->pt);

	spin_lock_irq(&kvm->irqfds.lock);
	list_add_tail(&irqfd->list, &kvm->irqfds.items);
	spin_unlock_irq(&kvm->irqfds.lock);

	/*
	 * An event that was already pending before we registered would
	 * otherwise be lost; inject for it now.
	 */
	if (pending & POLLIN)
		schedule_work(&irqfd->inject);

	/*
	 * The file reference is held until the irqfd is fully initialized,
	 * otherwise we might race against the POLLHUP.
	 */
	fput(filp);

	return 0;

out_fput:
	fput(filp);
out_free:
	kfree(irqfd);
	return err;
}

/*
 * Initialize the eventfd bookkeeping of @kvm: the irqfd list, the spinlock
 * taken around accesses to that list, and the ioeventfd list.
 */
void
kvm_eventfd_init(struct kvm *kvm)
{
	/* irqfd state */
	INIT_LIST_HEAD(&kvm->irqfds.items);
	spin_lock_init(&kvm->irqfds.lock);

	/* ioeventfd state */
	INIT_LIST_HEAD(&kvm->ioeventfds);
}

/*
 * Shut down every irqfd of @kvm that matches both the eventfd behind @fd
 * and @gsi.
 *
 * Returns 0 (also when nothing matched), or the error from
 * eventfd_ctx_fdget() when @fd cannot be resolved.
 */
static int
kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi)
{
	struct eventfd_ctx *target;
	struct _irqfd *cur, *next;

	target = eventfd_ctx_fdget(fd);
	if (IS_ERR(target))
		return PTR_ERR(target);

	spin_lock_irq(&kvm->irqfds.lock);

	/* _safe variant: irqfd_deactivate() unlinks the current entry */
	list_for_each_entry_safe(cur, next, &kvm->irqfds.items, list) {
		if (cur->eventfd != target || cur->gsi != gsi)
			continue;

		irqfd_deactivate(cur);
	}

	spin_unlock_irq(&kvm->irqfds.lock);

	/* the context was only needed for the comparison above */
	eventfd_ctx_put(target);

	/*
	 * Wait for all outstanding shutdown jobs, so that no further
	 * interrupt is raised on this gsi once we have returned.
	 */
	flush_workqueue(irqfd_cleanup_wq);

	return 0;
}

/*
 * Entry point for irqfd requests: dispatch to deassign when
 * KVM_IRQFD_FLAG_DEASSIGN is set in @flags, to assign otherwise.
 * Returns whatever the selected operation returns.
 */
int
kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
{
	return (flags & KVM_IRQFD_FLAG_DEASSIGN) ?
		kvm_irqfd_deassign(kvm, fd, gsi) :
		kvm_irqfd_assign(kvm, fd, gsi);
}

/*
 * This function is called as the kvm VM fd is being released. Shutdown all
 * irqfds that still remain open
 */
void
kvm_irqfd_release(struct kvm *kvm)
{
	struct _irqfd *irqfd, *tmp;

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list)
		irqfd_deactivate(irqfd);

	spin_unlock_irq(&kvm->irqfds.lock);

	/*
	 * Block until we know all outstanding shutdown jobs have completed
	 * since we do not take a kvm* reference.
	 */
	flush_workqueue(irqfd_cleanup_wq);

}

/*
 * create a host-wide workqueue for issuing deferred shutdown requests
 * aggregated from all vm* instances. Wegit/diff/arch/i386/kernel/timers/timer_tsc.c?h=v2.6.36-rc7&id=1da177e4c3f41524e886b7f1b8a0c1fc7321cac2'>arch/i386/kernel/timers/timer_tsc.c
560
-rw-r--r--arch/i386/kernel/trampoline.S80
-rw-r--r--arch/i386/kernel/traps.c1084
-rw-r--r--arch/i386/kernel/vm86.c804
-rw-r--r--arch/i386/kernel/vmlinux.lds.S134
-rw-r--r--arch/i386/kernel/vsyscall-int80.S53
-rw-r--r--arch/i386/kernel/vsyscall-sigreturn.S142
-rw-r--r--arch/i386/kernel/vsyscall-sysenter.S104
-rw-r--r--arch/i386/kernel/vsyscall.S15
-rw-r--r--arch/i386/kernel/vsyscall.lds.S65
124 files changed, 43777 insertions, 0 deletions
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
new file mode 100644
index 000000000000..933787a46b4c
--- /dev/null
+++ b/arch/i386/kernel/Makefile
@@ -0,0 +1,71 @@
1#
2# Makefile for the linux kernel.
3#
4
5extra-y := head.o init_task.o vmlinux.lds
6
7obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \
8 ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
9 pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \
10 doublefault.o quirks.o
11
12obj-y += cpu/
13obj-y += timers/
14obj-$(CONFIG_ACPI_BOOT) += acpi/
15obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o
16obj-$(CONFIG_MCA) += mca.o
17obj-$(CONFIG_X86_MSR) += msr.o
18obj-$(CONFIG_X86_CPUID) += cpuid.o
19obj-$(CONFIG_MICROCODE) += microcode.o
20obj-$(CONFIG_APM) += apm.o
21obj-$(CONFIG_X86_SMP) += smp.o smpboot.o
22obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
23obj-$(CONFIG_X86_MPPARSE) += mpparse.o
24obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
25obj-$(CONFIG_X86_IO_APIC) += io_apic.o
26obj-$(CONFIG_X86_NUMAQ) += numaq.o
27obj-$(CONFIG_X86_SUMMIT_NUMA) += summit.o
28obj-$(CONFIG_KPROBES) += kprobes.o
29obj-$(CONFIG_MODULES) += module.o
30obj-y += sysenter.o vsyscall.o
31obj-$(CONFIG_ACPI_SRAT) += srat.o
32obj-$(CONFIG_HPET_TIMER) += time_hpet.o
33obj-$(CONFIG_EFI) += efi.o efi_stub.o
34obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
35
36EXTRA_AFLAGS := -traditional
37
38obj-$(CONFIG_SCx200) += scx200.o
39
40# vsyscall.o contains the vsyscall DSO images as __initdata.
41# We must build both images before we can assemble it.
42# Note: kbuild does not track this dependency due to usage of .incbin
43$(obj)/vsyscall.o: $(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so
44targets += $(foreach F,int80 sysenter,vsyscall-$F.o vsyscall-$F.so)
45targets += vsyscall.lds
46
47# The DSO images are built using a special linker script.
48quiet_cmd_syscall = SYSCALL $@
49 cmd_syscall = $(CC) -m elf_i386 -nostdlib $(SYSCFLAGS_$(@F)) \
50 -Wl,-T,$(filter-out FORCE,$^) -o $@
51
52export CPPFLAGS_vsyscall.lds += -P -C -U$(ARCH)
53
54vsyscall-flags = -shared -s -Wl,-soname=linux-gate.so.1
55SYSCFLAGS_vsyscall-sysenter.so = $(vsyscall-flags)
56SYSCFLAGS_vsyscall-int80.so = $(vsyscall-flags)
57
58$(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so: \
59$(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE
60 $(call if_changed,syscall)
61
62# We also create a special relocatable object that should mirror the symbol
63# table and layout of the linked DSO. With ld -R we can then refer to
64# these symbols in the kernel code rather than hand-coded addresses.
65extra-y += vsyscall-syms.o
66$(obj)/built-in.o: $(obj)/vsyscall-syms.o
67$(obj)/built-in.o: ld_flags += -R $(obj)/vsyscall-syms.o
68
69SYSCFLAGS_vsyscall-syms.o = -r
70$(obj)/vsyscall-syms.o: $(src)/vsyscall.lds $(obj)/vsyscall-sysenter.o FORCE
71 $(call if_changed,syscall)
diff --git a/arch/i386/kernel/acpi/Makefile b/arch/i386/kernel/acpi/Makefile
new file mode 100644
index 000000000000..ee75cb286c