diff options
| author | Avi Kivity <avi@qumranet.com> | 2007-02-21 12:28:04 -0500 |
|---|---|---|
| committer | Avi Kivity <avi@qumranet.com> | 2007-03-04 04:12:42 -0500 |
| commit | f17abe9a44425ff9c9858bc1806cc09d6b5dad1c (patch) | |
| tree | 47b060818bbea16dfecf21b8008a99c978a5f0c3 /drivers | |
| parent | 37e29d906c6eb1ece907e509160518b2edc2c083 (diff) | |
KVM: Create an inode per virtual machine
This avoids having filp->f_op differ from the corresponding inode->i_fop,
a mismatch that is a little unorthodox.
The ioctl list is split into two: global kvm ioctls and per-vm ioctls. A new
ioctl, KVM_CREATE_VM, is used to create VMs and return the VM fd.
Signed-off-by: Avi Kivity <avi@qumranet.com>
Diffstat (limited to 'drivers')
| -rw-r--r-- | drivers/kvm/kvm_main.c | 212 |
1 files changed, 171 insertions, 41 deletions
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index c01252e84377..aa07d9c9d20d 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c | |||
| @@ -36,6 +36,7 @@ | |||
| 36 | #include <asm/desc.h> | 36 | #include <asm/desc.h> |
| 37 | #include <linux/sysdev.h> | 37 | #include <linux/sysdev.h> |
| 38 | #include <linux/cpu.h> | 38 | #include <linux/cpu.h> |
| 39 | #include <linux/file.h> | ||
| 39 | #include <linux/fs.h> | 40 | #include <linux/fs.h> |
| 40 | #include <linux/mount.h> | 41 | #include <linux/mount.h> |
| 41 | 42 | ||
| @@ -95,6 +96,55 @@ struct segment_descriptor_64 { | |||
| 95 | 96 | ||
| 96 | #endif | 97 | #endif |
| 97 | 98 | ||
| 99 | static struct inode *kvmfs_inode(struct file_operations *fops) | ||
| 100 | { | ||
| 101 | int error = -ENOMEM; | ||
| 102 | struct inode *inode = new_inode(kvmfs_mnt->mnt_sb); | ||
| 103 | |||
| 104 | if (!inode) | ||
| 105 | goto eexit_1; | ||
| 106 | |||
| 107 | inode->i_fop = fops; | ||
| 108 | |||
| 109 | /* | ||
| 110 | * Mark the inode dirty from the very beginning, | ||
| 111 | * that way it will never be moved to the dirty | ||
| 112 | * list because mark_inode_dirty() will think | ||
| 113 | * that it already _is_ on the dirty list. | ||
| 114 | */ | ||
| 115 | inode->i_state = I_DIRTY; | ||
| 116 | inode->i_mode = S_IRUSR | S_IWUSR; | ||
| 117 | inode->i_uid = current->fsuid; | ||
| 118 | inode->i_gid = current->fsgid; | ||
| 119 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
| 120 | return inode; | ||
| 121 | |||
| 122 | eexit_1: | ||
| 123 | return ERR_PTR(error); | ||
| 124 | } | ||
| 125 | |||
| 126 | static struct file *kvmfs_file(struct inode *inode, void *private_data) | ||
| 127 | { | ||
| 128 | struct file *file = get_empty_filp(); | ||
| 129 | |||
| 130 | if (!file) | ||
| 131 | return ERR_PTR(-ENFILE); | ||
| 132 | |||
| 133 | file->f_path.mnt = mntget(kvmfs_mnt); | ||
| 134 | file->f_path.dentry = d_alloc_anon(inode); | ||
| 135 | if (!file->f_path.dentry) | ||
| 136 | return ERR_PTR(-ENOMEM); | ||
| 137 | file->f_mapping = inode->i_mapping; | ||
| 138 | |||
| 139 | file->f_pos = 0; | ||
| 140 | file->f_flags = O_RDWR; | ||
| 141 | file->f_op = inode->i_fop; | ||
| 142 | file->f_mode = FMODE_READ | FMODE_WRITE; | ||
| 143 | file->f_version = 0; | ||
| 144 | file->private_data = private_data; | ||
| 145 | return file; | ||
| 146 | } | ||
| 147 | |||
| 98 | unsigned long segment_base(u16 selector) | 148 | unsigned long segment_base(u16 selector) |
| 99 | { | 149 | { |
| 100 | struct descriptor_table gdt; | 150 | struct descriptor_table gdt; |
| @@ -222,13 +272,13 @@ static void vcpu_put(struct kvm_vcpu *vcpu) | |||
| 222 | mutex_unlock(&vcpu->mutex); | 272 | mutex_unlock(&vcpu->mutex); |
| 223 | } | 273 | } |
| 224 | 274 | ||
| 225 | static int kvm_dev_open(struct inode *inode, struct file *filp) | 275 | static struct kvm *kvm_create_vm(void) |
| 226 | { | 276 | { |
| 227 | struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); | 277 | struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); |
| 228 | int i; | 278 | int i; |
| 229 | 279 | ||
| 230 | if (!kvm) | 280 | if (!kvm) |
| 231 | return -ENOMEM; | 281 | return ERR_PTR(-ENOMEM); |
| 232 | 282 | ||
| 233 | spin_lock_init(&kvm->lock); | 283 | spin_lock_init(&kvm->lock); |
| 234 | INIT_LIST_HEAD(&kvm->active_mmu_pages); | 284 | INIT_LIST_HEAD(&kvm->active_mmu_pages); |
| @@ -244,7 +294,11 @@ static int kvm_dev_open(struct inode *inode, struct file *filp) | |||
| 244 | list_add(&kvm->vm_list, &vm_list); | 294 | list_add(&kvm->vm_list, &vm_list); |
| 245 | spin_unlock(&kvm_lock); | 295 | spin_unlock(&kvm_lock); |
| 246 | } | 296 | } |
| 247 | filp->private_data = kvm; | 297 | return kvm; |
| 298 | } | ||
| 299 | |||
| 300 | static int kvm_dev_open(struct inode *inode, struct file *filp) | ||
| 301 | { | ||
| 248 | return 0; | 302 | return 0; |
| 249 | } | 303 | } |
| 250 | 304 | ||
| @@ -300,14 +354,24 @@ static void kvm_free_vcpus(struct kvm *kvm) | |||
| 300 | 354 | ||
| 301 | static int kvm_dev_release(struct inode *inode, struct file *filp) | 355 | static int kvm_dev_release(struct inode *inode, struct file *filp) |
| 302 | { | 356 | { |
| 303 | struct kvm *kvm = filp->private_data; | 357 | return 0; |
| 358 | } | ||
| 304 | 359 | ||
| 360 | static void kvm_destroy_vm(struct kvm *kvm) | ||
| 361 | { | ||
| 305 | spin_lock(&kvm_lock); | 362 | spin_lock(&kvm_lock); |
| 306 | list_del(&kvm->vm_list); | 363 | list_del(&kvm->vm_list); |
| 307 | spin_unlock(&kvm_lock); | 364 | spin_unlock(&kvm_lock); |
| 308 | kvm_free_vcpus(kvm); | 365 | kvm_free_vcpus(kvm); |
| 309 | kvm_free_physmem(kvm); | 366 | kvm_free_physmem(kvm); |
| 310 | kfree(kvm); | 367 | kfree(kvm); |
| 368 | } | ||
| 369 | |||
| 370 | static int kvm_vm_release(struct inode *inode, struct file *filp) | ||
| 371 | { | ||
| 372 | struct kvm *kvm = filp->private_data; | ||
| 373 | |||
| 374 | kvm_destroy_vm(kvm); | ||
| 311 | return 0; | 375 | return 0; |
| 312 | } | 376 | } |
| 313 | 377 | ||
| @@ -1900,17 +1964,14 @@ static int kvm_dev_ioctl_debug_guest(struct kvm *kvm, | |||
| 1900 | return r; | 1964 | return r; |
| 1901 | } | 1965 | } |
| 1902 | 1966 | ||
| 1903 | static long kvm_dev_ioctl(struct file *filp, | 1967 | static long kvm_vm_ioctl(struct file *filp, |
| 1904 | unsigned int ioctl, unsigned long arg) | 1968 | unsigned int ioctl, unsigned long arg) |
| 1905 | { | 1969 | { |
| 1906 | struct kvm *kvm = filp->private_data; | 1970 | struct kvm *kvm = filp->private_data; |
| 1907 | void __user *argp = (void __user *)arg; | 1971 | void __user *argp = (void __user *)arg; |
| 1908 | int r = -EINVAL; | 1972 | int r = -EINVAL; |
| 1909 | 1973 | ||
| 1910 | switch (ioctl) { | 1974 | switch (ioctl) { |
| 1911 | case KVM_GET_API_VERSION: | ||
| 1912 | r = KVM_API_VERSION; | ||
| 1913 | break; | ||
| 1914 | case KVM_CREATE_VCPU: | 1975 | case KVM_CREATE_VCPU: |
| 1915 | r = kvm_dev_ioctl_create_vcpu(kvm, arg); | 1976 | r = kvm_dev_ioctl_create_vcpu(kvm, arg); |
| 1916 | if (r) | 1977 | if (r) |
| @@ -2052,6 +2113,107 @@ static long kvm_dev_ioctl(struct file *filp, | |||
| 2052 | case KVM_SET_MSRS: | 2113 | case KVM_SET_MSRS: |
| 2053 | r = msr_io(kvm, argp, do_set_msr, 0); | 2114 | r = msr_io(kvm, argp, do_set_msr, 0); |
| 2054 | break; | 2115 | break; |
| 2116 | default: | ||
| 2117 | ; | ||
| 2118 | } | ||
| 2119 | out: | ||
| 2120 | return r; | ||
| 2121 | } | ||
| 2122 | |||
| 2123 | static struct page *kvm_vm_nopage(struct vm_area_struct *vma, | ||
| 2124 | unsigned long address, | ||
| 2125 | int *type) | ||
| 2126 | { | ||
| 2127 | struct kvm *kvm = vma->vm_file->private_data; | ||
| 2128 | unsigned long pgoff; | ||
| 2129 | struct kvm_memory_slot *slot; | ||
| 2130 | struct page *page; | ||
| 2131 | |||
| 2132 | *type = VM_FAULT_MINOR; | ||
| 2133 | pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; | ||
| 2134 | slot = gfn_to_memslot(kvm, pgoff); | ||
| 2135 | if (!slot) | ||
| 2136 | return NOPAGE_SIGBUS; | ||
| 2137 | page = gfn_to_page(slot, pgoff); | ||
| 2138 | if (!page) | ||
| 2139 | return NOPAGE_SIGBUS; | ||
| 2140 | get_page(page); | ||
| 2141 | return page; | ||
| 2142 | } | ||
| 2143 | |||
| 2144 | static struct vm_operations_struct kvm_vm_vm_ops = { | ||
| 2145 | .nopage = kvm_vm_nopage, | ||
| 2146 | }; | ||
| 2147 | |||
| 2148 | static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma) | ||
| 2149 | { | ||
| 2150 | vma->vm_ops = &kvm_vm_vm_ops; | ||
| 2151 | return 0; | ||
| 2152 | } | ||
| 2153 | |||
| 2154 | static struct file_operations kvm_vm_fops = { | ||
| 2155 | .release = kvm_vm_release, | ||
| 2156 | .unlocked_ioctl = kvm_vm_ioctl, | ||
| 2157 | .compat_ioctl = kvm_vm_ioctl, | ||
| 2158 | .mmap = kvm_vm_mmap, | ||
| 2159 | }; | ||
| 2160 | |||
| 2161 | static int kvm_dev_ioctl_create_vm(void) | ||
| 2162 | { | ||
| 2163 | int fd, r; | ||
| 2164 | struct inode *inode; | ||
| 2165 | struct file *file; | ||
| 2166 | struct kvm *kvm; | ||
| 2167 | |||
| 2168 | inode = kvmfs_inode(&kvm_vm_fops); | ||
| 2169 | if (IS_ERR(inode)) { | ||
| 2170 | r = PTR_ERR(inode); | ||
| 2171 | goto out1; | ||
| 2172 | } | ||
| 2173 | |||
| 2174 | kvm = kvm_create_vm(); | ||
| 2175 | if (IS_ERR(kvm)) { | ||
| 2176 | r = PTR_ERR(kvm); | ||
| 2177 | goto out2; | ||
| 2178 | } | ||
| 2179 | |||
| 2180 | file = kvmfs_file(inode, kvm); | ||
| 2181 | if (IS_ERR(file)) { | ||
| 2182 | r = PTR_ERR(file); | ||
| 2183 | goto out3; | ||
| 2184 | } | ||
| 2185 | |||
| 2186 | r = get_unused_fd(); | ||
| 2187 | if (r < 0) | ||
| 2188 | goto out4; | ||
| 2189 | fd = r; | ||
| 2190 | fd_install(fd, file); | ||
| 2191 | |||
| 2192 | return fd; | ||
| 2193 | |||
| 2194 | out4: | ||
| 2195 | fput(file); | ||
| 2196 | out3: | ||
| 2197 | kvm_destroy_vm(kvm); | ||
| 2198 | out2: | ||
| 2199 | iput(inode); | ||
| 2200 | out1: | ||
| 2201 | return r; | ||
| 2202 | } | ||
| 2203 | |||
| 2204 | static long kvm_dev_ioctl(struct file *filp, | ||
| 2205 | unsigned int ioctl, unsigned long arg) | ||
| 2206 | { | ||
| 2207 | void __user *argp = (void __user *)arg; | ||
| 2208 | int r = -EINVAL; | ||
| 2209 | |||
| 2210 | switch (ioctl) { | ||
| 2211 | case KVM_GET_API_VERSION: | ||
| 2212 | r = KVM_API_VERSION; | ||
| 2213 | break; | ||
| 2214 | case KVM_CREATE_VM: | ||
| 2215 | r = kvm_dev_ioctl_create_vm(); | ||
| 2216 | break; | ||
| 2055 | case KVM_GET_MSR_INDEX_LIST: { | 2217 | case KVM_GET_MSR_INDEX_LIST: { |
| 2056 | struct kvm_msr_list __user *user_msr_list = argp; | 2218 | struct kvm_msr_list __user *user_msr_list = argp; |
| 2057 | struct kvm_msr_list msr_list; | 2219 | struct kvm_msr_list msr_list; |
| @@ -2086,43 +2248,11 @@ out: | |||
| 2086 | return r; | 2248 | return r; |
| 2087 | } | 2249 | } |
| 2088 | 2250 | ||
| 2089 | static struct page *kvm_dev_nopage(struct vm_area_struct *vma, | ||
| 2090 | unsigned long address, | ||
| 2091 | int *type) | ||
| 2092 | { | ||
| 2093 | struct kvm *kvm = vma->vm_file->private_data; | ||
| 2094 | unsigned long pgoff; | ||
| 2095 | struct kvm_memory_slot *slot; | ||
| 2096 | struct page *page; | ||
| 2097 | |||
| 2098 | *type = VM_FAULT_MINOR; | ||
| 2099 | pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; | ||
| 2100 | slot = gfn_to_memslot(kvm, pgoff); | ||
| 2101 | if (!slot) | ||
| 2102 | return NOPAGE_SIGBUS; | ||
| 2103 | page = gfn_to_page(slot, pgoff); | ||
| 2104 | if (!page) | ||
| 2105 | return NOPAGE_SIGBUS; | ||
| 2106 | get_page(page); | ||
| 2107 | return page; | ||
| 2108 | } | ||
| 2109 | |||
| 2110 | static struct vm_operations_struct kvm_dev_vm_ops = { | ||
| 2111 | .nopage = kvm_dev_nopage, | ||
| 2112 | }; | ||
| 2113 | |||
| 2114 | static int kvm_dev_mmap(struct file *file, struct vm_area_struct *vma) | ||
| 2115 | { | ||
| 2116 | vma->vm_ops = &kvm_dev_vm_ops; | ||
| 2117 | return 0; | ||
| 2118 | } | ||
| 2119 | |||
| 2120 | static struct file_operations kvm_chardev_ops = { | 2251 | static struct file_operations kvm_chardev_ops = { |
| 2121 | .open = kvm_dev_open, | 2252 | .open = kvm_dev_open, |
| 2122 | .release = kvm_dev_release, | 2253 | .release = kvm_dev_release, |
| 2123 | .unlocked_ioctl = kvm_dev_ioctl, | 2254 | .unlocked_ioctl = kvm_dev_ioctl, |
| 2124 | .compat_ioctl = kvm_dev_ioctl, | 2255 | .compat_ioctl = kvm_dev_ioctl, |
| 2125 | .mmap = kvm_dev_mmap, | ||
| 2126 | }; | 2256 | }; |
| 2127 | 2257 | ||
| 2128 | static struct miscdevice kvm_dev = { | 2258 | static struct miscdevice kvm_dev = { |
