diff options
author | Avi Kivity <avi@qumranet.com> | 2007-02-21 12:28:04 -0500 |
---|---|---|
committer | Avi Kivity <avi@qumranet.com> | 2007-03-04 04:12:42 -0500 |
commit | f17abe9a44425ff9c9858bc1806cc09d6b5dad1c (patch) | |
tree | 47b060818bbea16dfecf21b8008a99c978a5f0c3 /drivers | |
parent | 37e29d906c6eb1ece907e509160518b2edc2c083 (diff) |
KVM: Create an inode per virtual machine
This avoids having filp->f_op differ from the corresponding inode->i_fop,
which is a little unorthodox.
The ioctl list is split into two: global kvm ioctls and per-vm ioctls. A new
ioctl, KVM_CREATE_VM, is used to create VMs and return the VM fd.
Signed-off-by: Avi Kivity <avi@qumranet.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/kvm/kvm_main.c | 212 |
1 files changed, 171 insertions, 41 deletions
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index c01252e84377..aa07d9c9d20d 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <asm/desc.h> | 36 | #include <asm/desc.h> |
37 | #include <linux/sysdev.h> | 37 | #include <linux/sysdev.h> |
38 | #include <linux/cpu.h> | 38 | #include <linux/cpu.h> |
39 | #include <linux/file.h> | ||
39 | #include <linux/fs.h> | 40 | #include <linux/fs.h> |
40 | #include <linux/mount.h> | 41 | #include <linux/mount.h> |
41 | 42 | ||
@@ -95,6 +96,55 @@ struct segment_descriptor_64 { | |||
95 | 96 | ||
96 | #endif | 97 | #endif |
97 | 98 | ||
99 | static struct inode *kvmfs_inode(struct file_operations *fops) | ||
100 | { | ||
101 | int error = -ENOMEM; | ||
102 | struct inode *inode = new_inode(kvmfs_mnt->mnt_sb); | ||
103 | |||
104 | if (!inode) | ||
105 | goto eexit_1; | ||
106 | |||
107 | inode->i_fop = fops; | ||
108 | |||
109 | /* | ||
110 | * Mark the inode dirty from the very beginning, | ||
111 | * that way it will never be moved to the dirty | ||
112 | * list because mark_inode_dirty() will think | ||
113 | * that it already _is_ on the dirty list. | ||
114 | */ | ||
115 | inode->i_state = I_DIRTY; | ||
116 | inode->i_mode = S_IRUSR | S_IWUSR; | ||
117 | inode->i_uid = current->fsuid; | ||
118 | inode->i_gid = current->fsgid; | ||
119 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
120 | return inode; | ||
121 | |||
122 | eexit_1: | ||
123 | return ERR_PTR(error); | ||
124 | } | ||
125 | |||
126 | static struct file *kvmfs_file(struct inode *inode, void *private_data) | ||
127 | { | ||
128 | struct file *file = get_empty_filp(); | ||
129 | |||
130 | if (!file) | ||
131 | return ERR_PTR(-ENFILE); | ||
132 | |||
133 | file->f_path.mnt = mntget(kvmfs_mnt); | ||
134 | file->f_path.dentry = d_alloc_anon(inode); | ||
135 | if (!file->f_path.dentry) | ||
136 | return ERR_PTR(-ENOMEM); | ||
137 | file->f_mapping = inode->i_mapping; | ||
138 | |||
139 | file->f_pos = 0; | ||
140 | file->f_flags = O_RDWR; | ||
141 | file->f_op = inode->i_fop; | ||
142 | file->f_mode = FMODE_READ | FMODE_WRITE; | ||
143 | file->f_version = 0; | ||
144 | file->private_data = private_data; | ||
145 | return file; | ||
146 | } | ||
147 | |||
98 | unsigned long segment_base(u16 selector) | 148 | unsigned long segment_base(u16 selector) |
99 | { | 149 | { |
100 | struct descriptor_table gdt; | 150 | struct descriptor_table gdt; |
@@ -222,13 +272,13 @@ static void vcpu_put(struct kvm_vcpu *vcpu) | |||
222 | mutex_unlock(&vcpu->mutex); | 272 | mutex_unlock(&vcpu->mutex); |
223 | } | 273 | } |
224 | 274 | ||
225 | static int kvm_dev_open(struct inode *inode, struct file *filp) | 275 | static struct kvm *kvm_create_vm(void) |
226 | { | 276 | { |
227 | struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); | 277 | struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); |
228 | int i; | 278 | int i; |
229 | 279 | ||
230 | if (!kvm) | 280 | if (!kvm) |
231 | return -ENOMEM; | 281 | return ERR_PTR(-ENOMEM); |
232 | 282 | ||
233 | spin_lock_init(&kvm->lock); | 283 | spin_lock_init(&kvm->lock); |
234 | INIT_LIST_HEAD(&kvm->active_mmu_pages); | 284 | INIT_LIST_HEAD(&kvm->active_mmu_pages); |
@@ -244,7 +294,11 @@ static int kvm_dev_open(struct inode *inode, struct file *filp) | |||
244 | list_add(&kvm->vm_list, &vm_list); | 294 | list_add(&kvm->vm_list, &vm_list); |
245 | spin_unlock(&kvm_lock); | 295 | spin_unlock(&kvm_lock); |
246 | } | 296 | } |
247 | filp->private_data = kvm; | 297 | return kvm; |
298 | } | ||
299 | |||
300 | static int kvm_dev_open(struct inode *inode, struct file *filp) | ||
301 | { | ||
248 | return 0; | 302 | return 0; |
249 | } | 303 | } |
250 | 304 | ||
@@ -300,14 +354,24 @@ static void kvm_free_vcpus(struct kvm *kvm) | |||
300 | 354 | ||
301 | static int kvm_dev_release(struct inode *inode, struct file *filp) | 355 | static int kvm_dev_release(struct inode *inode, struct file *filp) |
302 | { | 356 | { |
303 | struct kvm *kvm = filp->private_data; | 357 | return 0; |
358 | } | ||
304 | 359 | ||
360 | static void kvm_destroy_vm(struct kvm *kvm) | ||
361 | { | ||
305 | spin_lock(&kvm_lock); | 362 | spin_lock(&kvm_lock); |
306 | list_del(&kvm->vm_list); | 363 | list_del(&kvm->vm_list); |
307 | spin_unlock(&kvm_lock); | 364 | spin_unlock(&kvm_lock); |
308 | kvm_free_vcpus(kvm); | 365 | kvm_free_vcpus(kvm); |
309 | kvm_free_physmem(kvm); | 366 | kvm_free_physmem(kvm); |
310 | kfree(kvm); | 367 | kfree(kvm); |
368 | } | ||
369 | |||
370 | static int kvm_vm_release(struct inode *inode, struct file *filp) | ||
371 | { | ||
372 | struct kvm *kvm = filp->private_data; | ||
373 | |||
374 | kvm_destroy_vm(kvm); | ||
311 | return 0; | 375 | return 0; |
312 | } | 376 | } |
313 | 377 | ||
@@ -1900,17 +1964,14 @@ static int kvm_dev_ioctl_debug_guest(struct kvm *kvm, | |||
1900 | return r; | 1964 | return r; |
1901 | } | 1965 | } |
1902 | 1966 | ||
1903 | static long kvm_dev_ioctl(struct file *filp, | 1967 | static long kvm_vm_ioctl(struct file *filp, |
1904 | unsigned int ioctl, unsigned long arg) | 1968 | unsigned int ioctl, unsigned long arg) |
1905 | { | 1969 | { |
1906 | struct kvm *kvm = filp->private_data; | 1970 | struct kvm *kvm = filp->private_data; |
1907 | void __user *argp = (void __user *)arg; | 1971 | void __user *argp = (void __user *)arg; |
1908 | int r = -EINVAL; | 1972 | int r = -EINVAL; |
1909 | 1973 | ||
1910 | switch (ioctl) { | 1974 | switch (ioctl) { |
1911 | case KVM_GET_API_VERSION: | ||
1912 | r = KVM_API_VERSION; | ||
1913 | break; | ||
1914 | case KVM_CREATE_VCPU: | 1975 | case KVM_CREATE_VCPU: |
1915 | r = kvm_dev_ioctl_create_vcpu(kvm, arg); | 1976 | r = kvm_dev_ioctl_create_vcpu(kvm, arg); |
1916 | if (r) | 1977 | if (r) |
@@ -2052,6 +2113,107 @@ static long kvm_dev_ioctl(struct file *filp, | |||
2052 | case KVM_SET_MSRS: | 2113 | case KVM_SET_MSRS: |
2053 | r = msr_io(kvm, argp, do_set_msr, 0); | 2114 | r = msr_io(kvm, argp, do_set_msr, 0); |
2054 | break; | 2115 | break; |
2116 | default: | ||
2117 | ; | ||
2118 | } | ||
2119 | out: | ||
2120 | return r; | ||
2121 | } | ||
2122 | |||
2123 | static struct page *kvm_vm_nopage(struct vm_area_struct *vma, | ||
2124 | unsigned long address, | ||
2125 | int *type) | ||
2126 | { | ||
2127 | struct kvm *kvm = vma->vm_file->private_data; | ||
2128 | unsigned long pgoff; | ||
2129 | struct kvm_memory_slot *slot; | ||
2130 | struct page *page; | ||
2131 | |||
2132 | *type = VM_FAULT_MINOR; | ||
2133 | pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; | ||
2134 | slot = gfn_to_memslot(kvm, pgoff); | ||
2135 | if (!slot) | ||
2136 | return NOPAGE_SIGBUS; | ||
2137 | page = gfn_to_page(slot, pgoff); | ||
2138 | if (!page) | ||
2139 | return NOPAGE_SIGBUS; | ||
2140 | get_page(page); | ||
2141 | return page; | ||
2142 | } | ||
2143 | |||
2144 | static struct vm_operations_struct kvm_vm_vm_ops = { | ||
2145 | .nopage = kvm_vm_nopage, | ||
2146 | }; | ||
2147 | |||
2148 | static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma) | ||
2149 | { | ||
2150 | vma->vm_ops = &kvm_vm_vm_ops; | ||
2151 | return 0; | ||
2152 | } | ||
2153 | |||
2154 | static struct file_operations kvm_vm_fops = { | ||
2155 | .release = kvm_vm_release, | ||
2156 | .unlocked_ioctl = kvm_vm_ioctl, | ||
2157 | .compat_ioctl = kvm_vm_ioctl, | ||
2158 | .mmap = kvm_vm_mmap, | ||
2159 | }; | ||
2160 | |||
2161 | static int kvm_dev_ioctl_create_vm(void) | ||
2162 | { | ||
2163 | int fd, r; | ||
2164 | struct inode *inode; | ||
2165 | struct file *file; | ||
2166 | struct kvm *kvm; | ||
2167 | |||
2168 | inode = kvmfs_inode(&kvm_vm_fops); | ||
2169 | if (IS_ERR(inode)) { | ||
2170 | r = PTR_ERR(inode); | ||
2171 | goto out1; | ||
2172 | } | ||
2173 | |||
2174 | kvm = kvm_create_vm(); | ||
2175 | if (IS_ERR(kvm)) { | ||
2176 | r = PTR_ERR(kvm); | ||
2177 | goto out2; | ||
2178 | } | ||
2179 | |||
2180 | file = kvmfs_file(inode, kvm); | ||
2181 | if (IS_ERR(file)) { | ||
2182 | r = PTR_ERR(file); | ||
2183 | goto out3; | ||
2184 | } | ||
2185 | |||
2186 | r = get_unused_fd(); | ||
2187 | if (r < 0) | ||
2188 | goto out4; | ||
2189 | fd = r; | ||
2190 | fd_install(fd, file); | ||
2191 | |||
2192 | return fd; | ||
2193 | |||
2194 | out4: | ||
2195 | fput(file); | ||
2196 | out3: | ||
2197 | kvm_destroy_vm(kvm); | ||
2198 | out2: | ||
2199 | iput(inode); | ||
2200 | out1: | ||
2201 | return r; | ||
2202 | } | ||
2203 | |||
2204 | static long kvm_dev_ioctl(struct file *filp, | ||
2205 | unsigned int ioctl, unsigned long arg) | ||
2206 | { | ||
2207 | void __user *argp = (void __user *)arg; | ||
2208 | int r = -EINVAL; | ||
2209 | |||
2210 | switch (ioctl) { | ||
2211 | case KVM_GET_API_VERSION: | ||
2212 | r = KVM_API_VERSION; | ||
2213 | break; | ||
2214 | case KVM_CREATE_VM: | ||
2215 | r = kvm_dev_ioctl_create_vm(); | ||
2216 | break; | ||
2055 | case KVM_GET_MSR_INDEX_LIST: { | 2217 | case KVM_GET_MSR_INDEX_LIST: { |
2056 | struct kvm_msr_list __user *user_msr_list = argp; | 2218 | struct kvm_msr_list __user *user_msr_list = argp; |
2057 | struct kvm_msr_list msr_list; | 2219 | struct kvm_msr_list msr_list; |
@@ -2086,43 +2248,11 @@ out: | |||
2086 | return r; | 2248 | return r; |
2087 | } | 2249 | } |
2088 | 2250 | ||
2089 | static struct page *kvm_dev_nopage(struct vm_area_struct *vma, | ||
2090 | unsigned long address, | ||
2091 | int *type) | ||
2092 | { | ||
2093 | struct kvm *kvm = vma->vm_file->private_data; | ||
2094 | unsigned long pgoff; | ||
2095 | struct kvm_memory_slot *slot; | ||
2096 | struct page *page; | ||
2097 | |||
2098 | *type = VM_FAULT_MINOR; | ||
2099 | pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; | ||
2100 | slot = gfn_to_memslot(kvm, pgoff); | ||
2101 | if (!slot) | ||
2102 | return NOPAGE_SIGBUS; | ||
2103 | page = gfn_to_page(slot, pgoff); | ||
2104 | if (!page) | ||
2105 | return NOPAGE_SIGBUS; | ||
2106 | get_page(page); | ||
2107 | return page; | ||
2108 | } | ||
2109 | |||
2110 | static struct vm_operations_struct kvm_dev_vm_ops = { | ||
2111 | .nopage = kvm_dev_nopage, | ||
2112 | }; | ||
2113 | |||
2114 | static int kvm_dev_mmap(struct file *file, struct vm_area_struct *vma) | ||
2115 | { | ||
2116 | vma->vm_ops = &kvm_dev_vm_ops; | ||
2117 | return 0; | ||
2118 | } | ||
2119 | |||
2120 | static struct file_operations kvm_chardev_ops = { | 2251 | static struct file_operations kvm_chardev_ops = { |
2121 | .open = kvm_dev_open, | 2252 | .open = kvm_dev_open, |
2122 | .release = kvm_dev_release, | 2253 | .release = kvm_dev_release, |
2123 | .unlocked_ioctl = kvm_dev_ioctl, | 2254 | .unlocked_ioctl = kvm_dev_ioctl, |
2124 | .compat_ioctl = kvm_dev_ioctl, | 2255 | .compat_ioctl = kvm_dev_ioctl, |
2125 | .mmap = kvm_dev_mmap, | ||
2126 | }; | 2256 | }; |
2127 | 2257 | ||
2128 | static struct miscdevice kvm_dev = { | 2258 | static struct miscdevice kvm_dev = { |