aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm/x86.c
diff options
context:
space:
mode:
authorBen-Ami Yassour <benami@il.ibm.com>2008-07-28 12:26:26 -0400
committerAvi Kivity <avi@qumranet.com>2008-10-15 04:15:18 -0400
commit4d5c5d0fe89c921336b95f5e7e4f529a9df92f53 (patch)
tree7281955b46811fb25283a3b6abb7620630b2b26e /arch/x86/kvm/x86.c
parentcbff90a7caa49507d399c9a55ba4a411e840bfb4 (diff)
KVM: pci device assignment
Based on a patch from Amit Shah <amit.shah@qumranet.com>. This patch adds support for handling PCI devices that are assigned to the guest. The device to be assigned to the guest is registered in the host kernel and interrupt delivery is handled. If a device is already assigned, or the device driver for it is still loaded on the host, the device assignment fails and -EBUSY is returned to userspace. Devices that share their interrupt line are not supported at the moment. By itself, this patch will not make devices work within the guest. The VT-d extension is required to enable the device to perform DMA. Another alternative is PVDMA. Signed-off-by: Amit Shah <amit.shah@qumranet.com> Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com> Signed-off-by: Weidong Han <weidong.han@intel.com> Signed-off-by: Avi Kivity <avi@qumranet.com>
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r--arch/x86/kvm/x86.c243
1 files changed, 243 insertions, 0 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 94a216562f10..a97157cc42ae 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4,10 +4,14 @@
4 * derived from drivers/kvm/kvm_main.c 4 * derived from drivers/kvm/kvm_main.c
5 * 5 *
6 * Copyright (C) 2006 Qumranet, Inc. 6 * Copyright (C) 2006 Qumranet, Inc.
7 * Copyright (C) 2008 Qumranet, Inc.
8 * Copyright IBM Corporation, 2008
7 * 9 *
8 * Authors: 10 * Authors:
9 * Avi Kivity <avi@qumranet.com> 11 * Avi Kivity <avi@qumranet.com>
10 * Yaniv Kamay <yaniv@qumranet.com> 12 * Yaniv Kamay <yaniv@qumranet.com>
13 * Amit Shah <amit.shah@qumranet.com>
14 * Ben-Ami Yassour <benami@il.ibm.com>
11 * 15 *
12 * This work is licensed under the terms of the GNU GPL, version 2. See 16 * This work is licensed under the terms of the GNU GPL, version 2. See
13 * the COPYING file in the top-level directory. 17 * the COPYING file in the top-level directory.
@@ -23,8 +27,10 @@
23#include "x86.h" 27#include "x86.h"
24 28
25#include <linux/clocksource.h> 29#include <linux/clocksource.h>
30#include <linux/interrupt.h>
26#include <linux/kvm.h> 31#include <linux/kvm.h>
27#include <linux/fs.h> 32#include <linux/fs.h>
33#include <linux/pci.h>
28#include <linux/vmalloc.h> 34#include <linux/vmalloc.h>
29#include <linux/module.h> 35#include <linux/module.h>
30#include <linux/mman.h> 36#include <linux/mman.h>
@@ -98,6 +104,219 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
98 { NULL } 104 { NULL }
99}; 105};
100 106
107struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
108 int assigned_dev_id)
109{
110 struct list_head *ptr;
111 struct kvm_assigned_dev_kernel *match;
112
113 list_for_each(ptr, head) {
114 match = list_entry(ptr, struct kvm_assigned_dev_kernel, list);
115 if (match->assigned_dev_id == assigned_dev_id)
116 return match;
117 }
118 return NULL;
119}
120
121static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
122{
123 struct kvm_assigned_dev_kernel *assigned_dev;
124
125 assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
126 interrupt_work);
127
128 /* This is taken to safely inject irq inside the guest. When
129 * the interrupt injection (or the ioapic code) uses a
130 * finer-grained lock, update this
131 */
132 mutex_lock(&assigned_dev->kvm->lock);
133 kvm_set_irq(assigned_dev->kvm,
134 assigned_dev->guest_irq, 1);
135 mutex_unlock(&assigned_dev->kvm->lock);
136 kvm_put_kvm(assigned_dev->kvm);
137}
138
139/* FIXME: Implement the OR logic needed to make shared interrupts on
140 * this line behave properly
141 */
142static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
143{
144 struct kvm_assigned_dev_kernel *assigned_dev =
145 (struct kvm_assigned_dev_kernel *) dev_id;
146
147 kvm_get_kvm(assigned_dev->kvm);
148 schedule_work(&assigned_dev->interrupt_work);
149 disable_irq_nosync(irq);
150 return IRQ_HANDLED;
151}
152
153/* Ack the irq line for an assigned device */
154static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
155{
156 struct kvm_assigned_dev_kernel *dev;
157
158 if (kian->gsi == -1)
159 return;
160
161 dev = container_of(kian, struct kvm_assigned_dev_kernel,
162 ack_notifier);
163 kvm_set_irq(dev->kvm, dev->guest_irq, 0);
164 enable_irq(dev->host_irq);
165}
166
167static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
168 struct kvm_assigned_irq
169 *assigned_irq)
170{
171 int r = 0;
172 struct kvm_assigned_dev_kernel *match;
173
174 mutex_lock(&kvm->lock);
175
176 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
177 assigned_irq->assigned_dev_id);
178 if (!match) {
179 mutex_unlock(&kvm->lock);
180 return -EINVAL;
181 }
182
183 if (match->irq_requested) {
184 match->guest_irq = assigned_irq->guest_irq;
185 match->ack_notifier.gsi = assigned_irq->guest_irq;
186 mutex_unlock(&kvm->lock);
187 return 0;
188 }
189
190 INIT_WORK(&match->interrupt_work,
191 kvm_assigned_dev_interrupt_work_handler);
192
193 if (irqchip_in_kernel(kvm)) {
194 if (assigned_irq->host_irq)
195 match->host_irq = assigned_irq->host_irq;
196 else
197 match->host_irq = match->dev->irq;
198 match->guest_irq = assigned_irq->guest_irq;
199 match->ack_notifier.gsi = assigned_irq->guest_irq;
200 match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
201 kvm_register_irq_ack_notifier(kvm, &match->ack_notifier);
202
203 /* Even though this is PCI, we don't want to use shared
204 * interrupts. Sharing host devices with guest-assigned devices
205 * on the same interrupt line is not a happy situation: there
206 * are going to be long delays in accepting, acking, etc.
207 */
208 if (request_irq(match->host_irq, kvm_assigned_dev_intr, 0,
209 "kvm_assigned_device", (void *)match)) {
210 printk(KERN_INFO "%s: couldn't allocate irq for pv "
211 "device\n", __func__);
212 r = -EIO;
213 goto out;
214 }
215 }
216
217 match->irq_requested = true;
218out:
219 mutex_unlock(&kvm->lock);
220 return r;
221}
222
223static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
224 struct kvm_assigned_pci_dev *assigned_dev)
225{
226 int r = 0;
227 struct kvm_assigned_dev_kernel *match;
228 struct pci_dev *dev;
229
230 mutex_lock(&kvm->lock);
231
232 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
233 assigned_dev->assigned_dev_id);
234 if (match) {
235 /* device already assigned */
236 r = -EINVAL;
237 goto out;
238 }
239
240 match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
241 if (match == NULL) {
242 printk(KERN_INFO "%s: Couldn't allocate memory\n",
243 __func__);
244 r = -ENOMEM;
245 goto out;
246 }
247 dev = pci_get_bus_and_slot(assigned_dev->busnr,
248 assigned_dev->devfn);
249 if (!dev) {
250 printk(KERN_INFO "%s: host device not found\n", __func__);
251 r = -EINVAL;
252 goto out_free;
253 }
254 if (pci_enable_device(dev)) {
255 printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
256 r = -EBUSY;
257 goto out_put;
258 }
259 r = pci_request_regions(dev, "kvm_assigned_device");
260 if (r) {
261 printk(KERN_INFO "%s: Could not get access to device regions\n",
262 __func__);
263 goto out_disable;
264 }
265 match->assigned_dev_id = assigned_dev->assigned_dev_id;
266 match->host_busnr = assigned_dev->busnr;
267 match->host_devfn = assigned_dev->devfn;
268 match->dev = dev;
269
270 match->kvm = kvm;
271
272 list_add(&match->list, &kvm->arch.assigned_dev_head);
273
274out:
275 mutex_unlock(&kvm->lock);
276 return r;
277out_disable:
278 pci_disable_device(dev);
279out_put:
280 pci_dev_put(dev);
281out_free:
282 kfree(match);
283 mutex_unlock(&kvm->lock);
284 return r;
285}
286
287static void kvm_free_assigned_devices(struct kvm *kvm)
288{
289 struct list_head *ptr, *ptr2;
290 struct kvm_assigned_dev_kernel *assigned_dev;
291
292 list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
293 assigned_dev = list_entry(ptr,
294 struct kvm_assigned_dev_kernel,
295 list);
296
297 if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested) {
298 free_irq(assigned_dev->host_irq,
299 (void *)assigned_dev);
300
301 kvm_unregister_irq_ack_notifier(kvm,
302 &assigned_dev->
303 ack_notifier);
304 }
305
306 if (cancel_work_sync(&assigned_dev->interrupt_work))
307 /* We had pending work. That means we will have to take
308 * care of kvm_put_kvm.
309 */
310 kvm_put_kvm(kvm);
311
312 pci_release_regions(assigned_dev->dev);
313 pci_disable_device(assigned_dev->dev);
314 pci_dev_put(assigned_dev->dev);
315
316 list_del(&assigned_dev->list);
317 kfree(assigned_dev);
318 }
319}
101 320
102unsigned long segment_base(u16 selector) 321unsigned long segment_base(u16 selector)
103{ 322{
@@ -1766,6 +1985,28 @@ long kvm_arch_vm_ioctl(struct file *filp,
1766 r = 0; 1985 r = 0;
1767 break; 1986 break;
1768 } 1987 }
1988 case KVM_ASSIGN_PCI_DEVICE: {
1989 struct kvm_assigned_pci_dev assigned_dev;
1990
1991 r = -EFAULT;
1992 if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
1993 goto out;
1994 r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
1995 if (r)
1996 goto out;
1997 break;
1998 }
1999 case KVM_ASSIGN_IRQ: {
2000 struct kvm_assigned_irq assigned_irq;
2001
2002 r = -EFAULT;
2003 if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
2004 goto out;
2005 r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
2006 if (r)
2007 goto out;
2008 break;
2009 }
1769 case KVM_GET_PIT: { 2010 case KVM_GET_PIT: {
1770 struct kvm_pit_state ps; 2011 struct kvm_pit_state ps;
1771 r = -EFAULT; 2012 r = -EFAULT;
@@ -3945,6 +4186,7 @@ struct kvm *kvm_arch_create_vm(void)
3945 return ERR_PTR(-ENOMEM); 4186 return ERR_PTR(-ENOMEM);
3946 4187
3947 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); 4188 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
4189 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
3948 4190
3949 return kvm; 4191 return kvm;
3950} 4192}
@@ -3977,6 +4219,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
3977 4219
3978void kvm_arch_destroy_vm(struct kvm *kvm) 4220void kvm_arch_destroy_vm(struct kvm *kvm)
3979{ 4221{
4222 kvm_free_assigned_devices(kvm);
3980 kvm_free_pit(kvm); 4223 kvm_free_pit(kvm);
3981 kfree(kvm->arch.vpic); 4224 kfree(kvm->arch.vpic);
3982 kfree(kvm->arch.vioapic); 4225 kfree(kvm->arch.vioapic);