aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBen-Ami Yassour <benami@il.ibm.com>2008-07-28 12:26:26 -0400
committerAvi Kivity <avi@qumranet.com>2008-10-15 04:15:18 -0400
commit4d5c5d0fe89c921336b95f5e7e4f529a9df92f53 (patch)
tree7281955b46811fb25283a3b6abb7620630b2b26e
parentcbff90a7caa49507d399c9a55ba4a411e840bfb4 (diff)
KVM: pci device assignment
Based on a patch from: Amit Shah <amit.shah@qumranet.com> This patch adds support for handling PCI devices that are assigned to the guest. The device to be assigned to the guest is registered in the host kernel and interrupt delivery is handled. If a device is already assigned, or the device driver for it is still loaded on the host, the device assignment fails and -EBUSY is returned to userspace. Devices that share their interrupt line are not supported at the moment. By itself, this patch will not make devices work within the guest. The VT-d extension is required to enable the device to perform DMA. Another alternative is PVDMA. Signed-off-by: Amit Shah <amit.shah@qumranet.com> Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com> Signed-off-by: Weidong Han <weidong.han@intel.com> Signed-off-by: Avi Kivity <avi@qumranet.com>
-rw-r--r--arch/x86/kvm/x86.c243
-rw-r--r--include/asm-x86/kvm_host.h16
-rw-r--r--include/linux/kvm.h19
3 files changed, 278 insertions, 0 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 94a216562f10..a97157cc42ae 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4,10 +4,14 @@
4 * derived from drivers/kvm/kvm_main.c 4 * derived from drivers/kvm/kvm_main.c
5 * 5 *
6 * Copyright (C) 2006 Qumranet, Inc. 6 * Copyright (C) 2006 Qumranet, Inc.
7 * Copyright (C) 2008 Qumranet, Inc.
8 * Copyright IBM Corporation, 2008
7 * 9 *
8 * Authors: 10 * Authors:
9 * Avi Kivity <avi@qumranet.com> 11 * Avi Kivity <avi@qumranet.com>
10 * Yaniv Kamay <yaniv@qumranet.com> 12 * Yaniv Kamay <yaniv@qumranet.com>
13 * Amit Shah <amit.shah@qumranet.com>
14 * Ben-Ami Yassour <benami@il.ibm.com>
11 * 15 *
12 * This work is licensed under the terms of the GNU GPL, version 2. See 16 * This work is licensed under the terms of the GNU GPL, version 2. See
13 * the COPYING file in the top-level directory. 17 * the COPYING file in the top-level directory.
@@ -23,8 +27,10 @@
23#include "x86.h" 27#include "x86.h"
24 28
25#include <linux/clocksource.h> 29#include <linux/clocksource.h>
30#include <linux/interrupt.h>
26#include <linux/kvm.h> 31#include <linux/kvm.h>
27#include <linux/fs.h> 32#include <linux/fs.h>
33#include <linux/pci.h>
28#include <linux/vmalloc.h> 34#include <linux/vmalloc.h>
29#include <linux/module.h> 35#include <linux/module.h>
30#include <linux/mman.h> 36#include <linux/mman.h>
@@ -98,6 +104,219 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
98 { NULL } 104 { NULL }
99}; 105};
100 106
107struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
108 int assigned_dev_id)
109{
110 struct list_head *ptr;
111 struct kvm_assigned_dev_kernel *match;
112
113 list_for_each(ptr, head) {
114 match = list_entry(ptr, struct kvm_assigned_dev_kernel, list);
115 if (match->assigned_dev_id == assigned_dev_id)
116 return match;
117 }
118 return NULL;
119}
120
121static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
122{
123 struct kvm_assigned_dev_kernel *assigned_dev;
124
125 assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
126 interrupt_work);
127
128 /* This is taken to safely inject irq inside the guest. When
129 * the interrupt injection (or the ioapic code) uses a
130 * finer-grained lock, update this
131 */
132 mutex_lock(&assigned_dev->kvm->lock);
133 kvm_set_irq(assigned_dev->kvm,
134 assigned_dev->guest_irq, 1);
135 mutex_unlock(&assigned_dev->kvm->lock);
136 kvm_put_kvm(assigned_dev->kvm);
137}
138
139/* FIXME: Implement the OR logic needed to make shared interrupts on
140 * this line behave properly
141 */
142static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
143{
144 struct kvm_assigned_dev_kernel *assigned_dev =
145 (struct kvm_assigned_dev_kernel *) dev_id;
146
147 kvm_get_kvm(assigned_dev->kvm);
148 schedule_work(&assigned_dev->interrupt_work);
149 disable_irq_nosync(irq);
150 return IRQ_HANDLED;
151}
152
153/* Ack the irq line for an assigned device */
154static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
155{
156 struct kvm_assigned_dev_kernel *dev;
157
158 if (kian->gsi == -1)
159 return;
160
161 dev = container_of(kian, struct kvm_assigned_dev_kernel,
162 ack_notifier);
163 kvm_set_irq(dev->kvm, dev->guest_irq, 0);
164 enable_irq(dev->host_irq);
165}
166
167static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
168 struct kvm_assigned_irq
169 *assigned_irq)
170{
171 int r = 0;
172 struct kvm_assigned_dev_kernel *match;
173
174 mutex_lock(&kvm->lock);
175
176 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
177 assigned_irq->assigned_dev_id);
178 if (!match) {
179 mutex_unlock(&kvm->lock);
180 return -EINVAL;
181 }
182
183 if (match->irq_requested) {
184 match->guest_irq = assigned_irq->guest_irq;
185 match->ack_notifier.gsi = assigned_irq->guest_irq;
186 mutex_unlock(&kvm->lock);
187 return 0;
188 }
189
190 INIT_WORK(&match->interrupt_work,
191 kvm_assigned_dev_interrupt_work_handler);
192
193 if (irqchip_in_kernel(kvm)) {
194 if (assigned_irq->host_irq)
195 match->host_irq = assigned_irq->host_irq;
196 else
197 match->host_irq = match->dev->irq;
198 match->guest_irq = assigned_irq->guest_irq;
199 match->ack_notifier.gsi = assigned_irq->guest_irq;
200 match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
201 kvm_register_irq_ack_notifier(kvm, &match->ack_notifier);
202
203 /* Even though this is PCI, we don't want to use shared
204 * interrupts. Sharing host devices with guest-assigned devices
205 * on the same interrupt line is not a happy situation: there
206 * are going to be long delays in accepting, acking, etc.
207 */
208 if (request_irq(match->host_irq, kvm_assigned_dev_intr, 0,
209 "kvm_assigned_device", (void *)match)) {
210 printk(KERN_INFO "%s: couldn't allocate irq for pv "
211 "device\n", __func__);
212 r = -EIO;
213 goto out;
214 }
215 }
216
217 match->irq_requested = true;
218out:
219 mutex_unlock(&kvm->lock);
220 return r;
221}
222
223static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
224 struct kvm_assigned_pci_dev *assigned_dev)
225{
226 int r = 0;
227 struct kvm_assigned_dev_kernel *match;
228 struct pci_dev *dev;
229
230 mutex_lock(&kvm->lock);
231
232 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
233 assigned_dev->assigned_dev_id);
234 if (match) {
235 /* device already assigned */
236 r = -EINVAL;
237 goto out;
238 }
239
240 match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
241 if (match == NULL) {
242 printk(KERN_INFO "%s: Couldn't allocate memory\n",
243 __func__);
244 r = -ENOMEM;
245 goto out;
246 }
247 dev = pci_get_bus_and_slot(assigned_dev->busnr,
248 assigned_dev->devfn);
249 if (!dev) {
250 printk(KERN_INFO "%s: host device not found\n", __func__);
251 r = -EINVAL;
252 goto out_free;
253 }
254 if (pci_enable_device(dev)) {
255 printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
256 r = -EBUSY;
257 goto out_put;
258 }
259 r = pci_request_regions(dev, "kvm_assigned_device");
260 if (r) {
261 printk(KERN_INFO "%s: Could not get access to device regions\n",
262 __func__);
263 goto out_disable;
264 }
265 match->assigned_dev_id = assigned_dev->assigned_dev_id;
266 match->host_busnr = assigned_dev->busnr;
267 match->host_devfn = assigned_dev->devfn;
268 match->dev = dev;
269
270 match->kvm = kvm;
271
272 list_add(&match->list, &kvm->arch.assigned_dev_head);
273
274out:
275 mutex_unlock(&kvm->lock);
276 return r;
277out_disable:
278 pci_disable_device(dev);
279out_put:
280 pci_dev_put(dev);
281out_free:
282 kfree(match);
283 mutex_unlock(&kvm->lock);
284 return r;
285}
286
287static void kvm_free_assigned_devices(struct kvm *kvm)
288{
289 struct list_head *ptr, *ptr2;
290 struct kvm_assigned_dev_kernel *assigned_dev;
291
292 list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
293 assigned_dev = list_entry(ptr,
294 struct kvm_assigned_dev_kernel,
295 list);
296
297 if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested) {
298 free_irq(assigned_dev->host_irq,
299 (void *)assigned_dev);
300
301 kvm_unregister_irq_ack_notifier(kvm,
302 &assigned_dev->
303 ack_notifier);
304 }
305
306 if (cancel_work_sync(&assigned_dev->interrupt_work))
307 /* We had pending work. That means we will have to take
308 * care of kvm_put_kvm.
309 */
310 kvm_put_kvm(kvm);
311
312 pci_release_regions(assigned_dev->dev);
313 pci_disable_device(assigned_dev->dev);
314 pci_dev_put(assigned_dev->dev);
315
316 list_del(&assigned_dev->list);
317 kfree(assigned_dev);
318 }
319}
101 320
102unsigned long segment_base(u16 selector) 321unsigned long segment_base(u16 selector)
103{ 322{
@@ -1766,6 +1985,28 @@ long kvm_arch_vm_ioctl(struct file *filp,
1766 r = 0; 1985 r = 0;
1767 break; 1986 break;
1768 } 1987 }
1988 case KVM_ASSIGN_PCI_DEVICE: {
1989 struct kvm_assigned_pci_dev assigned_dev;
1990
1991 r = -EFAULT;
1992 if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
1993 goto out;
1994 r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
1995 if (r)
1996 goto out;
1997 break;
1998 }
1999 case KVM_ASSIGN_IRQ: {
2000 struct kvm_assigned_irq assigned_irq;
2001
2002 r = -EFAULT;
2003 if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
2004 goto out;
2005 r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
2006 if (r)
2007 goto out;
2008 break;
2009 }
1769 case KVM_GET_PIT: { 2010 case KVM_GET_PIT: {
1770 struct kvm_pit_state ps; 2011 struct kvm_pit_state ps;
1771 r = -EFAULT; 2012 r = -EFAULT;
@@ -3945,6 +4186,7 @@ struct kvm *kvm_arch_create_vm(void)
3945 return ERR_PTR(-ENOMEM); 4186 return ERR_PTR(-ENOMEM);
3946 4187
3947 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); 4188 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
4189 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
3948 4190
3949 return kvm; 4191 return kvm;
3950} 4192}
@@ -3977,6 +4219,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
3977 4219
3978void kvm_arch_destroy_vm(struct kvm *kvm) 4220void kvm_arch_destroy_vm(struct kvm *kvm)
3979{ 4221{
4222 kvm_free_assigned_devices(kvm);
3980 kvm_free_pit(kvm); 4223 kvm_free_pit(kvm);
3981 kfree(kvm->arch.vpic); 4224 kfree(kvm->arch.vpic);
3982 kfree(kvm->arch.vioapic); 4225 kfree(kvm->arch.vioapic);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index d451928fc841..99dddfcecf60 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -327,6 +327,21 @@ struct kvm_irq_ack_notifier {
327 void (*irq_acked)(struct kvm_irq_ack_notifier *kian); 327 void (*irq_acked)(struct kvm_irq_ack_notifier *kian);
328}; 328};
329 329
/*
 * Kernel-side state for one PCI device assigned to a guest.
 *
 * ack_notifier    - registered with kvm_register_irq_ack_notifier(); its
 *                   callback recovers this structure via container_of()
 * interrupt_work  - queued by the host irq handler; its handler raises
 *                   guest_irq in the guest
 * list            - links the entry on kvm->arch.assigned_dev_head
 * assigned_dev    - NOTE(review): not written or read by the assignment
 *                   code in x86.c; presumably meant to hold the userspace
 *                   request - confirm or drop
 * assigned_dev_id - identifier used to look the device up
 * host_busnr,
 * host_devfn      - PCI location copied from the userspace request
 * host_irq        - host irq passed to request_irq()/enable_irq()
 * guest_irq       - irq number passed to kvm_set_irq()
 * irq_requested   - used as a boolean; set once irq assignment completed
 * dev             - reference obtained from pci_get_bus_and_slot() and
 *                   dropped with pci_dev_put() on teardown
 * kvm             - VM the device is assigned to
 */
330struct kvm_assigned_dev_kernel {
331 struct kvm_irq_ack_notifier ack_notifier;
332 struct work_struct interrupt_work;
333 struct list_head list;
334 struct kvm_assigned_pci_dev assigned_dev;
335 int assigned_dev_id;
336 int host_busnr;
337 int host_devfn;
338 int host_irq;
339 int guest_irq;
340 int irq_requested;
341 struct pci_dev *dev;
342 struct kvm *kvm;
343};
344
330struct kvm_arch{ 345struct kvm_arch{
331 int naliases; 346 int naliases;
332 struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS]; 347 struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
@@ -339,6 +354,7 @@ struct kvm_arch{
339 * Hash table of struct kvm_mmu_page. 354 * Hash table of struct kvm_mmu_page.
340 */ 355 */
341 struct list_head active_mmu_pages; 356 struct list_head active_mmu_pages;
357 struct list_head assigned_dev_head;
342 struct kvm_pic *vpic; 358 struct kvm_pic *vpic;
343 struct kvm_ioapic *vioapic; 359 struct kvm_ioapic *vioapic;
344 struct kvm_pit *vpit; 360 struct kvm_pit *vpit;
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index d29b64881447..ef4bc6f89778 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -383,6 +383,7 @@ struct kvm_trace_rec {
383#define KVM_CAP_MP_STATE 14 383#define KVM_CAP_MP_STATE 14
384#define KVM_CAP_COALESCED_MMIO 15 384#define KVM_CAP_COALESCED_MMIO 15
385#define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */ 385#define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */
386#define KVM_CAP_DEVICE_ASSIGNMENT 17
386 387
387/* 388/*
388 * ioctls for VM fds 389 * ioctls for VM fds
@@ -412,6 +413,10 @@ struct kvm_trace_rec {
412 _IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone) 413 _IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone)
413#define KVM_UNREGISTER_COALESCED_MMIO \ 414#define KVM_UNREGISTER_COALESCED_MMIO \
414 _IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone) 415 _IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone)
/*
 * Device assignment ioctls (VM fd).
 *
 * NOTE(review): the kernel handlers read the argument with
 * copy_from_user() and write nothing back, i.e. data flows from
 * userspace to the kernel.  That direction is conventionally encoded
 * with _IOW; _IOR describes the opposite.  Also, 0x70 follows 0x69
 * directly, skipping 0x6a-0x6f - presumably unintentional.  Confirm
 * both before this ABI is relied upon, since changing either later
 * changes the ioctl numbers.
 */
416#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \
417 struct kvm_assigned_pci_dev)
418#define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \
419 struct kvm_assigned_irq)
415 420
416/* 421/*
417 * ioctls for vcpu fds 422 * ioctls for vcpu fds
@@ -476,4 +481,18 @@ struct kvm_trace_rec {
476#define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18) 481#define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18)
477#define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19) 482#define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19)
478 483
/*
 * Argument of KVM_ASSIGN_PCI_DEVICE.
 *
 * assigned_dev_id - identifier under which the device is registered;
 *                   later requests refer to the device by this value
 * busnr, devfn    - host PCI bus number and device/function used to
 *                   locate the device
 * flags           - not examined by the x86 implementation added with
 *                   this structure
 */
484struct kvm_assigned_pci_dev {
485 __u32 assigned_dev_id;
486 __u32 busnr;
487 __u32 devfn;
488 __u32 flags;
489};
490
/*
 * Argument of KVM_ASSIGN_IRQ.
 *
 * assigned_dev_id - identifier given in a previous KVM_ASSIGN_PCI_DEVICE
 * host_irq        - host irq to request; 0 selects the irq of the PCI
 *                   device itself
 * guest_irq       - irq raised in the guest when the host irq fires
 * flags           - not examined by the x86 implementation added with
 *                   this structure
 */
491struct kvm_assigned_irq {
492 __u32 assigned_dev_id;
493 __u32 host_irq;
494 __u32 guest_irq;
495 __u32 flags;
496};
497
479#endif 498#endif