author	Paul Mundt <lethal@linux-sh.org>	2008-10-19 22:17:52 -0400
committer	Paul Mundt <lethal@linux-sh.org>	2008-10-19 22:17:52 -0400
commit	4cb40f795af36b3deb743f6ccf6c3fd542c61c8d (patch)
tree	db3d7519932549bf528f5b8e4cb8350356cd544d /virt/kvm
parent	79ed2a9216dd3cc35c4f2c5dbaddadb195af83ac (diff)
parent	0cfd81031a26717fe14380d18275f8e217571615 (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Conflicts:
	Documentation/kernel-parameters.txt
	arch/sh/include/asm/elf.h
Diffstat (limited to 'virt/kvm')
-rw-r--r--	virt/kvm/ioapic.c	22
-rw-r--r--	virt/kvm/ioapic.h	10
-rw-r--r--	virt/kvm/irq_comm.c	60
-rw-r--r--	virt/kvm/kvm_main.c	382
-rw-r--r--	virt/kvm/kvm_trace.c	30
-rw-r--r--	virt/kvm/vtd.c	191
6 files changed, 620 insertions(+), 75 deletions(-)
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index c0d22870ee9c..53772bb46320 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -39,6 +39,7 @@
 
 #include "ioapic.h"
 #include "lapic.h"
+#include "irq.h"
 
 #if 0
 #define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg)
@@ -285,26 +286,31 @@ void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
 	}
 }
 
-static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int gsi)
+static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int gsi,
+				    int trigger_mode)
 {
 	union ioapic_redir_entry *ent;
 
 	ent = &ioapic->redirtbl[gsi];
-	ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
 
-	ent->fields.remote_irr = 0;
-	if (!ent->fields.mask && (ioapic->irr & (1 << gsi)))
-		ioapic_service(ioapic, gsi);
+	kvm_notify_acked_irq(ioapic->kvm, gsi);
+
+	if (trigger_mode == IOAPIC_LEVEL_TRIG) {
+		ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
+		ent->fields.remote_irr = 0;
+		if (!ent->fields.mask && (ioapic->irr & (1 << gsi)))
+			ioapic_service(ioapic, gsi);
+	}
 }
 
-void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
+void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode)
 {
 	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
 	int i;
 
 	for (i = 0; i < IOAPIC_NUM_PINS; i++)
 		if (ioapic->redirtbl[i].fields.vector == vector)
-			__kvm_ioapic_update_eoi(ioapic, i);
+			__kvm_ioapic_update_eoi(ioapic, i, trigger_mode);
 }
 
 static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr,
@@ -380,7 +386,7 @@ static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
 		break;
 #ifdef CONFIG_IA64
 	case IOAPIC_REG_EOI:
-		kvm_ioapic_update_eoi(ioapic->kvm, data);
+		kvm_ioapic_update_eoi(ioapic->kvm, data, IOAPIC_LEVEL_TRIG);
 		break;
 #endif
 
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 7f16675fe783..cd7ae7691c9d 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -58,6 +58,7 @@ struct kvm_ioapic {
 	} redirtbl[IOAPIC_NUM_PINS];
 	struct kvm_io_device dev;
 	struct kvm *kvm;
+	void (*ack_notifier)(void *opaque, int irq);
 };
 
 #ifdef DEBUG
@@ -78,16 +79,9 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
 	return kvm->arch.vioapic;
 }
 
-#ifdef CONFIG_IA64
-static inline int irqchip_in_kernel(struct kvm *kvm)
-{
-	return 1;
-}
-#endif
-
 struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
 				       unsigned long bitmap);
-void kvm_ioapic_update_eoi(struct kvm *kvm, int vector);
+void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
 int kvm_ioapic_init(struct kvm *kvm);
 void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
 void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
new file mode 100644
index 000000000000..d0169f5e6047
--- /dev/null
+++ b/virt/kvm/irq_comm.c
@@ -0,0 +1,60 @@
+/*
+ * irq_comm.c: Common API for in kernel interrupt controller
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ * Authors:
+ *   Yaozu (Eddie) Dong <Eddie.dong@intel.com>
+ *
+ */
+
+#include <linux/kvm_host.h>
+#include "irq.h"
+
+#include "ioapic.h"
+
+/* This should be called with the kvm->lock mutex held */
+void kvm_set_irq(struct kvm *kvm, int irq, int level)
+{
+	/* Not possible to detect if the guest uses the PIC or the
+	 * IOAPIC.  So set the bit in both.  The guest will ignore
+	 * writes to the unused one.
+	 */
+	kvm_ioapic_set_irq(kvm->arch.vioapic, irq, level);
+#ifdef CONFIG_X86
+	kvm_pic_set_irq(pic_irqchip(kvm), irq, level);
+#endif
+}
+
+void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi)
+{
+	struct kvm_irq_ack_notifier *kian;
+	struct hlist_node *n;
+
+	hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
+		if (kian->gsi == gsi)
+			kian->irq_acked(kian);
+}
+
+void kvm_register_irq_ack_notifier(struct kvm *kvm,
+				   struct kvm_irq_ack_notifier *kian)
+{
+	hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list);
+}
+
+void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+				     struct kvm_irq_ack_notifier *kian)
+{
+	hlist_del(&kian->link);
+}
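The irq ack notifier API added above (kvm_register_irq_ack_notifier() / kvm_notify_acked_irq()) is what the device-assignment code later in this merge hooks into. As an illustration only — the my_irq_source struct and its helpers below are hypothetical, while the kvm_irq_ack_notifier field names (gsi, irq_acked) are taken from their use in this diff — a minimal in-kernel consumer could look like this sketch:

/* Illustrative sketch only, not part of this commit: register for EOI
 * (ack) callbacks on a guest GSI using the API introduced above.
 */
struct my_irq_source {
	struct kvm *kvm;
	int guest_irq;
	struct kvm_irq_ack_notifier ack_notifier;
};

static void my_irq_acked(struct kvm_irq_ack_notifier *kian)
{
	struct my_irq_source *src =
		container_of(kian, struct my_irq_source, ack_notifier);

	/* The guest acknowledged the interrupt: lower the line again. */
	kvm_set_irq(src->kvm, src->guest_irq, 0);
}

static void my_irq_source_init(struct my_irq_source *src)
{
	src->ack_notifier.gsi = src->guest_irq;
	src->ack_notifier.irq_acked = my_irq_acked;
	kvm_register_irq_ack_notifier(src->kvm, &src->ack_notifier);
}

This mirrors what kvm_assigned_dev_ack_irq() in kvm_main.c (below) does for assigned PCI devices.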
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7dd9b0b85e4e..cf0ab8ed3845 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -51,6 +51,12 @@
 #include "coalesced_mmio.h"
 #endif
 
+#ifdef KVM_CAP_DEVICE_ASSIGNMENT
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include "irq.h"
+#endif
+
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
@@ -71,11 +77,253 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
 
 bool kvm_rebooting;
 
+#ifdef KVM_CAP_DEVICE_ASSIGNMENT
+static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
+						      int assigned_dev_id)
+{
+	struct list_head *ptr;
+	struct kvm_assigned_dev_kernel *match;
+
+	list_for_each(ptr, head) {
+		match = list_entry(ptr, struct kvm_assigned_dev_kernel, list);
+		if (match->assigned_dev_id == assigned_dev_id)
+			return match;
+	}
+	return NULL;
+}
+
+static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
+{
+	struct kvm_assigned_dev_kernel *assigned_dev;
+
+	assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
+				    interrupt_work);
+
+	/* This is taken to safely inject irq inside the guest. When
+	 * the interrupt injection (or the ioapic code) uses a
+	 * finer-grained lock, update this
+	 */
+	mutex_lock(&assigned_dev->kvm->lock);
+	kvm_set_irq(assigned_dev->kvm,
+		    assigned_dev->guest_irq, 1);
+	mutex_unlock(&assigned_dev->kvm->lock);
+	kvm_put_kvm(assigned_dev->kvm);
+}
+
+/* FIXME: Implement the OR logic needed to make shared interrupts on
+ * this line behave properly
+ */
+static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
+{
+	struct kvm_assigned_dev_kernel *assigned_dev =
+		(struct kvm_assigned_dev_kernel *) dev_id;
+
+	kvm_get_kvm(assigned_dev->kvm);
+	schedule_work(&assigned_dev->interrupt_work);
+	disable_irq_nosync(irq);
+	return IRQ_HANDLED;
+}
+
+/* Ack the irq line for an assigned device */
+static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
+{
+	struct kvm_assigned_dev_kernel *dev;
+
+	if (kian->gsi == -1)
+		return;
+
+	dev = container_of(kian, struct kvm_assigned_dev_kernel,
+			   ack_notifier);
+	kvm_set_irq(dev->kvm, dev->guest_irq, 0);
+	enable_irq(dev->host_irq);
+}
+
+static void kvm_free_assigned_device(struct kvm *kvm,
+				     struct kvm_assigned_dev_kernel
+				     *assigned_dev)
+{
+	if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested)
+		free_irq(assigned_dev->host_irq, (void *)assigned_dev);
+
+	kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
+
+	if (cancel_work_sync(&assigned_dev->interrupt_work))
+		/* We had pending work. That means we will have to take
+		 * care of kvm_put_kvm.
+		 */
+		kvm_put_kvm(kvm);
+
+	pci_release_regions(assigned_dev->dev);
+	pci_disable_device(assigned_dev->dev);
+	pci_dev_put(assigned_dev->dev);
+
+	list_del(&assigned_dev->list);
+	kfree(assigned_dev);
+}
+
+void kvm_free_all_assigned_devices(struct kvm *kvm)
+{
+	struct list_head *ptr, *ptr2;
+	struct kvm_assigned_dev_kernel *assigned_dev;
+
+	list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
+		assigned_dev = list_entry(ptr,
+					  struct kvm_assigned_dev_kernel,
+					  list);
+
+		kvm_free_assigned_device(kvm, assigned_dev);
+	}
+}
+
+static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
+				   struct kvm_assigned_irq
+				   *assigned_irq)
+{
+	int r = 0;
+	struct kvm_assigned_dev_kernel *match;
+
+	mutex_lock(&kvm->lock);
+
+	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      assigned_irq->assigned_dev_id);
+	if (!match) {
+		mutex_unlock(&kvm->lock);
+		return -EINVAL;
+	}
+
+	if (match->irq_requested) {
+		match->guest_irq = assigned_irq->guest_irq;
+		match->ack_notifier.gsi = assigned_irq->guest_irq;
+		mutex_unlock(&kvm->lock);
+		return 0;
+	}
+
+	INIT_WORK(&match->interrupt_work,
+		  kvm_assigned_dev_interrupt_work_handler);
+
+	if (irqchip_in_kernel(kvm)) {
+		if (!capable(CAP_SYS_RAWIO)) {
+			r = -EPERM;
+			goto out_release;
+		}
+
+		if (assigned_irq->host_irq)
+			match->host_irq = assigned_irq->host_irq;
+		else
+			match->host_irq = match->dev->irq;
+		match->guest_irq = assigned_irq->guest_irq;
+		match->ack_notifier.gsi = assigned_irq->guest_irq;
+		match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
+		kvm_register_irq_ack_notifier(kvm, &match->ack_notifier);
+
+		/* Even though this is PCI, we don't want to use shared
+		 * interrupts. Sharing host devices with guest-assigned devices
+		 * on the same interrupt line is not a happy situation: there
+		 * are going to be long delays in accepting, acking, etc.
+		 */
+		if (request_irq(match->host_irq, kvm_assigned_dev_intr, 0,
+				"kvm_assigned_device", (void *)match)) {
+			r = -EIO;
+			goto out_release;
+		}
+	}
+
+	match->irq_requested = true;
+	mutex_unlock(&kvm->lock);
+	return r;
+out_release:
+	mutex_unlock(&kvm->lock);
+	kvm_free_assigned_device(kvm, match);
+	return r;
+}
+
+static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
+				      struct kvm_assigned_pci_dev *assigned_dev)
+{
+	int r = 0;
+	struct kvm_assigned_dev_kernel *match;
+	struct pci_dev *dev;
+
+	mutex_lock(&kvm->lock);
+
+	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      assigned_dev->assigned_dev_id);
+	if (match) {
+		/* device already assigned */
+		r = -EINVAL;
+		goto out;
+	}
+
+	match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
+	if (match == NULL) {
+		printk(KERN_INFO "%s: Couldn't allocate memory\n",
+		       __func__);
+		r = -ENOMEM;
+		goto out;
+	}
+	dev = pci_get_bus_and_slot(assigned_dev->busnr,
+				   assigned_dev->devfn);
+	if (!dev) {
+		printk(KERN_INFO "%s: host device not found\n", __func__);
+		r = -EINVAL;
+		goto out_free;
+	}
+	if (pci_enable_device(dev)) {
+		printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
+		r = -EBUSY;
+		goto out_put;
+	}
+	r = pci_request_regions(dev, "kvm_assigned_device");
+	if (r) {
+		printk(KERN_INFO "%s: Could not get access to device regions\n",
+		       __func__);
+		goto out_disable;
+	}
+	match->assigned_dev_id = assigned_dev->assigned_dev_id;
+	match->host_busnr = assigned_dev->busnr;
+	match->host_devfn = assigned_dev->devfn;
+	match->dev = dev;
+
+	match->kvm = kvm;
+
+	list_add(&match->list, &kvm->arch.assigned_dev_head);
+
+	if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
+		r = kvm_iommu_map_guest(kvm, match);
+		if (r)
+			goto out_list_del;
+	}
+
+out:
+	mutex_unlock(&kvm->lock);
+	return r;
+out_list_del:
+	list_del(&match->list);
+	pci_release_regions(dev);
+out_disable:
+	pci_disable_device(dev);
+out_put:
+	pci_dev_put(dev);
+out_free:
+	kfree(match);
+	mutex_unlock(&kvm->lock);
+	return r;
+}
+#endif
+
 static inline int valid_vcpu(int n)
 {
 	return likely(n >= 0 && n < KVM_MAX_VCPUS);
 }
 
+inline int kvm_is_mmio_pfn(pfn_t pfn)
+{
+	if (pfn_valid(pfn))
+		return PageReserved(pfn_to_page(pfn));
+
+	return true;
+}
+
 /*
  * Switches to specified vcpu, until a matching vcpu_put()
  */
@@ -570,6 +818,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	}
 
 	kvm_free_physmem_slot(&old, &new);
+#ifdef CONFIG_DMAR
+	/* map the pages in iommu page table */
+	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
+	if (r)
+		goto out;
+#endif
 	return 0;
 
 out_free:
@@ -708,9 +962,6 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(gfn_to_hva);
 
-/*
- * Requires current->mm->mmap_sem to be held
- */
 pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
 {
 	struct page *page[1];
@@ -726,21 +977,24 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
 		return page_to_pfn(bad_page);
 	}
 
-	npages = get_user_pages(current, current->mm, addr, 1, 1, 1, page,
-				NULL);
+	npages = get_user_pages_fast(addr, 1, 1, page);
 
 	if (unlikely(npages != 1)) {
 		struct vm_area_struct *vma;
 
+		down_read(&current->mm->mmap_sem);
 		vma = find_vma(current->mm, addr);
+
 		if (vma == NULL || addr < vma->vm_start ||
 		    !(vma->vm_flags & VM_PFNMAP)) {
+			up_read(&current->mm->mmap_sem);
 			get_page(bad_page);
 			return page_to_pfn(bad_page);
 		}
 
 		pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
-		BUG_ON(pfn_valid(pfn));
+		up_read(&current->mm->mmap_sem);
+		BUG_ON(!kvm_is_mmio_pfn(pfn));
 	} else
 		pfn = page_to_pfn(page[0]);
 
@@ -754,10 +1008,10 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 	pfn_t pfn;
 
 	pfn = gfn_to_pfn(kvm, gfn);
-	if (pfn_valid(pfn))
+	if (!kvm_is_mmio_pfn(pfn))
 		return pfn_to_page(pfn);
 
-	WARN_ON(!pfn_valid(pfn));
+	WARN_ON(kvm_is_mmio_pfn(pfn));
 
 	get_page(bad_page);
 	return bad_page;
@@ -773,7 +1027,7 @@ EXPORT_SYMBOL_GPL(kvm_release_page_clean);
 
 void kvm_release_pfn_clean(pfn_t pfn)
 {
-	if (pfn_valid(pfn))
+	if (!kvm_is_mmio_pfn(pfn))
 		put_page(pfn_to_page(pfn));
 }
 EXPORT_SYMBOL_GPL(kvm_release_pfn_clean);
@@ -799,7 +1053,7 @@ EXPORT_SYMBOL_GPL(kvm_set_page_dirty);
 
 void kvm_set_pfn_dirty(pfn_t pfn)
 {
-	if (pfn_valid(pfn)) {
+	if (!kvm_is_mmio_pfn(pfn)) {
 		struct page *page = pfn_to_page(pfn);
 		if (!PageReserved(page))
 			SetPageDirty(page);
@@ -809,14 +1063,14 @@ EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty);
 
 void kvm_set_pfn_accessed(pfn_t pfn)
 {
-	if (pfn_valid(pfn))
+	if (!kvm_is_mmio_pfn(pfn))
 		mark_page_accessed(pfn_to_page(pfn));
 }
 EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
 
 void kvm_get_pfn(pfn_t pfn)
 {
-	if (pfn_valid(pfn))
+	if (!kvm_is_mmio_pfn(pfn))
 		get_page(pfn_to_page(pfn));
 }
 EXPORT_SYMBOL_GPL(kvm_get_pfn);
@@ -972,12 +1226,12 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 	for (;;) {
 		prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
 
-		if (kvm_cpu_has_interrupt(vcpu))
-			break;
-		if (kvm_cpu_has_pending_timer(vcpu))
-			break;
-		if (kvm_arch_vcpu_runnable(vcpu))
+		if (kvm_cpu_has_interrupt(vcpu) ||
+		    kvm_cpu_has_pending_timer(vcpu) ||
+		    kvm_arch_vcpu_runnable(vcpu)) {
+			set_bit(KVM_REQ_UNHALT, &vcpu->requests);
 			break;
+		}
 		if (signal_pending(current))
 			break;
 
@@ -1074,12 +1328,11 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
 
 	r = kvm_arch_vcpu_setup(vcpu);
 	if (r)
-		goto vcpu_destroy;
+		return r;
 
 	mutex_lock(&kvm->lock);
 	if (kvm->vcpus[n]) {
 		r = -EEXIST;
-		mutex_unlock(&kvm->lock);
 		goto vcpu_destroy;
 	}
 	kvm->vcpus[n] = vcpu;
@@ -1095,8 +1348,8 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
 unlink:
 	mutex_lock(&kvm->lock);
 	kvm->vcpus[n] = NULL;
-	mutex_unlock(&kvm->lock);
 vcpu_destroy:
+	mutex_unlock(&kvm->lock);
 	kvm_arch_vcpu_destroy(vcpu);
 	return r;
 }
@@ -1118,6 +1371,8 @@ static long kvm_vcpu_ioctl(struct file *filp,
 	struct kvm_vcpu *vcpu = filp->private_data;
 	void __user *argp = (void __user *)arg;
 	int r;
+	struct kvm_fpu *fpu = NULL;
+	struct kvm_sregs *kvm_sregs = NULL;
 
 	if (vcpu->kvm->mm != current->mm)
 		return -EIO;
@@ -1165,25 +1420,28 @@ out_free2:
 		break;
 	}
 	case KVM_GET_SREGS: {
-		struct kvm_sregs kvm_sregs;
-
-		memset(&kvm_sregs, 0, sizeof kvm_sregs);
-		r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, &kvm_sregs);
+		kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL);
+		r = -ENOMEM;
+		if (!kvm_sregs)
+			goto out;
+		r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, kvm_sregs);
 		if (r)
 			goto out;
 		r = -EFAULT;
-		if (copy_to_user(argp, &kvm_sregs, sizeof kvm_sregs))
+		if (copy_to_user(argp, kvm_sregs, sizeof(struct kvm_sregs)))
 			goto out;
 		r = 0;
 		break;
 	}
 	case KVM_SET_SREGS: {
-		struct kvm_sregs kvm_sregs;
-
+		kvm_sregs = kmalloc(sizeof(struct kvm_sregs), GFP_KERNEL);
+		r = -ENOMEM;
+		if (!kvm_sregs)
+			goto out;
 		r = -EFAULT;
-		if (copy_from_user(&kvm_sregs, argp, sizeof kvm_sregs))
+		if (copy_from_user(kvm_sregs, argp, sizeof(struct kvm_sregs)))
 			goto out;
-		r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, &kvm_sregs);
+		r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs);
 		if (r)
 			goto out;
 		r = 0;
@@ -1264,25 +1522,28 @@ out_free2:
 		break;
 	}
 	case KVM_GET_FPU: {
-		struct kvm_fpu fpu;
-
-		memset(&fpu, 0, sizeof fpu);
-		r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, &fpu);
+		fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL);
+		r = -ENOMEM;
+		if (!fpu)
+			goto out;
+		r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, fpu);
 		if (r)
 			goto out;
 		r = -EFAULT;
-		if (copy_to_user(argp, &fpu, sizeof fpu))
+		if (copy_to_user(argp, fpu, sizeof(struct kvm_fpu)))
 			goto out;
 		r = 0;
 		break;
 	}
 	case KVM_SET_FPU: {
-		struct kvm_fpu fpu;
-
+		fpu = kmalloc(sizeof(struct kvm_fpu), GFP_KERNEL);
+		r = -ENOMEM;
+		if (!fpu)
+			goto out;
 		r = -EFAULT;
-		if (copy_from_user(&fpu, argp, sizeof fpu))
+		if (copy_from_user(fpu, argp, sizeof(struct kvm_fpu)))
 			goto out;
-		r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, &fpu);
+		r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu);
 		if (r)
 			goto out;
 		r = 0;
@@ -1292,6 +1553,8 @@ out_free2:
 		r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);
 	}
 out:
+	kfree(fpu);
+	kfree(kvm_sregs);
 	return r;
 }
 
@@ -1360,6 +1623,30 @@ static long kvm_vm_ioctl(struct file *filp,
 		break;
 	}
 #endif
+#ifdef KVM_CAP_DEVICE_ASSIGNMENT
+	case KVM_ASSIGN_PCI_DEVICE: {
+		struct kvm_assigned_pci_dev assigned_dev;
+
+		r = -EFAULT;
+		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
+			goto out;
+		r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
+		if (r)
+			goto out;
+		break;
+	}
+	case KVM_ASSIGN_IRQ: {
+		struct kvm_assigned_irq assigned_irq;
+
+		r = -EFAULT;
+		if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
+			goto out;
+		r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
+		if (r)
+			goto out;
+		break;
+	}
+#endif
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 
@@ -1369,17 +1656,22 @@ out:
 
 static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
+	struct page *page[1];
+	unsigned long addr;
+	int npages;
+	gfn_t gfn = vmf->pgoff;
 	struct kvm *kvm = vma->vm_file->private_data;
-	struct page *page;
 
-	if (!kvm_is_visible_gfn(kvm, vmf->pgoff))
+	addr = gfn_to_hva(kvm, gfn);
+	if (kvm_is_error_hva(addr))
 		return VM_FAULT_SIGBUS;
-	page = gfn_to_page(kvm, vmf->pgoff);
-	if (is_error_page(page)) {
-		kvm_release_page_clean(page);
+
+	npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page,
+				NULL);
+	if (unlikely(npages != 1))
 		return VM_FAULT_SIGBUS;
-	}
-	vmf->page = page;
+
+	vmf->page = page[0];
 	return 0;
 }
 
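For context, the KVM_ASSIGN_PCI_DEVICE and KVM_ASSIGN_IRQ ioctls introduced in this file are driven from userspace. A rough, hypothetical userspace sketch (not part of this commit; the field names come from how kvm_vm_ioctl_assign_device() and kvm_vm_ioctl_assign_irq() read them, the header choice and error handling are assumptions):

/* Hypothetical userspace sketch, not part of this commit. */
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int assign_host_device(int vm_fd, __u32 id, __u32 busnr, __u32 devfn,
			      __u32 guest_irq)
{
	struct kvm_assigned_pci_dev dev = {
		.assigned_dev_id = id,
		.busnr = busnr,
		.devfn = devfn,
		.flags = KVM_DEV_ASSIGN_ENABLE_IOMMU,	/* request VT-d mapping */
	};
	struct kvm_assigned_irq irq = {
		.assigned_dev_id = id,
		.host_irq = 0,		/* 0 lets the kernel fall back to dev->irq */
		.guest_irq = guest_irq,
	};

	if (ioctl(vm_fd, KVM_ASSIGN_PCI_DEVICE, &dev) < 0)
		return -1;
	return ioctl(vm_fd, KVM_ASSIGN_IRQ, &irq);
}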
diff --git a/virt/kvm/kvm_trace.c b/virt/kvm/kvm_trace.c
index 58141f31ea8f..41dcc845f78c 100644
--- a/virt/kvm/kvm_trace.c
+++ b/virt/kvm/kvm_trace.c
@@ -17,6 +17,7 @@
 #include <linux/module.h>
 #include <linux/relay.h>
 #include <linux/debugfs.h>
+#include <linux/ktime.h>
 
 #include <linux/kvm_host.h>
 
@@ -35,16 +36,16 @@ static struct kvm_trace *kvm_trace;
 struct kvm_trace_probe {
 	const char *name;
 	const char *format;
-	u32 cycle_in;
+	u32 timestamp_in;
 	marker_probe_func *probe_func;
 };
 
-static inline int calc_rec_size(int cycle, int extra)
+static inline int calc_rec_size(int timestamp, int extra)
 {
 	int rec_size = KVM_TRC_HEAD_SIZE;
 
 	rec_size += extra;
-	return cycle ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size;
+	return timestamp ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size;
 }
 
 static void kvm_add_trace(void *probe_private, void *call_data,
@@ -54,12 +55,13 @@ static void kvm_add_trace(void *probe_private, void *call_data,
 	struct kvm_trace *kt = kvm_trace;
 	struct kvm_trace_rec rec;
 	struct kvm_vcpu *vcpu;
-	int i, extra, size;
+	int i, size;
+	u32 extra;
 
 	if (unlikely(kt->trace_state != KVM_TRACE_STATE_RUNNING))
 		return;
 
-	rec.event = va_arg(*args, u32);
+	rec.rec_val = TRACE_REC_EVENT_ID(va_arg(*args, u32));
 	vcpu = va_arg(*args, struct kvm_vcpu *);
 	rec.pid = current->tgid;
 	rec.vcpu_id = vcpu->vcpu_id;
@@ -67,21 +69,21 @@ static void kvm_add_trace(void *probe_private, void *call_data,
 	extra = va_arg(*args, u32);
 	WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX));
 	extra = min_t(u32, extra, KVM_TRC_EXTRA_MAX);
-	rec.extra_u32 = extra;
 
-	rec.cycle_in = p->cycle_in;
+	rec.rec_val |= TRACE_REC_TCS(p->timestamp_in)
+			| TRACE_REC_NUM_DATA_ARGS(extra);
 
-	if (rec.cycle_in) {
-		rec.u.cycle.cycle_u64 = get_cycles();
+	if (p->timestamp_in) {
+		rec.u.timestamp.timestamp = ktime_to_ns(ktime_get());
 
-		for (i = 0; i < rec.extra_u32; i++)
-			rec.u.cycle.extra_u32[i] = va_arg(*args, u32);
+		for (i = 0; i < extra; i++)
+			rec.u.timestamp.extra_u32[i] = va_arg(*args, u32);
 	} else {
-		for (i = 0; i < rec.extra_u32; i++)
-			rec.u.nocycle.extra_u32[i] = va_arg(*args, u32);
+		for (i = 0; i < extra; i++)
+			rec.u.notimestamp.extra_u32[i] = va_arg(*args, u32);
 	}
 
-	size = calc_rec_size(rec.cycle_in, rec.extra_u32 * sizeof(u32));
+	size = calc_rec_size(p->timestamp_in, extra * sizeof(u32));
 	relay_write(kt->rchan, &rec, size);
 }
 
diff --git a/virt/kvm/vtd.c b/virt/kvm/vtd.c
new file mode 100644
index 000000000000..a770874f3a3a
--- /dev/null
+++ b/virt/kvm/vtd.c
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) 2006-2008 Intel Corporation
+ * Copyright IBM Corporation, 2008
+ * Author: Allen M. Kay <allen.m.kay@intel.com>
+ * Author: Weidong Han <weidong.han@intel.com>
+ * Author: Ben-Ami Yassour <benami@il.ibm.com>
+ */
+
+#include <linux/list.h>
+#include <linux/kvm_host.h>
+#include <linux/pci.h>
+#include <linux/dmar.h>
+#include <linux/intel-iommu.h>
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm);
+static void kvm_iommu_put_pages(struct kvm *kvm,
+				gfn_t base_gfn, unsigned long npages);
+
+int kvm_iommu_map_pages(struct kvm *kvm,
+			gfn_t base_gfn, unsigned long npages)
+{
+	gfn_t gfn = base_gfn;
+	pfn_t pfn;
+	int i, r = 0;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	for (i = 0; i < npages; i++) {
+		/* check if already mapped */
+		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
+						     gfn_to_gpa(gfn));
+		if (pfn)
+			continue;
+
+		pfn = gfn_to_pfn(kvm, gfn);
+		r = intel_iommu_page_mapping(domain,
+					     gfn_to_gpa(gfn),
+					     pfn_to_hpa(pfn),
+					     PAGE_SIZE,
+					     DMA_PTE_READ |
+					     DMA_PTE_WRITE);
+		if (r) {
+			printk(KERN_ERR "kvm_iommu_map_pages:"
+			       "iommu failed to map pfn=%lx\n", pfn);
+			goto unmap_pages;
+		}
+		gfn++;
+	}
+	return 0;
+
+unmap_pages:
+	kvm_iommu_put_pages(kvm, base_gfn, i);
+	return r;
+}
+
+static int kvm_iommu_map_memslots(struct kvm *kvm)
+{
+	int i, r;
+
+	down_read(&kvm->slots_lock);
+	for (i = 0; i < kvm->nmemslots; i++) {
+		r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
+					kvm->memslots[i].npages);
+		if (r)
+			break;
+	}
+	up_read(&kvm->slots_lock);
+	return r;
+}
+
+int kvm_iommu_map_guest(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *assigned_dev)
+{
+	struct pci_dev *pdev = NULL;
+	int r;
+
+	if (!intel_iommu_found()) {
+		printk(KERN_ERR "%s: intel iommu not found\n", __func__);
+		return -ENODEV;
+	}
+
+	printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n",
+	       assigned_dev->host_busnr,
+	       PCI_SLOT(assigned_dev->host_devfn),
+	       PCI_FUNC(assigned_dev->host_devfn));
+
+	pdev = assigned_dev->dev;
+
+	if (pdev == NULL) {
+		if (kvm->arch.intel_iommu_domain) {
+			intel_iommu_domain_exit(kvm->arch.intel_iommu_domain);
+			kvm->arch.intel_iommu_domain = NULL;
+		}
+		return -ENODEV;
+	}
+
+	kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev);
+	if (!kvm->arch.intel_iommu_domain)
+		return -ENODEV;
+
+	r = kvm_iommu_map_memslots(kvm);
+	if (r)
+		goto out_unmap;
+
+	intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
+			       pdev->bus->number, pdev->devfn);
+
+	r = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
+					pdev);
+	if (r) {
+		printk(KERN_ERR "Domain context map for %s failed",
+		       pci_name(pdev));
+		goto out_unmap;
+	}
+	return 0;
+
+out_unmap:
+	kvm_iommu_unmap_memslots(kvm);
+	return r;
+}
+
+static void kvm_iommu_put_pages(struct kvm *kvm,
+				gfn_t base_gfn, unsigned long npages)
+{
+	gfn_t gfn = base_gfn;
+	pfn_t pfn;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+	int i;
+
+	for (i = 0; i < npages; i++) {
+		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
+						     gfn_to_gpa(gfn));
+		kvm_release_pfn_clean(pfn);
+		gfn++;
+	}
+}
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm)
+{
+	int i;
+	down_read(&kvm->slots_lock);
+	for (i = 0; i < kvm->nmemslots; i++) {
+		kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
+				    kvm->memslots[i].npages);
+	}
+	up_read(&kvm->slots_lock);
+
+	return 0;
+}
+
+int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+	struct kvm_assigned_dev_kernel *entry;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) {
+		printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n",
+		       entry->host_busnr,
+		       PCI_SLOT(entry->host_devfn),
+		       PCI_FUNC(entry->host_devfn));
+
+		/* detach kvm dmar domain */
+		intel_iommu_detach_dev(domain, entry->host_busnr,
+				       entry->host_devfn);
+	}
+	kvm_iommu_unmap_memslots(kvm);
+	intel_iommu_domain_exit(domain);
+	return 0;
+}