Diffstat (limited to 'virt')
-rw-r--r--  virt/kvm/assigned-dev.c |  95
-rw-r--r--  virt/kvm/iommu.c        |  30
-rw-r--r--  virt/kvm/kvm_main.c     | 110
3 files changed, 209 insertions(+), 26 deletions(-)
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index 6cc4b97ec45..af7910228fb 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -17,6 +17,8 @@
 #include <linux/pci.h>
 #include <linux/interrupt.h>
 #include <linux/slab.h>
+#include <linux/namei.h>
+#include <linux/fs.h>
 #include "irq.h"
 
 static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
@@ -474,12 +476,76 @@ out:
 	return r;
 }
 
+/*
+ * We want to test whether the caller has been granted permissions to
+ * use this device. To be able to configure and control the device,
+ * the user needs access to PCI configuration space and BAR resources.
+ * These are accessed through PCI sysfs. PCI config space is often
+ * passed to the process calling this ioctl via file descriptor, so we
+ * can't rely on access to that file. We can check for permissions
+ * on each of the BAR resource files, which is a pretty clear
+ * indicator that the user has been granted access to the device.
+ */
+static int probe_sysfs_permissions(struct pci_dev *dev)
+{
+#ifdef CONFIG_SYSFS
+	int i;
+	bool bar_found = false;
+
+	for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) {
+		char *kpath, *syspath;
+		struct path path;
+		struct inode *inode;
+		int r;
+
+		if (!pci_resource_len(dev, i))
+			continue;
+
+		kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL);
+		if (!kpath)
+			return -ENOMEM;
+
+		/* Per sysfs-rules, sysfs is always at /sys */
+		syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i);
+		kfree(kpath);
+		if (!syspath)
+			return -ENOMEM;
+
+		r = kern_path(syspath, LOOKUP_FOLLOW, &path);
+		kfree(syspath);
+		if (r)
+			return r;
+
+		inode = path.dentry->d_inode;
+
+		r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS);
+		path_put(&path);
+		if (r)
+			return r;
+
+		bar_found = true;
+	}
+
+	/* If no resources, probably something special */
+	if (!bar_found)
+		return -EPERM;
+
+	return 0;
+#else
+	return -EINVAL; /* No way to control the device without sysfs */
+#endif
+}
+
 static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 				      struct kvm_assigned_pci_dev *assigned_dev)
 {
 	int r = 0, idx;
 	struct kvm_assigned_dev_kernel *match;
 	struct pci_dev *dev;
+	u8 header_type;
+
+	if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU))
+		return -EINVAL;
 
 	mutex_lock(&kvm->lock);
 	idx = srcu_read_lock(&kvm->srcu);
@@ -507,6 +573,18 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 		r = -EINVAL;
 		goto out_free;
 	}
+
+	/* Don't allow bridges to be assigned */
+	pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
+	if ((header_type & PCI_HEADER_TYPE) != PCI_HEADER_TYPE_NORMAL) {
+		r = -EPERM;
+		goto out_put;
+	}
+
+	r = probe_sysfs_permissions(dev);
+	if (r)
+		goto out_put;
+
 	if (pci_enable_device(dev)) {
 		printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
 		r = -EBUSY;
@@ -538,16 +616,14 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 
 	list_add(&match->list, &kvm->arch.assigned_dev_head);
 
-	if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
-		if (!kvm->arch.iommu_domain) {
-			r = kvm_iommu_map_guest(kvm);
-			if (r)
-				goto out_list_del;
-		}
-		r = kvm_assign_device(kvm, match);
+	if (!kvm->arch.iommu_domain) {
+		r = kvm_iommu_map_guest(kvm);
 		if (r)
 			goto out_list_del;
 	}
+	r = kvm_assign_device(kvm, match);
+	if (r)
+		goto out_list_del;
 
 out:
 	srcu_read_unlock(&kvm->srcu, idx);
@@ -587,8 +663,7 @@ static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
 		goto out;
 	}
 
-	if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)
-		kvm_deassign_device(kvm, match);
+	kvm_deassign_device(kvm, match);
 
 	kvm_free_assigned_device(kvm, match);
 
@@ -617,7 +692,7 @@ static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
 	if (adev->entries_nr == 0) {
 		adev->entries_nr = entry_nr->entry_nr;
 		if (adev->entries_nr == 0 ||
-		    adev->entries_nr >= KVM_MAX_MSIX_PER_DEV) {
+		    adev->entries_nr > KVM_MAX_MSIX_PER_DEV) {
 			r = -EINVAL;
 			goto msix_nr_out;
 		}
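
Note: the assigned-dev.c changes above are userspace-visible. KVM_ASSIGN_PCI_DEVICE now fails with -EINVAL unless the caller requests IOMMU protection, rejects PCI bridges with -EPERM, and requires that the caller can read and write the device's BAR resource files under /sys. A minimal userspace sketch of the new requirement follows; the bus/devfn values and the device id are made-up placeholders, not taken from this patch.

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Assign a PCI device to a VM; vm_fd is a KVM VM file descriptor. */
static int assign_example_device(int vm_fd)
{
	struct kvm_assigned_pci_dev adev;

	memset(&adev, 0, sizeof(adev));
	adev.assigned_dev_id = 1;			/* arbitrary userspace-chosen id */
	adev.busnr = 0x01;				/* placeholder bus number */
	adev.devfn = 0x00;				/* placeholder slot/function */
	adev.flags = KVM_DEV_ASSIGN_ENABLE_IOMMU;	/* mandatory after this change */

	return ioctl(vm_fd, KVM_ASSIGN_PCI_DEVICE, &adev);
}
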
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 62a9caf0563..511e160f706 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -30,6 +30,12 @@
 #include <linux/iommu.h>
 #include <linux/intel-iommu.h>
 
+static int allow_unsafe_assigned_interrupts;
+module_param_named(allow_unsafe_assigned_interrupts,
+		   allow_unsafe_assigned_interrupts, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(allow_unsafe_assigned_interrupts,
+ "Enable device assignment on platforms without interrupt remapping support.");
+
 static int kvm_iommu_unmap_memslots(struct kvm *kvm);
 static void kvm_iommu_put_pages(struct kvm *kvm,
 				gfn_t base_gfn, unsigned long npages);
@@ -105,7 +111,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 
 		/* Map into IO address space */
 		r = iommu_map(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn),
-			      get_order(page_size), flags);
+			      page_size, flags);
 		if (r) {
 			printk(KERN_ERR "kvm_iommu_map_address:"
 			       "iommu failed to map pfn=%llx\n", pfn);
@@ -222,15 +228,27 @@ int kvm_iommu_map_guest(struct kvm *kvm)
 {
 	int r;
 
-	if (!iommu_found()) {
+	if (!iommu_present(&pci_bus_type)) {
 		printk(KERN_ERR "%s: iommu not found\n", __func__);
 		return -ENODEV;
 	}
 
-	kvm->arch.iommu_domain = iommu_domain_alloc();
+	kvm->arch.iommu_domain = iommu_domain_alloc(&pci_bus_type);
 	if (!kvm->arch.iommu_domain)
 		return -ENOMEM;
 
+	if (!allow_unsafe_assigned_interrupts &&
+	    !iommu_domain_has_cap(kvm->arch.iommu_domain,
+				  IOMMU_CAP_INTR_REMAP)) {
+		printk(KERN_WARNING "%s: No interrupt remapping support,"
+		       " disallowing device assignment."
+		       " Re-enable with \"allow_unsafe_assigned_interrupts=1\""
+		       " module option.\n", __func__);
+		iommu_domain_free(kvm->arch.iommu_domain);
+		kvm->arch.iommu_domain = NULL;
+		return -EPERM;
+	}
+
 	r = kvm_iommu_map_memslots(kvm);
 	if (r)
 		goto out_unmap;
@@ -268,15 +286,15 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
 
 	while (gfn < end_gfn) {
 		unsigned long unmap_pages;
-		int order;
+		size_t size;
 
 		/* Get physical address */
 		phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn));
 		pfn  = phys >> PAGE_SHIFT;
 
 		/* Unmap address from IO address space */
-		order       = iommu_unmap(domain, gfn_to_gpa(gfn), 0);
-		unmap_pages = 1ULL << order;
+		size        = iommu_unmap(domain, gfn_to_gpa(gfn), PAGE_SIZE);
+		unmap_pages = 1ULL << get_order(size);
 
 		/* Unpin all pages we just unmapped to not leak any memory */
 		kvm_unpin_pages(kvm, pfn, unmap_pages);
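
Note: the iommu.c hunks above track two things: the generic IOMMU API (iommu_present() and iommu_domain_alloc() now take a bus type, and map/unmap sizes are expressed in bytes rather than page orders), and a new safety check that refuses device assignment when the IOMMU cannot remap interrupts, unless the administrator opts out via the new allow_unsafe_assigned_interrupts module parameter. The snippet below is an illustration of the byte-size calling convention only, assuming the IOMMU API of this era; it is not part of the patch.

#include <linux/iommu.h>

/* Map and unmap one page, using sizes in bytes throughout. */
static int example_map_one_page(struct iommu_domain *domain,
				unsigned long iova, phys_addr_t paddr)
{
	size_t unmapped;
	int r;

	/* old convention: iommu_map(domain, iova, paddr, get_order(PAGE_SIZE), prot) */
	r = iommu_map(domain, iova, paddr, PAGE_SIZE, IOMMU_READ | IOMMU_WRITE);
	if (r)
		return r;

	/* old convention returned a page order; now the byte count comes back */
	unmapped = iommu_unmap(domain, iova, PAGE_SIZE);

	return unmapped == PAGE_SIZE ? 0 : -EFAULT;
}
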
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 96ebc067941..aefdda390f5 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -84,6 +84,10 @@ struct dentry *kvm_debugfs_dir;
 
 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
 			   unsigned long arg);
+#ifdef CONFIG_COMPAT
+static long kvm_vcpu_compat_ioctl(struct file *file, unsigned int ioctl,
+				  unsigned long arg);
+#endif
 static int hardware_enable_all(void);
 static void hardware_disable_all(void);
 
@@ -97,8 +101,8 @@ static bool largepages_enabled = true;
 static struct page *hwpoison_page;
 static pfn_t hwpoison_pfn;
 
-static struct page *fault_page;
-static pfn_t fault_pfn;
+struct page *fault_page;
+pfn_t fault_pfn;
 
 inline int kvm_is_mmio_pfn(pfn_t pfn)
 {
@@ -827,6 +831,13 @@ skip_lpage:
 
 	kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
 
+	/*
+	 * If the new memory slot is created, we need to clear all
+	 * mmio sptes.
+	 */
+	if (npages && old.base_gfn != mem->guest_phys_addr >> PAGE_SHIFT)
+		kvm_arch_flush_shadow(kvm);
+
 	kvm_free_physmem_slot(&old, &new);
 	kfree(old_memslots);
 
@@ -927,6 +938,18 @@ int is_fault_pfn(pfn_t pfn)
 }
 EXPORT_SYMBOL_GPL(is_fault_pfn);
 
+int is_noslot_pfn(pfn_t pfn)
+{
+	return pfn == bad_pfn;
+}
+EXPORT_SYMBOL_GPL(is_noslot_pfn);
+
+int is_invalid_pfn(pfn_t pfn)
+{
+	return pfn == hwpoison_pfn || pfn == fault_pfn;
+}
+EXPORT_SYMBOL_GPL(is_invalid_pfn);
+
 static inline unsigned long bad_hva(void)
 {
 	return PAGE_OFFSET;
@@ -1345,7 +1368,7 @@ int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
 	addr = gfn_to_hva(kvm, gfn);
 	if (kvm_is_error_hva(addr))
 		return -EFAULT;
-	r = copy_to_user((void __user *)addr + offset, data, len);
+	r = __copy_to_user((void __user *)addr + offset, data, len);
 	if (r)
 		return -EFAULT;
 	mark_page_dirty(kvm, gfn);
@@ -1405,7 +1428,7 @@ int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
 	if (kvm_is_error_hva(ghc->hva))
 		return -EFAULT;
 
-	r = copy_to_user((void __user *)ghc->hva, data, len);
+	r = __copy_to_user((void __user *)ghc->hva, data, len);
 	if (r)
 		return -EFAULT;
 	mark_page_dirty_in_slot(kvm, ghc->memslot, ghc->gpa >> PAGE_SHIFT);
@@ -1414,6 +1437,26 @@ int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
 }
 EXPORT_SYMBOL_GPL(kvm_write_guest_cached);
 
+int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
+			   void *data, unsigned long len)
+{
+	struct kvm_memslots *slots = kvm_memslots(kvm);
+	int r;
+
+	if (slots->generation != ghc->generation)
+		kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa);
+
+	if (kvm_is_error_hva(ghc->hva))
+		return -EFAULT;
+
+	r = __copy_from_user(data, (void __user *)ghc->hva, len);
+	if (r)
+		return -EFAULT;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_read_guest_cached);
+
 int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
 {
 	return kvm_write_guest_page(kvm, gfn, (const void *) empty_zero_page,
@@ -1586,7 +1629,9 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp)
 static struct file_operations kvm_vcpu_fops = {
 	.release        = kvm_vcpu_release,
 	.unlocked_ioctl = kvm_vcpu_ioctl,
-	.compat_ioctl   = kvm_vcpu_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl   = kvm_vcpu_compat_ioctl,
+#endif
 	.mmap           = kvm_vcpu_mmap,
 	.llseek		= noop_llseek,
 };
@@ -1615,18 +1660,18 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
 
 	r = kvm_arch_vcpu_setup(vcpu);
 	if (r)
-		return r;
+		goto vcpu_destroy;
 
 	mutex_lock(&kvm->lock);
 	if (atomic_read(&kvm->online_vcpus) == KVM_MAX_VCPUS) {
 		r = -EINVAL;
-		goto vcpu_destroy;
+		goto unlock_vcpu_destroy;
 	}
 
 	kvm_for_each_vcpu(r, v, kvm)
 		if (v->vcpu_id == id) {
 			r = -EEXIST;
-			goto vcpu_destroy;
+			goto unlock_vcpu_destroy;
 		}
 
 	BUG_ON(kvm->vcpus[atomic_read(&kvm->online_vcpus)]);
@@ -1636,7 +1681,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
 	r = create_vcpu_fd(vcpu);
 	if (r < 0) {
 		kvm_put_kvm(kvm);
-		goto vcpu_destroy;
+		goto unlock_vcpu_destroy;
 	}
 
 	kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu;
@@ -1650,8 +1695,9 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
 	mutex_unlock(&kvm->lock);
 	return r;
 
-vcpu_destroy:
+unlock_vcpu_destroy:
 	mutex_unlock(&kvm->lock);
+vcpu_destroy:
 	kvm_arch_vcpu_destroy(vcpu);
 	return r;
 }
@@ -1874,6 +1920,50 @@ out:
 	return r;
 }
 
+#ifdef CONFIG_COMPAT
+static long kvm_vcpu_compat_ioctl(struct file *filp,
+				  unsigned int ioctl, unsigned long arg)
+{
+	struct kvm_vcpu *vcpu = filp->private_data;
+	void __user *argp = compat_ptr(arg);
+	int r;
+
+	if (vcpu->kvm->mm != current->mm)
+		return -EIO;
+
+	switch (ioctl) {
+	case KVM_SET_SIGNAL_MASK: {
+		struct kvm_signal_mask __user *sigmask_arg = argp;
+		struct kvm_signal_mask kvm_sigmask;
+		compat_sigset_t csigset;
+		sigset_t sigset;
+
+		if (argp) {
+			r = -EFAULT;
+			if (copy_from_user(&kvm_sigmask, argp,
+					   sizeof kvm_sigmask))
+				goto out;
+			r = -EINVAL;
+			if (kvm_sigmask.len != sizeof csigset)
+				goto out;
+			r = -EFAULT;
+			if (copy_from_user(&csigset, sigmask_arg->sigset,
+					   sizeof csigset))
+				goto out;
+		}
+		sigset_from_compat(&sigset, &csigset);
+		r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset);
+		break;
+	}
+	default:
+		r = kvm_vcpu_ioctl(filp, ioctl, arg);
+	}
+
+out:
+	return r;
+}
+#endif
+
 static long kvm_vm_ioctl(struct file *filp,
 			 unsigned int ioctl, unsigned long arg)
 {
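
Note: the kvm_main.c changes add a compat (32-bit userspace on a 64-bit kernel) vcpu ioctl path for KVM_SET_SIGNAL_MASK, flush shadow pages so stale mmio sptes are cleared when a new memory slot is created, switch the cached guest-write helpers to __copy_to_user(), and introduce kvm_read_guest_cached() as the read-side counterpart of kvm_write_guest_cached(). Below is a hedged in-kernel usage sketch of the new read helper; the structure and caller are invented for illustration and are not part of this patch.

#include <linux/kvm_host.h>

/* Hypothetical guest-shared page layout, for illustration only. */
struct example_shared_page {
	u32 flags;
	u32 data;
};

static int example_read_shared(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
			       gpa_t gpa, struct example_shared_page *out)
{
	int r;

	/* Resolve and cache the gpa -> hva translation once. */
	r = kvm_gfn_to_hva_cache_init(kvm, ghc, gpa);
	if (r)
		return r;

	/* Later reads reuse the cache; it re-initializes itself if stale. */
	return kvm_read_guest_cached(kvm, ghc, out, sizeof(*out));
}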